xref: /sqlite-3.40.0/ext/misc/spellfix.c (revision e89feee5)
1 /*
2 ** 2012 April 10
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used
14 ** to search a large vocabulary for close matches.  See separate
15 ** documentation (http://www.sqlite.org/spellfix1.html) for details.
16 */
17 #include "sqlite3ext.h"
18 SQLITE_EXTENSION_INIT1
19 
20 #ifndef SQLITE_AMALGAMATION
21 # if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
22 #  define NDEBUG 1
23 # endif
24 # if defined(NDEBUG) && defined(SQLITE_DEBUG)
25 #  undef NDEBUG
26 # endif
27 # include <string.h>
28 # include <stdio.h>
29 # include <stdlib.h>
30 # include <assert.h>
31 # define ALWAYS(X)  1
32 # define NEVER(X)   0
33   typedef unsigned char u8;
34   typedef unsigned short u16;
35 #endif
36 #include <ctype.h>
37 
38 #ifndef SQLITE_OMIT_VIRTUALTABLE
39 
40 /*
41 ** Character classes for ASCII characters:
42 **
43 **   0   ''        Silent letters:   H W
44 **   1   'A'       Any vowel:   A E I O U (Y)
45 **   2   'B'       A bilabeal stop or fricative:  B F P V W
46 **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
47 **   4   'D'       Alveolar stops:  D T
48 **   5   'H'       Letter H at the beginning of a word
49 **   6   'L'       Glide:  L
50 **   7   'R'       Semivowel:  R
51 **   8   'M'       Nasals:  M N
52 **   9   'Y'       Letter Y at the beginning of a word.
53 **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
54 **   11  ' '       White space
55 **   12  '?'       Other.
56 */
57 #define CCLASS_SILENT         0
58 #define CCLASS_VOWEL          1
59 #define CCLASS_B              2
60 #define CCLASS_C              3
61 #define CCLASS_D              4
62 #define CCLASS_H              5
63 #define CCLASS_L              6
64 #define CCLASS_R              7
65 #define CCLASS_M              8
66 #define CCLASS_Y              9
67 #define CCLASS_DIGIT         10
68 #define CCLASS_SPACE         11
69 #define CCLASS_OTHER         12
70 
71 /*
72 ** The following table gives the character class for non-initial ASCII
73 ** characters.
74 */
75 static const unsigned char midClass[] = {
76  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
77  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
78  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
79  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
80  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
81  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
82  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
83  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
84  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
85  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
86  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
87  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
88  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
89  /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
90  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
91  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
92  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
93  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
94  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
95  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
96  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
97  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
98  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
99  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
100  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
101  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
102  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
103  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
104  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
105  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
106  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
107  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
108  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
109  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
110  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
111  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
112  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
113  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
114  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
115  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
116  /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
117  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
118  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
119 };
120 /*
121 ** This tables gives the character class for ASCII characters that form the
122 ** initial character of a word.  The only difference from midClass is with
123 ** the letters H, W, and Y.
124 */
125 static const unsigned char initClass[] = {
126  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
127  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
128  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
129  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
130  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
131  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
132  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
133  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
134  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
135  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
136  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
137  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
138  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
139  /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
140  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
141  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
142  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
143  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
144  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
145  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
146  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
147  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
148  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
149  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
150  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
151  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
152  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
153  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
154  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
155  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
156  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
157  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
158  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
159  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
160  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
161  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
162  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
163  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
164  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
165  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
166  /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
167  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
168  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
169 };
170 
171 /*
172 ** Mapping from the character class number (0-13) to a symbol for each
173 ** character class.  Note that initClass[] can be used to map the class
174 ** symbol back into the class number.
175 */
176 static const unsigned char className[] = ".ABCDHLRMY9 ?";
177 
178 /*
179 ** Generate a "phonetic hash" from a string of ASCII characters
180 ** in zIn[0..nIn-1].
181 **
182 **   * Map characters by character class as defined above.
183 **   * Omit double-letters
184 **   * Omit vowels beside R and L
185 **   * Omit T when followed by CH
186 **   * Omit W when followed by R
187 **   * Omit D when followed by J or G
188 **   * Omit K in KN or G in GN at the beginning of a word
189 **
190 ** Space to hold the result is obtained from sqlite3_malloc()
191 **
192 ** Return NULL if memory allocation fails.
193 */
194 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
195   unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
196   int i;
197   int nOut = 0;
198   char cPrev = 0x77;
199   char cPrevX = 0x77;
200   const unsigned char *aClass = initClass;
201 
202   if( zOut==0 ) return 0;
203   if( nIn>2 ){
204     switch( zIn[0] ){
205       case 'g':
206       case 'k': {
207         if( zIn[1]=='n' ){ zIn++; nIn--; }
208         break;
209       }
210     }
211   }
212   for(i=0; i<nIn; i++){
213     unsigned char c = zIn[i];
214     if( i+1<nIn ){
215       if( c=='w' && zIn[i+1]=='r' ) continue;
216       if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
217       if( i+2<nIn ){
218         if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
219       }
220     }
221     c = aClass[c&0x7f];
222     if( c==CCLASS_SPACE ) continue;
223     if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
224     aClass = midClass;
225     if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
226        continue; /* No vowels beside L or R */
227     }
228     if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
229        nOut--;   /* No vowels beside L or R */
230     }
231     cPrev = c;
232     if( c==CCLASS_SILENT ) continue;
233     cPrevX = c;
234     c = className[c];
235     assert( nOut>=0 );
236     if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
237   }
238   zOut[nOut] = 0;
239   return zOut;
240 }
241 
242 /*
243 ** This is an SQL function wrapper around phoneticHash().  See
244 ** the description of phoneticHash() for additional information.
245 */
246 static void phoneticHashSqlFunc(
247   sqlite3_context *context,
248   int argc,
249   sqlite3_value **argv
250 ){
251   const unsigned char *zIn;
252   unsigned char *zOut;
253 
254   zIn = sqlite3_value_text(argv[0]);
255   if( zIn==0 ) return;
256   zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
257   if( zOut==0 ){
258     sqlite3_result_error_nomem(context);
259   }else{
260     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
261   }
262 }
263 
264 /*
265 ** Return the character class number for a character given its
266 ** context.
267 */
268 static char characterClass(char cPrev, char c){
269   return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
270 }
271 
272 /*
273 ** Return the cost of inserting or deleting character c immediately
274 ** following character cPrev.  If cPrev==0, that means c is the first
275 ** character of the word.
276 */
277 static int insertOrDeleteCost(char cPrev, char c, char cNext){
278   char classC = characterClass(cPrev, c);
279   char classCprev;
280 
281   if( classC==CCLASS_SILENT ){
282     /* Insert or delete "silent" characters such as H or W */
283     return 1;
284   }
285   if( cPrev==c ){
286     /* Repeated characters, or miss a repeat */
287     return 10;
288   }
289   if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
290     return 20;  /* Insert a vowel before or after 'r' */
291   }
292   classCprev = characterClass(cPrev, cPrev);
293   if( classC==classCprev ){
294     if( classC==CCLASS_VOWEL ){
295       /* Remove or add a new vowel to a vowel cluster */
296       return 15;
297     }else{
298       /* Remove or add a consonant not in the same class */
299       return 50;
300     }
301   }
302 
303   /* any other character insertion or deletion */
304   return 100;
305 }
306 
307 /*
308 ** Divide the insertion cost by this factor when appending to the
309 ** end of the word.
310 */
311 #define FINAL_INS_COST_DIV  4
312 
313 /*
314 ** Return the cost of substituting cTo in place of cFrom assuming
315 ** the previous character is cPrev.  If cPrev==0 then cTo is the first
316 ** character of the word.
317 */
318 static int substituteCost(char cPrev, char cFrom, char cTo){
319   char classFrom, classTo;
320   if( cFrom==cTo ){
321     /* Exact match */
322     return 0;
323   }
324   if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
325     /* differ only in case */
326     return 0;
327   }
328   classFrom = characterClass(cPrev, cFrom);
329   classTo = characterClass(cPrev, cTo);
330   if( classFrom==classTo ){
331     /* Same character class */
332     return 40;
333   }
334   if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
335       && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
336     /* Convert from one consonant to another, but in a different class */
337     return 75;
338   }
339   /* Any other subsitution */
340   return 100;
341 }
342 
343 /*
344 ** Given two strings zA and zB which are pure ASCII, return the cost
345 ** of transforming zA into zB.  If zA ends with '*' assume that it is
346 ** a prefix of zB and give only minimal penalty for extra characters
347 ** on the end of zB.
348 **
349 ** Smaller numbers mean a closer match.
350 **
351 ** Negative values indicate an error:
352 **    -1  One of the inputs is NULL
353 **    -2  Non-ASCII characters on input
354 **    -3  Unable to allocate memory
355 **
356 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
357 ** of zB that matched the pattern in zA. If zA does not end with a '*',
358 ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
359 ** If zA does end in a '*', then it is the number of bytes in the prefix
360 ** of zB that was deemed to match zA.
361 */
362 static int editdist1(const char *zA, const char *zB, int *pnMatch){
363   int nA, nB;            /* Number of characters in zA[] and zB[] */
364   int xA, xB;            /* Loop counters for zA[] and zB[] */
365   char cA = 0, cB;       /* Current character of zA and zB */
366   char cAprev, cBprev;   /* Previous character of zA and zB */
367   char cAnext, cBnext;   /* Next character in zA and zB */
368   int d;                 /* North-west cost value */
369   int dc = 0;            /* North-west character value */
370   int res;               /* Final result */
371   int *m;                /* The cost matrix */
372   char *cx;              /* Corresponding character values */
373   int *toFree = 0;       /* Malloced space */
374   int nMatch = 0;
375   int mStack[60+15];     /* Stack space to use if not too much is needed */
376 
377   /* Early out if either input is NULL */
378   if( zA==0 || zB==0 ) return -1;
379 
380   /* Skip any common prefix */
381   while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
382   if( pnMatch ) *pnMatch = nMatch;
383   if( zA[0]==0 && zB[0]==0 ) return 0;
384 
385 #if 0
386   printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
387 #endif
388 
389   /* Verify input strings and measure their lengths */
390   for(nA=0; zA[nA]; nA++){
391     if( zA[nA]&0x80 ) return -2;
392   }
393   for(nB=0; zB[nB]; nB++){
394     if( zB[nB]&0x80 ) return -2;
395   }
396 
397   /* Special processing if either string is empty */
398   if( nA==0 ){
399     cBprev = (char)dc;
400     for(xB=res=0; (cB = zB[xB])!=0; xB++){
401       res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
402       cBprev = cB;
403     }
404     return res;
405   }
406   if( nB==0 ){
407     cAprev = (char)dc;
408     for(xA=res=0; (cA = zA[xA])!=0; xA++){
409       res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
410       cAprev = cA;
411     }
412     return res;
413   }
414 
415   /* A is a prefix of B */
416   if( zA[0]=='*' && zA[1]==0 ) return 0;
417 
418   /* Allocate and initialize the Wagner matrix */
419   if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
420     m = mStack;
421   }else{
422     m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
423     if( m==0 ) return -3;
424   }
425   cx = (char*)&m[nB+1];
426 
427   /* Compute the Wagner edit distance */
428   m[0] = 0;
429   cx[0] = (char)dc;
430   cBprev = (char)dc;
431   for(xB=1; xB<=nB; xB++){
432     cBnext = zB[xB];
433     cB = zB[xB-1];
434     cx[xB] = cB;
435     m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
436     cBprev = cB;
437   }
438   cAprev = (char)dc;
439   for(xA=1; xA<=nA; xA++){
440     int lastA = (xA==nA);
441     cA = zA[xA-1];
442     cAnext = zA[xA];
443     if( cA=='*' && lastA ) break;
444     d = m[0];
445     dc = cx[0];
446     m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
447     cBprev = 0;
448     for(xB=1; xB<=nB; xB++){
449       int totalCost, insCost, delCost, subCost, ncx;
450       cB = zB[xB-1];
451       cBnext = zB[xB];
452 
453       /* Cost to insert cB */
454       insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
455       if( lastA ) insCost /= FINAL_INS_COST_DIV;
456 
457       /* Cost to delete cA */
458       delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
459 
460       /* Cost to substitute cA->cB */
461       subCost = substituteCost(cx[xB-1], cA, cB);
462 
463       /* Best cost */
464       totalCost = insCost + m[xB-1];
465       ncx = cB;
466       if( (delCost + m[xB])<totalCost ){
467         totalCost = delCost + m[xB];
468         ncx = cA;
469       }
470       if( (subCost + d)<totalCost ){
471         totalCost = subCost + d;
472       }
473 
474 #if 0
475       printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
476              " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
477              xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
478              insCost, delCost, subCost, totalCost, ncx?ncx:' ');
479 #endif
480 
481       /* Update the matrix */
482       d = m[xB];
483       dc = cx[xB];
484       m[xB] = totalCost;
485       cx[xB] = (char)ncx;
486       cBprev = cB;
487     }
488     cAprev = cA;
489   }
490 
491   /* Free the wagner matrix and return the result */
492   if( cA=='*' ){
493     res = m[1];
494     for(xB=1; xB<=nB; xB++){
495       if( m[xB]<res ){
496         res = m[xB];
497         if( pnMatch ) *pnMatch = xB+nMatch;
498       }
499     }
500   }else{
501     res = m[nB];
502     /* In the current implementation, pnMatch is always NULL if zA does
503     ** not end in "*" */
504     assert( pnMatch==0 );
505   }
506   sqlite3_free(toFree);
507   return res;
508 }
509 
510 /*
511 ** Function:    editdist(A,B)
512 **
513 ** Return the cost of transforming string A into string B.  Both strings
514 ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
515 ** a prefix of B and extra characters on the end of B have minimal additional
516 ** cost.
517 */
518 static void editdistSqlFunc(
519   sqlite3_context *context,
520   int argc,
521   sqlite3_value **argv
522 ){
523   int res = editdist1(
524                     (const char*)sqlite3_value_text(argv[0]),
525                     (const char*)sqlite3_value_text(argv[1]),
526                     0);
527   if( res<0 ){
528     if( res==(-3) ){
529       sqlite3_result_error_nomem(context);
530     }else if( res==(-2) ){
531       sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
532     }else{
533       sqlite3_result_error(context, "NULL input to editdist()", -1);
534     }
535   }else{
536     sqlite3_result_int(context, res);
537   }
538 }
539 
540 /* End of the fixed-cost edit distance implementation
541 ******************************************************************************
542 *****************************************************************************
543 ** Begin: Configurable cost unicode edit distance routines
544 */
545 /* Forward declaration of structures */
546 typedef struct EditDist3Cost EditDist3Cost;
547 typedef struct EditDist3Config EditDist3Config;
548 typedef struct EditDist3Point EditDist3Point;
549 typedef struct EditDist3From EditDist3From;
550 typedef struct EditDist3FromString EditDist3FromString;
551 typedef struct EditDist3To EditDist3To;
552 typedef struct EditDist3ToString EditDist3ToString;
553 typedef struct EditDist3Lang EditDist3Lang;
554 
555 
556 /*
557 ** An entry in the edit cost table
558 */
559 struct EditDist3Cost {
560   EditDist3Cost *pNext;     /* Next cost element */
561   u8 nFrom;                 /* Number of bytes in aFrom */
562   u8 nTo;                   /* Number of bytes in aTo */
563   u16 iCost;                /* Cost of this transformation */
564   char a[4]    ;            /* FROM string followed by TO string */
565   /* Additional TO and FROM string bytes appended as necessary */
566 };
567 
568 /*
569 ** Edit costs for a particular language ID
570 */
571 struct EditDist3Lang {
572   int iLang;             /* Language ID */
573   int iInsCost;          /* Default insertion cost */
574   int iDelCost;          /* Default deletion cost */
575   int iSubCost;          /* Default substitution cost */
576   EditDist3Cost *pCost;  /* Costs */
577 };
578 
579 
580 /*
581 ** The default EditDist3Lang object, with default costs.
582 */
583 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
584 
585 /*
586 ** Complete configuration
587 */
588 struct EditDist3Config {
589   int nLang;             /* Number of language IDs.  Size of a[] */
590   EditDist3Lang *a;      /* One for each distinct language ID */
591 };
592 
593 /*
594 ** Extra information about each character in the FROM string.
595 */
596 struct EditDist3From {
597   int nSubst;              /* Number of substitution cost entries */
598   int nDel;                /* Number of deletion cost entries */
599   int nByte;               /* Number of bytes in this character */
600   EditDist3Cost **apSubst; /* Array of substitution costs for this element */
601   EditDist3Cost **apDel;   /* Array of deletion cost entries */
602 };
603 
604 /*
605 ** A precompiled FROM string.
606 *
607 ** In the common case we expect the FROM string to be reused multiple times.
608 ** In other words, the common case will be to measure the edit distance
609 ** from a single origin string to multiple target strings.
610 */
611 struct EditDist3FromString {
612   char *z;                 /* The complete text of the FROM string */
613   int n;                   /* Number of characters in the FROM string */
614   int isPrefix;            /* True if ends with '*' character */
615   EditDist3From *a;        /* Extra info about each char of the FROM string */
616 };
617 
618 /*
619 ** Extra information about each character in the TO string.
620 */
621 struct EditDist3To {
622   int nIns;                /* Number of insertion cost entries */
623   int nByte;               /* Number of bytes in this character */
624   EditDist3Cost **apIns;   /* Array of deletion cost entries */
625 };
626 
627 /*
628 ** A precompiled FROM string
629 */
630 struct EditDist3ToString {
631   char *z;                 /* The complete text of the TO string */
632   int n;                   /* Number of characters in the TO string */
633   EditDist3To *a;          /* Extra info about each char of the TO string */
634 };
635 
636 /*
637 ** Clear or delete an instance of the object that records all edit-distance
638 ** weights.
639 */
640 static void editDist3ConfigClear(EditDist3Config *p){
641   int i;
642   if( p==0 ) return;
643   for(i=0; i<p->nLang; i++){
644     EditDist3Cost *pCost, *pNext;
645     pCost = p->a[i].pCost;
646     while( pCost ){
647       pNext = pCost->pNext;
648       sqlite3_free(pCost);
649       pCost = pNext;
650     }
651   }
652   sqlite3_free(p->a);
653   memset(p, 0, sizeof(*p));
654 }
655 static void editDist3ConfigDelete(void *pIn){
656   EditDist3Config *p = (EditDist3Config*)pIn;
657   editDist3ConfigClear(p);
658   sqlite3_free(p);
659 }
660 
661 /* Compare the FROM values of two EditDist3Cost objects, for sorting.
662 ** Return negative, zero, or positive if the A is less than, equal to,
663 ** or greater than B.
664 */
665 static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){
666   int n = pA->nFrom;
667   int rc;
668   if( n>pB->nFrom ) n = pB->nFrom;
669   rc = strncmp(pA->a, pB->a, n);
670   if( rc==0 ) rc = pA->nFrom - pB->nFrom;
671   return rc;
672 }
673 
674 /*
675 ** Merge together two sorted lists of EditDist3Cost objects, in order
676 ** of increasing FROM.
677 */
678 static EditDist3Cost *editDist3CostMerge(
679   EditDist3Cost *pA,
680   EditDist3Cost *pB
681 ){
682   EditDist3Cost *pHead = 0;
683   EditDist3Cost **ppTail = &pHead;
684   EditDist3Cost *p;
685   while( pA && pB ){
686     if( editDist3CostCompare(pA,pB)<=0 ){
687       p = pA;
688       pA = pA->pNext;
689     }else{
690       p = pB;
691       pB = pB->pNext;
692     }
693     *ppTail = p;
694     ppTail =  &p->pNext;
695   }
696   if( pA ){
697     *ppTail = pA;
698   }else{
699     *ppTail = pB;
700   }
701   return pHead;
702 }
703 
704 /*
705 ** Sort a list of EditDist3Cost objects into order of increasing FROM
706 */
707 static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){
708   EditDist3Cost *ap[60], *p;
709   int i;
710   int mx = 0;
711   ap[0] = 0;
712   ap[1] = 0;
713   while( pList ){
714     p = pList;
715     pList = p->pNext;
716     p->pNext = 0;
717     for(i=0; ap[i]; i++){
718       p = editDist3CostMerge(ap[i],p);
719       ap[i] = 0;
720     }
721     ap[i] = p;
722     if( i>mx ){
723       mx = i;
724       ap[i+1] = 0;
725     }
726   }
727   p = 0;
728   for(i=0; i<=mx; i++){
729     if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
730   }
731   return p;
732 }
733 
734 /*
735 ** Load all edit-distance weights from a table.
736 */
737 static int editDist3ConfigLoad(
738   EditDist3Config *p,      /* The edit distance configuration to load */
739   sqlite3 *db,            /* Load from this database */
740   const char *zTable      /* Name of the table from which to load */
741 ){
742   sqlite3_stmt *pStmt;
743   int rc, rc2;
744   char *zSql;
745   int iLangPrev = -9999;
746   EditDist3Lang *pLang = 0;
747 
748   zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
749                          " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
750   if( zSql==0 ) return SQLITE_NOMEM;
751   rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
752   sqlite3_free(zSql);
753   if( rc ) return rc;
754   editDist3ConfigClear(p);
755   while( sqlite3_step(pStmt)==SQLITE_ROW ){
756     int iLang = sqlite3_column_int(pStmt, 0);
757     const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
758     int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
759     const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
760     int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
761     int iCost = sqlite3_column_int(pStmt, 3);
762 
763     assert( zFrom!=0 || nFrom==0 );
764     assert( zTo!=0 || nTo==0 );
765     if( nFrom>100 || nTo>100 ) continue;
766     if( iCost<0 ) continue;
767     if( iCost>=10000 ) continue;  /* Costs above 10K are considered infinite */
768     if( pLang==0 || iLang!=iLangPrev ){
769       EditDist3Lang *pNew;
770       pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
771       if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
772       p->a = pNew;
773       pLang = &p->a[p->nLang];
774       p->nLang++;
775       pLang->iLang = iLang;
776       pLang->iInsCost = 100;
777       pLang->iDelCost = 100;
778       pLang->iSubCost = 150;
779       pLang->pCost = 0;
780       iLangPrev = iLang;
781     }
782     if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
783       pLang->iDelCost = iCost;
784     }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
785       pLang->iInsCost = iCost;
786     }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
787       pLang->iSubCost = iCost;
788     }else{
789       EditDist3Cost *pCost;
790       int nExtra = nFrom + nTo - 4;
791       if( nExtra<0 ) nExtra = 0;
792       pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
793       if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
794       pCost->nFrom = (u8)nFrom;
795       pCost->nTo = (u8)nTo;
796       pCost->iCost = (u16)iCost;
797       memcpy(pCost->a, zFrom, nFrom);
798       memcpy(pCost->a + nFrom, zTo, nTo);
799       pCost->pNext = pLang->pCost;
800       pLang->pCost = pCost;
801     }
802   }
803   rc2 = sqlite3_finalize(pStmt);
804   if( rc==SQLITE_OK ) rc = rc2;
805   if( rc==SQLITE_OK ){
806     int iLang;
807     for(iLang=0; iLang<p->nLang; iLang++){
808       p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
809     }
810   }
811   return rc;
812 }
813 
814 /*
815 ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
816 ** of N.
817 */
818 static int utf8Len(unsigned char c, int N){
819   int len = 1;
820   if( c>0x7f ){
821     if( (c&0xe0)==0xc0 ){
822       len = 2;
823     }else if( (c&0xf0)==0xe0 ){
824       len = 3;
825     }else{
826       len = 4;
827     }
828   }
829   if( len>N ) len = N;
830   return len;
831 }
832 
833 /*
834 ** Return TRUE (non-zero) if the To side of the given cost matches
835 ** the given string.
836 */
837 static int matchTo(EditDist3Cost *p, const char *z, int n){
838   assert( n>0 );
839   if( p->a[p->nFrom]!=z[0] ) return 0;
840   if( p->nTo>n ) return 0;
841   if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
842   return 1;
843 }
844 
845 /*
846 ** Return TRUE (non-zero) if the From side of the given cost matches
847 ** the given string.
848 */
849 static int matchFrom(EditDist3Cost *p, const char *z, int n){
850   assert( p->nFrom<=n );
851   if( p->nFrom ){
852     if( p->a[0]!=z[0] ) return 0;
853     if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
854   }
855   return 1;
856 }
857 
858 /*
859 ** Return TRUE (non-zero) of the next FROM character and the next TO
860 ** character are the same.
861 */
862 static int matchFromTo(
863   EditDist3FromString *pStr,  /* Left hand string */
864   int n1,                     /* Index of comparison character on the left */
865   const char *z2,             /* Right-handl comparison character */
866   int n2                      /* Bytes remaining in z2[] */
867 ){
868   int b1 = pStr->a[n1].nByte;
869   if( b1>n2 ) return 0;
870   assert( b1>0 );
871   if( pStr->z[n1]!=z2[0] ) return 0;
872   if( strncmp(pStr->z+n1, z2, b1)!=0 ) return 0;
873   return 1;
874 }
875 
876 /*
877 ** Delete an EditDist3FromString objecct
878 */
879 static void editDist3FromStringDelete(EditDist3FromString *p){
880   int i;
881   if( p ){
882     for(i=0; i<p->n; i++){
883       sqlite3_free(p->a[i].apDel);
884       sqlite3_free(p->a[i].apSubst);
885     }
886     sqlite3_free(p);
887   }
888 }
889 
890 /*
891 ** Create a EditDist3FromString object.
892 */
893 static EditDist3FromString *editDist3FromStringNew(
894   const EditDist3Lang *pLang,
895   const char *z,
896   int n
897 ){
898   EditDist3FromString *pStr;
899   EditDist3Cost *p;
900   int i;
901 
902   if( z==0 ) return 0;
903   if( n<0 ) n = (int)strlen(z);
904   pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
905   if( pStr==0 ) return 0;
906   pStr->a = (EditDist3From*)&pStr[1];
907   memset(pStr->a, 0, sizeof(pStr->a[0])*n);
908   pStr->n = n;
909   pStr->z = (char*)&pStr->a[n];
910   memcpy(pStr->z, z, n+1);
911   if( n && z[n-1]=='*' ){
912     pStr->isPrefix = 1;
913     n--;
914     pStr->n--;
915     pStr->z[n] = 0;
916   }else{
917     pStr->isPrefix = 0;
918   }
919 
920   for(i=0; i<n; i++){
921     EditDist3From *pFrom = &pStr->a[i];
922     memset(pFrom, 0, sizeof(*pFrom));
923     pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
924     for(p=pLang->pCost; p; p=p->pNext){
925       EditDist3Cost **apNew;
926       if( i+p->nFrom>n ) continue;
927       if( matchFrom(p, z+i, n-i)==0 ) continue;
928       if( p->nTo==0 ){
929         apNew = sqlite3_realloc64(pFrom->apDel,
930                                 sizeof(*apNew)*(pFrom->nDel+1));
931         if( apNew==0 ) break;
932         pFrom->apDel = apNew;
933         apNew[pFrom->nDel++] = p;
934       }else{
935         apNew = sqlite3_realloc64(pFrom->apSubst,
936                                 sizeof(*apNew)*(pFrom->nSubst+1));
937         if( apNew==0 ) break;
938         pFrom->apSubst = apNew;
939         apNew[pFrom->nSubst++] = p;
940       }
941     }
942     if( p ){
943       editDist3FromStringDelete(pStr);
944       pStr = 0;
945       break;
946     }
947   }
948   return pStr;
949 }
950 
951 /*
952 ** Update entry m[i] such that it is the minimum of its current value
953 ** and m[j]+iCost.
954 */
955 static void updateCost(
956   unsigned int *m,
957   int i,
958   int j,
959   int iCost
960 ){
961   unsigned int b;
962   assert( iCost>=0 );
963   assert( iCost<10000 );
964   b = m[j] + iCost;
965   if( b<m[i] ) m[i] = b;
966 }
967 
968 /*
969 ** How much stack space (int bytes) to use for Wagner matrix in
970 ** editDist3Core().  If more space than this is required, the entire
971 ** matrix is taken from the heap.  To reduce the load on the memory
972 ** allocator, make this value as large as practical for the
973 ** architecture in use.
974 */
975 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
976 # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
977 #endif
978 
979 /* Compute the edit distance between two strings.
980 **
981 ** If an error occurs, return a negative number which is the error code.
982 **
983 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
984 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
985 ** not contain the pattern for a prefix-search, then this is always the number
986 ** of characters in z2. If pFrom does contain a prefix search pattern, then
987 ** it is the number of characters in the prefix of z2 that was deemed to
988 ** match pFrom.
989 */
990 static int editDist3Core(
991   EditDist3FromString *pFrom,  /* The FROM string */
992   const char *z2,              /* The TO string */
993   int n2,                      /* Length of the TO string */
994   const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
995   int *pnMatch                 /* OUT: Characters in matched prefix */
996 ){
997   int k, n;
998   int i1, b1;
999   int i2, b2;
1000   EditDist3FromString f = *pFrom;
1001   EditDist3To *a2;
1002   unsigned int *m;
1003   unsigned int *pToFree;
1004   int szRow;
1005   EditDist3Cost *p;
1006   int res;
1007   sqlite3_uint64 nByte;
1008   unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
1009 
1010   /* allocate the Wagner matrix and the aTo[] array for the TO string */
1011   n = (f.n+1)*(n2+1);
1012   n = (n+1)&~1;
1013   nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
1014   if( nByte<=sizeof(stackSpace) ){
1015     m = stackSpace;
1016     pToFree = 0;
1017   }else{
1018     m = pToFree = sqlite3_malloc64( nByte );
1019     if( m==0 ) return -1;            /* Out of memory */
1020   }
1021   a2 = (EditDist3To*)&m[n];
1022   memset(a2, 0, sizeof(a2[0])*n2);
1023 
1024   /* Fill in the a1[] matrix for all characters of the TO string */
1025   for(i2=0; i2<n2; i2++){
1026     a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
1027     for(p=pLang->pCost; p; p=p->pNext){
1028       EditDist3Cost **apNew;
1029       if( p->nFrom>0 ) break;
1030       if( i2+p->nTo>n2 ) continue;
1031       if( p->a[0]>z2[i2] ) break;
1032       if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
1033       a2[i2].nIns++;
1034       apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
1035       if( apNew==0 ){
1036         res = -1;  /* Out of memory */
1037         goto editDist3Abort;
1038       }
1039       a2[i2].apIns = apNew;
1040       a2[i2].apIns[a2[i2].nIns-1] = p;
1041     }
1042   }
1043 
1044   /* Prepare to compute the minimum edit distance */
1045   szRow = f.n+1;
1046   memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
1047   m[0] = 0;
1048 
1049   /* First fill in the top-row of the matrix with FROM deletion costs */
1050   for(i1=0; i1<f.n; i1 += b1){
1051     b1 = f.a[i1].nByte;
1052     updateCost(m, i1+b1, i1, pLang->iDelCost);
1053     for(k=0; k<f.a[i1].nDel; k++){
1054       p = f.a[i1].apDel[k];
1055       updateCost(m, i1+p->nFrom, i1, p->iCost);
1056     }
1057   }
1058 
1059   /* Fill in all subsequent rows, top-to-bottom, left-to-right */
1060   for(i2=0; i2<n2; i2 += b2){
1061     int rx;      /* Starting index for current row */
1062     int rxp;     /* Starting index for previous row */
1063     b2 = a2[i2].nByte;
1064     rx = szRow*(i2+b2);
1065     rxp = szRow*i2;
1066     updateCost(m, rx, rxp, pLang->iInsCost);
1067     for(k=0; k<a2[i2].nIns; k++){
1068       p = a2[i2].apIns[k];
1069       updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
1070     }
1071     for(i1=0; i1<f.n; i1+=b1){
1072       int cx;    /* Index of current cell */
1073       int cxp;   /* Index of cell immediately to the left */
1074       int cxd;   /* Index of cell to the left and one row above */
1075       int cxu;   /* Index of cell immediately above */
1076       b1 = f.a[i1].nByte;
1077       cxp = rx + i1;
1078       cx = cxp + b1;
1079       cxd = rxp + i1;
1080       cxu = cxd + b1;
1081       updateCost(m, cx, cxp, pLang->iDelCost);
1082       for(k=0; k<f.a[i1].nDel; k++){
1083         p = f.a[i1].apDel[k];
1084         updateCost(m, cxp+p->nFrom, cxp, p->iCost);
1085       }
1086       updateCost(m, cx, cxu, pLang->iInsCost);
1087       if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
1088         updateCost(m, cx, cxd, 0);
1089       }
1090       updateCost(m, cx, cxd, pLang->iSubCost);
1091       for(k=0; k<f.a[i1].nSubst; k++){
1092         p = f.a[i1].apSubst[k];
1093         if( matchTo(p, z2+i2, n2-i2) ){
1094           updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1095         }
1096       }
1097     }
1098   }
1099 
1100 #if 0  /* Enable for debugging */
1101   printf("         ^");
1102   for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1103   printf("\n   ^:");
1104   for(i1=0; i1<szRow; i1++){
1105     int v = m[i1];
1106     if( v>9999 ) printf(" ****");
1107     else         printf(" %4d", v);
1108   }
1109   printf("\n");
1110   for(i2=0; i2<n2; i2++){
1111     printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1112     for(i1=0; i1<szRow; i1++){
1113       int v = m[(i2+1)*szRow+i1];
1114       if( v>9999 ) printf(" ****");
1115       else         printf(" %4d", v);
1116     }
1117     printf("\n");
1118   }
1119 #endif
1120 
1121   /* Free memory allocations and return the result */
1122   res = (int)m[szRow*(n2+1)-1];
1123   n = n2;
1124   if( f.isPrefix ){
1125     for(i2=1; i2<=n2; i2++){
1126       int b = m[szRow*i2-1];
1127       if( b<=res ){
1128         res = b;
1129         n = i2 - 1;
1130       }
1131     }
1132   }
1133   if( pnMatch ){
1134     int nExtra = 0;
1135     for(k=0; k<n; k++){
1136       if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1137     }
1138     *pnMatch = n - nExtra;
1139   }
1140 
1141 editDist3Abort:
1142   for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1143   sqlite3_free(pToFree);
1144   return res;
1145 }
1146 
1147 /*
1148 ** Get an appropriate EditDist3Lang object.
1149 */
1150 static const EditDist3Lang *editDist3FindLang(
1151   EditDist3Config *pConfig,
1152   int iLang
1153 ){
1154   int i;
1155   for(i=0; i<pConfig->nLang; i++){
1156     if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1157   }
1158   return &editDist3Lang;
1159 }
1160 
1161 /*
1162 ** Function:    editdist3(A,B,iLang)
1163 **              editdist3(tablename)
1164 **
1165 ** Return the cost of transforming string A into string B using edit
1166 ** weights for iLang.
1167 **
1168 ** The second form loads edit weights into memory from a table.
1169 */
1170 static void editDist3SqlFunc(
1171   sqlite3_context *context,
1172   int argc,
1173   sqlite3_value **argv
1174 ){
1175   EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1176   sqlite3 *db = sqlite3_context_db_handle(context);
1177   int rc;
1178   if( argc==1 ){
1179     const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1180     rc = editDist3ConfigLoad(pConfig, db, zTable);
1181     if( rc ) sqlite3_result_error_code(context, rc);
1182   }else{
1183     const char *zA = (const char*)sqlite3_value_text(argv[0]);
1184     const char *zB = (const char*)sqlite3_value_text(argv[1]);
1185     int nA = sqlite3_value_bytes(argv[0]);
1186     int nB = sqlite3_value_bytes(argv[1]);
1187     int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1188     const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1189     EditDist3FromString *pFrom;
1190     int dist;
1191 
1192     pFrom = editDist3FromStringNew(pLang, zA, nA);
1193     if( pFrom==0 ){
1194       sqlite3_result_error_nomem(context);
1195       return;
1196     }
1197     dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1198     editDist3FromStringDelete(pFrom);
1199     if( dist==(-1) ){
1200       sqlite3_result_error_nomem(context);
1201     }else{
1202       sqlite3_result_int(context, dist);
1203     }
1204   }
1205 }
1206 
1207 /*
1208 ** Register the editDist3 function with SQLite
1209 */
1210 static int editDist3Install(sqlite3 *db){
1211   int rc;
1212   EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1213   if( pConfig==0 ) return SQLITE_NOMEM;
1214   memset(pConfig, 0, sizeof(*pConfig));
1215   rc = sqlite3_create_function_v2(db, "editdist3",
1216               2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1217               editDist3SqlFunc, 0, 0, 0);
1218   if( rc==SQLITE_OK ){
1219     rc = sqlite3_create_function_v2(db, "editdist3",
1220                 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1221                 editDist3SqlFunc, 0, 0, 0);
1222   }
1223   if( rc==SQLITE_OK ){
1224     rc = sqlite3_create_function_v2(db, "editdist3",
1225                 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1226                 editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1227   }else{
1228     sqlite3_free(pConfig);
1229   }
1230   return rc;
1231 }
1232 /* End configurable cost unicode edit distance routines
1233 ******************************************************************************
1234 ******************************************************************************
1235 ** Begin transliterate unicode-to-ascii implementation
1236 */
1237 
1238 #if !SQLITE_AMALGAMATION
1239 /*
1240 ** This lookup table is used to help decode the first byte of
1241 ** a multi-byte UTF8 character.
1242 */
1243 static const unsigned char sqlite3Utf8Trans1[] = {
1244   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1245   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1246   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1247   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1248   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1249   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1250   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1251   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1252 };
1253 #endif
1254 
1255 /*
1256 ** Return the value of the first UTF-8 character in the string.
1257 */
1258 static int utf8Read(const unsigned char *z, int n, int *pSize){
1259   int c, i;
1260 
1261   /* All callers to this routine (in the current implementation)
1262   ** always have n>0. */
1263   if( NEVER(n==0) ){
1264     c = i = 0;
1265   }else{
1266     c = z[0];
1267     i = 1;
1268     if( c>=0xc0 ){
1269       c = sqlite3Utf8Trans1[c-0xc0];
1270       while( i<n && (z[i] & 0xc0)==0x80 ){
1271         c = (c<<6) + (0x3f & z[i++]);
1272       }
1273     }
1274   }
1275   *pSize = i;
1276   return c;
1277 }
1278 
1279 /*
1280 ** Return the number of characters in the utf-8 string in the nIn byte
1281 ** buffer pointed to by zIn.
1282 */
1283 static int utf8Charlen(const char *zIn, int nIn){
1284   int i;
1285   int nChar = 0;
1286   for(i=0; i<nIn; nChar++){
1287     int sz;
1288     utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1289     i += sz;
1290   }
1291   return nChar;
1292 }
1293 
1294 typedef struct Transliteration Transliteration;
1295 struct Transliteration {
1296  unsigned short int cFrom;
1297  unsigned char cTo0, cTo1, cTo2, cTo3;
1298 #ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1299  unsigned char cTo4;
1300 #endif
1301 };
1302 
1303 /*
1304 ** Table of translations from unicode characters into ASCII.
1305 */
1306 static const Transliteration translit[] = {
1307   { 0x00A0,  0x20, 0x00, 0x00, 0x00 },  /*   to   */
1308   { 0x00B5,  0x75, 0x00, 0x00, 0x00 },  /* µ to u */
1309   { 0x00C0,  0x41, 0x00, 0x00, 0x00 },  /* À to A */
1310   { 0x00C1,  0x41, 0x00, 0x00, 0x00 },  /* Á to A */
1311   { 0x00C2,  0x41, 0x00, 0x00, 0x00 },  /* Â to A */
1312   { 0x00C3,  0x41, 0x00, 0x00, 0x00 },  /* Ã to A */
1313   { 0x00C4,  0x41, 0x65, 0x00, 0x00 },  /* Ä to Ae */
1314   { 0x00C5,  0x41, 0x61, 0x00, 0x00 },  /* Å to Aa */
1315   { 0x00C6,  0x41, 0x45, 0x00, 0x00 },  /* Æ to AE */
1316   { 0x00C7,  0x43, 0x00, 0x00, 0x00 },  /* Ç to C */
1317   { 0x00C8,  0x45, 0x00, 0x00, 0x00 },  /* È to E */
1318   { 0x00C9,  0x45, 0x00, 0x00, 0x00 },  /* É to E */
1319   { 0x00CA,  0x45, 0x00, 0x00, 0x00 },  /* Ê to E */
1320   { 0x00CB,  0x45, 0x00, 0x00, 0x00 },  /* Ë to E */
1321   { 0x00CC,  0x49, 0x00, 0x00, 0x00 },  /* Ì to I */
1322   { 0x00CD,  0x49, 0x00, 0x00, 0x00 },  /* Í to I */
1323   { 0x00CE,  0x49, 0x00, 0x00, 0x00 },  /* Î to I */
1324   { 0x00CF,  0x49, 0x00, 0x00, 0x00 },  /* Ï to I */
1325   { 0x00D0,  0x44, 0x00, 0x00, 0x00 },  /* Ð to D */
1326   { 0x00D1,  0x4E, 0x00, 0x00, 0x00 },  /* Ñ to N */
1327   { 0x00D2,  0x4F, 0x00, 0x00, 0x00 },  /* Ò to O */
1328   { 0x00D3,  0x4F, 0x00, 0x00, 0x00 },  /* Ó to O */
1329   { 0x00D4,  0x4F, 0x00, 0x00, 0x00 },  /* Ô to O */
1330   { 0x00D5,  0x4F, 0x00, 0x00, 0x00 },  /* Õ to O */
1331   { 0x00D6,  0x4F, 0x65, 0x00, 0x00 },  /* Ö to Oe */
1332   { 0x00D7,  0x78, 0x00, 0x00, 0x00 },  /* × to x */
1333   { 0x00D8,  0x4F, 0x00, 0x00, 0x00 },  /* Ø to O */
1334   { 0x00D9,  0x55, 0x00, 0x00, 0x00 },  /* Ù to U */
1335   { 0x00DA,  0x55, 0x00, 0x00, 0x00 },  /* Ú to U */
1336   { 0x00DB,  0x55, 0x00, 0x00, 0x00 },  /* Û to U */
1337   { 0x00DC,  0x55, 0x65, 0x00, 0x00 },  /* Ü to Ue */
1338   { 0x00DD,  0x59, 0x00, 0x00, 0x00 },  /* Ý to Y */
1339   { 0x00DE,  0x54, 0x68, 0x00, 0x00 },  /* Þ to Th */
1340   { 0x00DF,  0x73, 0x73, 0x00, 0x00 },  /* ß to ss */
1341   { 0x00E0,  0x61, 0x00, 0x00, 0x00 },  /* à to a */
1342   { 0x00E1,  0x61, 0x00, 0x00, 0x00 },  /* á to a */
1343   { 0x00E2,  0x61, 0x00, 0x00, 0x00 },  /* â to a */
1344   { 0x00E3,  0x61, 0x00, 0x00, 0x00 },  /* ã to a */
1345   { 0x00E4,  0x61, 0x65, 0x00, 0x00 },  /* ä to ae */
1346   { 0x00E5,  0x61, 0x61, 0x00, 0x00 },  /* å to aa */
1347   { 0x00E6,  0x61, 0x65, 0x00, 0x00 },  /* æ to ae */
1348   { 0x00E7,  0x63, 0x00, 0x00, 0x00 },  /* ç to c */
1349   { 0x00E8,  0x65, 0x00, 0x00, 0x00 },  /* è to e */
1350   { 0x00E9,  0x65, 0x00, 0x00, 0x00 },  /* é to e */
1351   { 0x00EA,  0x65, 0x00, 0x00, 0x00 },  /* ê to e */
1352   { 0x00EB,  0x65, 0x00, 0x00, 0x00 },  /* ë to e */
1353   { 0x00EC,  0x69, 0x00, 0x00, 0x00 },  /* ì to i */
1354   { 0x00ED,  0x69, 0x00, 0x00, 0x00 },  /* í to i */
1355   { 0x00EE,  0x69, 0x00, 0x00, 0x00 },  /* î to i */
1356   { 0x00EF,  0x69, 0x00, 0x00, 0x00 },  /* ï to i */
1357   { 0x00F0,  0x64, 0x00, 0x00, 0x00 },  /* ð to d */
1358   { 0x00F1,  0x6E, 0x00, 0x00, 0x00 },  /* ñ to n */
1359   { 0x00F2,  0x6F, 0x00, 0x00, 0x00 },  /* ò to o */
1360   { 0x00F3,  0x6F, 0x00, 0x00, 0x00 },  /* ó to o */
1361   { 0x00F4,  0x6F, 0x00, 0x00, 0x00 },  /* ô to o */
1362   { 0x00F5,  0x6F, 0x00, 0x00, 0x00 },  /* õ to o */
1363   { 0x00F6,  0x6F, 0x65, 0x00, 0x00 },  /* ö to oe */
1364   { 0x00F7,  0x3A, 0x00, 0x00, 0x00 },  /* ÷ to : */
1365   { 0x00F8,  0x6F, 0x00, 0x00, 0x00 },  /* ø to o */
1366   { 0x00F9,  0x75, 0x00, 0x00, 0x00 },  /* ù to u */
1367   { 0x00FA,  0x75, 0x00, 0x00, 0x00 },  /* ú to u */
1368   { 0x00FB,  0x75, 0x00, 0x00, 0x00 },  /* û to u */
1369   { 0x00FC,  0x75, 0x65, 0x00, 0x00 },  /* ü to ue */
1370   { 0x00FD,  0x79, 0x00, 0x00, 0x00 },  /* ý to y */
1371   { 0x00FE,  0x74, 0x68, 0x00, 0x00 },  /* þ to th */
1372   { 0x00FF,  0x79, 0x00, 0x00, 0x00 },  /* ÿ to y */
1373   { 0x0100,  0x41, 0x00, 0x00, 0x00 },  /* Ā to A */
1374   { 0x0101,  0x61, 0x00, 0x00, 0x00 },  /* ā to a */
1375   { 0x0102,  0x41, 0x00, 0x00, 0x00 },  /* Ă to A */
1376   { 0x0103,  0x61, 0x00, 0x00, 0x00 },  /* ă to a */
1377   { 0x0104,  0x41, 0x00, 0x00, 0x00 },  /* Ą to A */
1378   { 0x0105,  0x61, 0x00, 0x00, 0x00 },  /* ą to a */
1379   { 0x0106,  0x43, 0x00, 0x00, 0x00 },  /* Ć to C */
1380   { 0x0107,  0x63, 0x00, 0x00, 0x00 },  /* ć to c */
1381   { 0x0108,  0x43, 0x68, 0x00, 0x00 },  /* Ĉ to Ch */
1382   { 0x0109,  0x63, 0x68, 0x00, 0x00 },  /* ĉ to ch */
1383   { 0x010A,  0x43, 0x00, 0x00, 0x00 },  /* Ċ to C */
1384   { 0x010B,  0x63, 0x00, 0x00, 0x00 },  /* ċ to c */
1385   { 0x010C,  0x43, 0x00, 0x00, 0x00 },  /* Č to C */
1386   { 0x010D,  0x63, 0x00, 0x00, 0x00 },  /* č to c */
1387   { 0x010E,  0x44, 0x00, 0x00, 0x00 },  /* Ď to D */
1388   { 0x010F,  0x64, 0x00, 0x00, 0x00 },  /* ď to d */
1389   { 0x0110,  0x44, 0x00, 0x00, 0x00 },  /* Đ to D */
1390   { 0x0111,  0x64, 0x00, 0x00, 0x00 },  /* đ to d */
1391   { 0x0112,  0x45, 0x00, 0x00, 0x00 },  /* Ē to E */
1392   { 0x0113,  0x65, 0x00, 0x00, 0x00 },  /* ē to e */
1393   { 0x0114,  0x45, 0x00, 0x00, 0x00 },  /* Ĕ to E */
1394   { 0x0115,  0x65, 0x00, 0x00, 0x00 },  /* ĕ to e */
1395   { 0x0116,  0x45, 0x00, 0x00, 0x00 },  /* Ė to E */
1396   { 0x0117,  0x65, 0x00, 0x00, 0x00 },  /* ė to e */
1397   { 0x0118,  0x45, 0x00, 0x00, 0x00 },  /* Ę to E */
1398   { 0x0119,  0x65, 0x00, 0x00, 0x00 },  /* ę to e */
1399   { 0x011A,  0x45, 0x00, 0x00, 0x00 },  /* Ě to E */
1400   { 0x011B,  0x65, 0x00, 0x00, 0x00 },  /* ě to e */
1401   { 0x011C,  0x47, 0x68, 0x00, 0x00 },  /* Ĝ to Gh */
1402   { 0x011D,  0x67, 0x68, 0x00, 0x00 },  /* ĝ to gh */
1403   { 0x011E,  0x47, 0x00, 0x00, 0x00 },  /* Ğ to G */
1404   { 0x011F,  0x67, 0x00, 0x00, 0x00 },  /* ğ to g */
1405   { 0x0120,  0x47, 0x00, 0x00, 0x00 },  /* Ġ to G */
1406   { 0x0121,  0x67, 0x00, 0x00, 0x00 },  /* ġ to g */
1407   { 0x0122,  0x47, 0x00, 0x00, 0x00 },  /* Ģ to G */
1408   { 0x0123,  0x67, 0x00, 0x00, 0x00 },  /* ģ to g */
1409   { 0x0124,  0x48, 0x68, 0x00, 0x00 },  /* Ĥ to Hh */
1410   { 0x0125,  0x68, 0x68, 0x00, 0x00 },  /* ĥ to hh */
1411   { 0x0126,  0x48, 0x00, 0x00, 0x00 },  /* Ħ to H */
1412   { 0x0127,  0x68, 0x00, 0x00, 0x00 },  /* ħ to h */
1413   { 0x0128,  0x49, 0x00, 0x00, 0x00 },  /* Ĩ to I */
1414   { 0x0129,  0x69, 0x00, 0x00, 0x00 },  /* ĩ to i */
1415   { 0x012A,  0x49, 0x00, 0x00, 0x00 },  /* Ī to I */
1416   { 0x012B,  0x69, 0x00, 0x00, 0x00 },  /* ī to i */
1417   { 0x012C,  0x49, 0x00, 0x00, 0x00 },  /* Ĭ to I */
1418   { 0x012D,  0x69, 0x00, 0x00, 0x00 },  /* ĭ to i */
1419   { 0x012E,  0x49, 0x00, 0x00, 0x00 },  /* Į to I */
1420   { 0x012F,  0x69, 0x00, 0x00, 0x00 },  /* į to i */
1421   { 0x0130,  0x49, 0x00, 0x00, 0x00 },  /* İ to I */
1422   { 0x0131,  0x69, 0x00, 0x00, 0x00 },  /* ı to i */
1423   { 0x0132,  0x49, 0x4A, 0x00, 0x00 },  /* IJ to IJ */
1424   { 0x0133,  0x69, 0x6A, 0x00, 0x00 },  /* ij to ij */
1425   { 0x0134,  0x4A, 0x68, 0x00, 0x00 },  /* Ĵ to Jh */
1426   { 0x0135,  0x6A, 0x68, 0x00, 0x00 },  /* ĵ to jh */
1427   { 0x0136,  0x4B, 0x00, 0x00, 0x00 },  /* Ķ to K */
1428   { 0x0137,  0x6B, 0x00, 0x00, 0x00 },  /* ķ to k */
1429   { 0x0138,  0x6B, 0x00, 0x00, 0x00 },  /* ĸ to k */
1430   { 0x0139,  0x4C, 0x00, 0x00, 0x00 },  /* Ĺ to L */
1431   { 0x013A,  0x6C, 0x00, 0x00, 0x00 },  /* ĺ to l */
1432   { 0x013B,  0x4C, 0x00, 0x00, 0x00 },  /* Ļ to L */
1433   { 0x013C,  0x6C, 0x00, 0x00, 0x00 },  /* ļ to l */
1434   { 0x013D,  0x4C, 0x00, 0x00, 0x00 },  /* Ľ to L */
1435   { 0x013E,  0x6C, 0x00, 0x00, 0x00 },  /* ľ to l */
1436   { 0x013F,  0x4C, 0x2E, 0x00, 0x00 },  /* Ŀ to L. */
1437   { 0x0140,  0x6C, 0x2E, 0x00, 0x00 },  /* ŀ to l. */
1438   { 0x0141,  0x4C, 0x00, 0x00, 0x00 },  /* Ł to L */
1439   { 0x0142,  0x6C, 0x00, 0x00, 0x00 },  /* ł to l */
1440   { 0x0143,  0x4E, 0x00, 0x00, 0x00 },  /* Ń to N */
1441   { 0x0144,  0x6E, 0x00, 0x00, 0x00 },  /* ń to n */
1442   { 0x0145,  0x4E, 0x00, 0x00, 0x00 },  /* Ņ to N */
1443   { 0x0146,  0x6E, 0x00, 0x00, 0x00 },  /* ņ to n */
1444   { 0x0147,  0x4E, 0x00, 0x00, 0x00 },  /* Ň to N */
1445   { 0x0148,  0x6E, 0x00, 0x00, 0x00 },  /* ň to n */
1446   { 0x0149,  0x27, 0x6E, 0x00, 0x00 },  /* ʼn to 'n */
1447   { 0x014A,  0x4E, 0x47, 0x00, 0x00 },  /* Ŋ to NG */
1448   { 0x014B,  0x6E, 0x67, 0x00, 0x00 },  /* ŋ to ng */
1449   { 0x014C,  0x4F, 0x00, 0x00, 0x00 },  /* Ō to O */
1450   { 0x014D,  0x6F, 0x00, 0x00, 0x00 },  /* ō to o */
1451   { 0x014E,  0x4F, 0x00, 0x00, 0x00 },  /* Ŏ to O */
1452   { 0x014F,  0x6F, 0x00, 0x00, 0x00 },  /* ŏ to o */
1453   { 0x0150,  0x4F, 0x00, 0x00, 0x00 },  /* Ő to O */
1454   { 0x0151,  0x6F, 0x00, 0x00, 0x00 },  /* ő to o */
1455   { 0x0152,  0x4F, 0x45, 0x00, 0x00 },  /* Œ to OE */
1456   { 0x0153,  0x6F, 0x65, 0x00, 0x00 },  /* œ to oe */
1457   { 0x0154,  0x52, 0x00, 0x00, 0x00 },  /* Ŕ to R */
1458   { 0x0155,  0x72, 0x00, 0x00, 0x00 },  /* ŕ to r */
1459   { 0x0156,  0x52, 0x00, 0x00, 0x00 },  /* Ŗ to R */
1460   { 0x0157,  0x72, 0x00, 0x00, 0x00 },  /* ŗ to r */
1461   { 0x0158,  0x52, 0x00, 0x00, 0x00 },  /* Ř to R */
1462   { 0x0159,  0x72, 0x00, 0x00, 0x00 },  /* ř to r */
1463   { 0x015A,  0x53, 0x00, 0x00, 0x00 },  /* Ś to S */
1464   { 0x015B,  0x73, 0x00, 0x00, 0x00 },  /* ś to s */
1465   { 0x015C,  0x53, 0x68, 0x00, 0x00 },  /* Ŝ to Sh */
1466   { 0x015D,  0x73, 0x68, 0x00, 0x00 },  /* ŝ to sh */
1467   { 0x015E,  0x53, 0x00, 0x00, 0x00 },  /* Ş to S */
1468   { 0x015F,  0x73, 0x00, 0x00, 0x00 },  /* ş to s */
1469   { 0x0160,  0x53, 0x00, 0x00, 0x00 },  /* Š to S */
1470   { 0x0161,  0x73, 0x00, 0x00, 0x00 },  /* š to s */
1471   { 0x0162,  0x54, 0x00, 0x00, 0x00 },  /* Ţ to T */
1472   { 0x0163,  0x74, 0x00, 0x00, 0x00 },  /* ţ to t */
1473   { 0x0164,  0x54, 0x00, 0x00, 0x00 },  /* Ť to T */
1474   { 0x0165,  0x74, 0x00, 0x00, 0x00 },  /* ť to t */
1475   { 0x0166,  0x54, 0x00, 0x00, 0x00 },  /* Ŧ to T */
1476   { 0x0167,  0x74, 0x00, 0x00, 0x00 },  /* ŧ to t */
1477   { 0x0168,  0x55, 0x00, 0x00, 0x00 },  /* Ũ to U */
1478   { 0x0169,  0x75, 0x00, 0x00, 0x00 },  /* ũ to u */
1479   { 0x016A,  0x55, 0x00, 0x00, 0x00 },  /* Ū to U */
1480   { 0x016B,  0x75, 0x00, 0x00, 0x00 },  /* ū to u */
1481   { 0x016C,  0x55, 0x00, 0x00, 0x00 },  /* Ŭ to U */
1482   { 0x016D,  0x75, 0x00, 0x00, 0x00 },  /* ŭ to u */
1483   { 0x016E,  0x55, 0x00, 0x00, 0x00 },  /* Ů to U */
1484   { 0x016F,  0x75, 0x00, 0x00, 0x00 },  /* ů to u */
1485   { 0x0170,  0x55, 0x00, 0x00, 0x00 },  /* Ű to U */
1486   { 0x0171,  0x75, 0x00, 0x00, 0x00 },  /* ű to u */
1487   { 0x0172,  0x55, 0x00, 0x00, 0x00 },  /* Ų to U */
1488   { 0x0173,  0x75, 0x00, 0x00, 0x00 },  /* ų to u */
1489   { 0x0174,  0x57, 0x00, 0x00, 0x00 },  /* Ŵ to W */
1490   { 0x0175,  0x77, 0x00, 0x00, 0x00 },  /* ŵ to w */
1491   { 0x0176,  0x59, 0x00, 0x00, 0x00 },  /* Ŷ to Y */
1492   { 0x0177,  0x79, 0x00, 0x00, 0x00 },  /* ŷ to y */
1493   { 0x0178,  0x59, 0x00, 0x00, 0x00 },  /* Ÿ to Y */
1494   { 0x0179,  0x5A, 0x00, 0x00, 0x00 },  /* Ź to Z */
1495   { 0x017A,  0x7A, 0x00, 0x00, 0x00 },  /* ź to z */
1496   { 0x017B,  0x5A, 0x00, 0x00, 0x00 },  /* Ż to Z */
1497   { 0x017C,  0x7A, 0x00, 0x00, 0x00 },  /* ż to z */
1498   { 0x017D,  0x5A, 0x00, 0x00, 0x00 },  /* Ž to Z */
1499   { 0x017E,  0x7A, 0x00, 0x00, 0x00 },  /* ž to z */
1500   { 0x017F,  0x73, 0x00, 0x00, 0x00 },  /* ſ to s */
1501   { 0x0192,  0x66, 0x00, 0x00, 0x00 },  /* ƒ to f */
1502   { 0x0218,  0x53, 0x00, 0x00, 0x00 },  /* Ș to S */
1503   { 0x0219,  0x73, 0x00, 0x00, 0x00 },  /* ș to s */
1504   { 0x021A,  0x54, 0x00, 0x00, 0x00 },  /* Ț to T */
1505   { 0x021B,  0x74, 0x00, 0x00, 0x00 },  /* ț to t */
1506   { 0x0386,  0x41, 0x00, 0x00, 0x00 },  /* Ά to A */
1507   { 0x0388,  0x45, 0x00, 0x00, 0x00 },  /* Έ to E */
1508   { 0x0389,  0x49, 0x00, 0x00, 0x00 },  /* Ή to I */
1509   { 0x038A,  0x49, 0x00, 0x00, 0x00 },  /* Ί to I */
1510   { 0x038C,  0x4f, 0x00, 0x00, 0x00 },  /* Ό to O */
1511   { 0x038E,  0x59, 0x00, 0x00, 0x00 },  /* Ύ to Y */
1512   { 0x038F,  0x4f, 0x00, 0x00, 0x00 },  /* Ώ to O */
1513   { 0x0390,  0x69, 0x00, 0x00, 0x00 },  /* ΐ to i */
1514   { 0x0391,  0x41, 0x00, 0x00, 0x00 },  /* Α to A */
1515   { 0x0392,  0x42, 0x00, 0x00, 0x00 },  /* Β to B */
1516   { 0x0393,  0x47, 0x00, 0x00, 0x00 },  /* Γ to G */
1517   { 0x0394,  0x44, 0x00, 0x00, 0x00 },  /* Δ to D */
1518   { 0x0395,  0x45, 0x00, 0x00, 0x00 },  /* Ε to E */
1519   { 0x0396,  0x5a, 0x00, 0x00, 0x00 },  /* Ζ to Z */
1520   { 0x0397,  0x49, 0x00, 0x00, 0x00 },  /* Η to I */
1521   { 0x0398,  0x54, 0x68, 0x00, 0x00 },  /* Θ to Th */
1522   { 0x0399,  0x49, 0x00, 0x00, 0x00 },  /* Ι to I */
1523   { 0x039A,  0x4b, 0x00, 0x00, 0x00 },  /* Κ to K */
1524   { 0x039B,  0x4c, 0x00, 0x00, 0x00 },  /* Λ to L */
1525   { 0x039C,  0x4d, 0x00, 0x00, 0x00 },  /* Μ to M */
1526   { 0x039D,  0x4e, 0x00, 0x00, 0x00 },  /* Ν to N */
1527   { 0x039E,  0x58, 0x00, 0x00, 0x00 },  /* Ξ to X */
1528   { 0x039F,  0x4f, 0x00, 0x00, 0x00 },  /* Ο to O */
1529   { 0x03A0,  0x50, 0x00, 0x00, 0x00 },  /* Π to P */
1530   { 0x03A1,  0x52, 0x00, 0x00, 0x00 },  /* Ρ to R */
1531   { 0x03A3,  0x53, 0x00, 0x00, 0x00 },  /* Σ to S */
1532   { 0x03A4,  0x54, 0x00, 0x00, 0x00 },  /* Τ to T */
1533   { 0x03A5,  0x59, 0x00, 0x00, 0x00 },  /* Υ to Y */
1534   { 0x03A6,  0x46, 0x00, 0x00, 0x00 },  /* Φ to F */
1535   { 0x03A7,  0x43, 0x68, 0x00, 0x00 },  /* Χ to Ch */
1536   { 0x03A8,  0x50, 0x73, 0x00, 0x00 },  /* Ψ to Ps */
1537   { 0x03A9,  0x4f, 0x00, 0x00, 0x00 },  /* Ω to O */
1538   { 0x03AA,  0x49, 0x00, 0x00, 0x00 },  /* Ϊ to I */
1539   { 0x03AB,  0x59, 0x00, 0x00, 0x00 },  /* Ϋ to Y */
1540   { 0x03AC,  0x61, 0x00, 0x00, 0x00 },  /* ά to a */
1541   { 0x03AD,  0x65, 0x00, 0x00, 0x00 },  /* έ to e */
1542   { 0x03AE,  0x69, 0x00, 0x00, 0x00 },  /* ή to i */
1543   { 0x03AF,  0x69, 0x00, 0x00, 0x00 },  /* ί to i */
1544   { 0x03B1,  0x61, 0x00, 0x00, 0x00 },  /* α to a */
1545   { 0x03B2,  0x62, 0x00, 0x00, 0x00 },  /* β to b */
1546   { 0x03B3,  0x67, 0x00, 0x00, 0x00 },  /* γ to g */
1547   { 0x03B4,  0x64, 0x00, 0x00, 0x00 },  /* δ to d */
1548   { 0x03B5,  0x65, 0x00, 0x00, 0x00 },  /* ε to e */
1549   { 0x03B6,  0x7a, 0x00, 0x00, 0x00 },  /* ζ to z */
1550   { 0x03B7,  0x69, 0x00, 0x00, 0x00 },  /* η to i */
1551   { 0x03B8,  0x74, 0x68, 0x00, 0x00 },  /* θ to th */
1552   { 0x03B9,  0x69, 0x00, 0x00, 0x00 },  /* ι to i */
1553   { 0x03BA,  0x6b, 0x00, 0x00, 0x00 },  /* κ to k */
1554   { 0x03BB,  0x6c, 0x00, 0x00, 0x00 },  /* λ to l */
1555   { 0x03BC,  0x6d, 0x00, 0x00, 0x00 },  /* μ to m */
1556   { 0x03BD,  0x6e, 0x00, 0x00, 0x00 },  /* ν to n */
1557   { 0x03BE,  0x78, 0x00, 0x00, 0x00 },  /* ξ to x */
1558   { 0x03BF,  0x6f, 0x00, 0x00, 0x00 },  /* ο to o */
1559   { 0x03C0,  0x70, 0x00, 0x00, 0x00 },  /* π to p */
1560   { 0x03C1,  0x72, 0x00, 0x00, 0x00 },  /* ρ to r */
1561   { 0x03C3,  0x73, 0x00, 0x00, 0x00 },  /* σ to s */
1562   { 0x03C4,  0x74, 0x00, 0x00, 0x00 },  /* τ to t */
1563   { 0x03C5,  0x79, 0x00, 0x00, 0x00 },  /* υ to y */
1564   { 0x03C6,  0x66, 0x00, 0x00, 0x00 },  /* φ to f */
1565   { 0x03C7,  0x63, 0x68, 0x00, 0x00 },  /* χ to ch */
1566   { 0x03C8,  0x70, 0x73, 0x00, 0x00 },  /* ψ to ps */
1567   { 0x03C9,  0x6f, 0x00, 0x00, 0x00 },  /* ω to o */
1568   { 0x03CA,  0x69, 0x00, 0x00, 0x00 },  /* ϊ to i */
1569   { 0x03CB,  0x79, 0x00, 0x00, 0x00 },  /* ϋ to y */
1570   { 0x03CC,  0x6f, 0x00, 0x00, 0x00 },  /* ό to o */
1571   { 0x03CD,  0x79, 0x00, 0x00, 0x00 },  /* ύ to y */
1572   { 0x03CE,  0x69, 0x00, 0x00, 0x00 },  /* ώ to i */
1573   { 0x0400,  0x45, 0x00, 0x00, 0x00 },  /* Ѐ to E */
1574   { 0x0401,  0x45, 0x00, 0x00, 0x00 },  /* Ё to E */
1575   { 0x0402,  0x44, 0x00, 0x00, 0x00 },  /* Ђ to D */
1576   { 0x0403,  0x47, 0x00, 0x00, 0x00 },  /* Ѓ to G */
1577   { 0x0404,  0x45, 0x00, 0x00, 0x00 },  /* Є to E */
1578   { 0x0405,  0x5a, 0x00, 0x00, 0x00 },  /* Ѕ to Z */
1579   { 0x0406,  0x49, 0x00, 0x00, 0x00 },  /* І to I */
1580   { 0x0407,  0x49, 0x00, 0x00, 0x00 },  /* Ї to I */
1581   { 0x0408,  0x4a, 0x00, 0x00, 0x00 },  /* Ј to J */
1582   { 0x0409,  0x49, 0x00, 0x00, 0x00 },  /* Љ to I */
1583   { 0x040A,  0x4e, 0x00, 0x00, 0x00 },  /* Њ to N */
1584   { 0x040B,  0x44, 0x00, 0x00, 0x00 },  /* Ћ to D */
1585   { 0x040C,  0x4b, 0x00, 0x00, 0x00 },  /* Ќ to K */
1586   { 0x040D,  0x49, 0x00, 0x00, 0x00 },  /* Ѝ to I */
1587   { 0x040E,  0x55, 0x00, 0x00, 0x00 },  /* Ў to U */
1588   { 0x040F,  0x44, 0x00, 0x00, 0x00 },  /* Џ to D */
1589   { 0x0410,  0x41, 0x00, 0x00, 0x00 },  /* А to A */
1590   { 0x0411,  0x42, 0x00, 0x00, 0x00 },  /* Б to B */
1591   { 0x0412,  0x56, 0x00, 0x00, 0x00 },  /* В to V */
1592   { 0x0413,  0x47, 0x00, 0x00, 0x00 },  /* Г to G */
1593   { 0x0414,  0x44, 0x00, 0x00, 0x00 },  /* Д to D */
1594   { 0x0415,  0x45, 0x00, 0x00, 0x00 },  /* Е to E */
1595   { 0x0416,  0x5a, 0x68, 0x00, 0x00 },  /* Ж to Zh */
1596   { 0x0417,  0x5a, 0x00, 0x00, 0x00 },  /* З to Z */
1597   { 0x0418,  0x49, 0x00, 0x00, 0x00 },  /* И to I */
1598   { 0x0419,  0x49, 0x00, 0x00, 0x00 },  /* Й to I */
1599   { 0x041A,  0x4b, 0x00, 0x00, 0x00 },  /* К to K */
1600   { 0x041B,  0x4c, 0x00, 0x00, 0x00 },  /* Л to L */
1601   { 0x041C,  0x4d, 0x00, 0x00, 0x00 },  /* М to M */
1602   { 0x041D,  0x4e, 0x00, 0x00, 0x00 },  /* Н to N */
1603   { 0x041E,  0x4f, 0x00, 0x00, 0x00 },  /* О to O */
1604   { 0x041F,  0x50, 0x00, 0x00, 0x00 },  /* П to P */
1605   { 0x0420,  0x52, 0x00, 0x00, 0x00 },  /* Р to R */
1606   { 0x0421,  0x53, 0x00, 0x00, 0x00 },  /* С to S */
1607   { 0x0422,  0x54, 0x00, 0x00, 0x00 },  /* Т to T */
1608   { 0x0423,  0x55, 0x00, 0x00, 0x00 },  /* У to U */
1609   { 0x0424,  0x46, 0x00, 0x00, 0x00 },  /* Ф to F */
1610   { 0x0425,  0x4b, 0x68, 0x00, 0x00 },  /* Х to Kh */
1611   { 0x0426,  0x54, 0x63, 0x00, 0x00 },  /* Ц to Tc */
1612   { 0x0427,  0x43, 0x68, 0x00, 0x00 },  /* Ч to Ch */
1613   { 0x0428,  0x53, 0x68, 0x00, 0x00 },  /* Ш to Sh */
1614   { 0x0429,  0x53, 0x68, 0x63, 0x68 },  /* Щ to Shch */
1615   { 0x042A,  0x61, 0x00, 0x00, 0x00 },  /*  to A */
1616   { 0x042B,  0x59, 0x00, 0x00, 0x00 },  /* Ы to Y */
1617   { 0x042C,  0x59, 0x00, 0x00, 0x00 },  /*  to Y */
1618   { 0x042D,  0x45, 0x00, 0x00, 0x00 },  /* Э to E */
1619   { 0x042E,  0x49, 0x75, 0x00, 0x00 },  /* Ю to Iu */
1620   { 0x042F,  0x49, 0x61, 0x00, 0x00 },  /* Я to Ia */
1621   { 0x0430,  0x61, 0x00, 0x00, 0x00 },  /* а to a */
1622   { 0x0431,  0x62, 0x00, 0x00, 0x00 },  /* б to b */
1623   { 0x0432,  0x76, 0x00, 0x00, 0x00 },  /* в to v */
1624   { 0x0433,  0x67, 0x00, 0x00, 0x00 },  /* г to g */
1625   { 0x0434,  0x64, 0x00, 0x00, 0x00 },  /* д to d */
1626   { 0x0435,  0x65, 0x00, 0x00, 0x00 },  /* е to e */
1627   { 0x0436,  0x7a, 0x68, 0x00, 0x00 },  /* ж to zh */
1628   { 0x0437,  0x7a, 0x00, 0x00, 0x00 },  /* з to z */
1629   { 0x0438,  0x69, 0x00, 0x00, 0x00 },  /* и to i */
1630   { 0x0439,  0x69, 0x00, 0x00, 0x00 },  /* й to i */
1631   { 0x043A,  0x6b, 0x00, 0x00, 0x00 },  /* к to k */
1632   { 0x043B,  0x6c, 0x00, 0x00, 0x00 },  /* л to l */
1633   { 0x043C,  0x6d, 0x00, 0x00, 0x00 },  /* м to m */
1634   { 0x043D,  0x6e, 0x00, 0x00, 0x00 },  /* н to n */
1635   { 0x043E,  0x6f, 0x00, 0x00, 0x00 },  /* о to o */
1636   { 0x043F,  0x70, 0x00, 0x00, 0x00 },  /* п to p */
1637   { 0x0440,  0x72, 0x00, 0x00, 0x00 },  /* р to r */
1638   { 0x0441,  0x73, 0x00, 0x00, 0x00 },  /* с to s */
1639   { 0x0442,  0x74, 0x00, 0x00, 0x00 },  /* т to t */
1640   { 0x0443,  0x75, 0x00, 0x00, 0x00 },  /* у to u */
1641   { 0x0444,  0x66, 0x00, 0x00, 0x00 },  /* ф to f */
1642   { 0x0445,  0x6b, 0x68, 0x00, 0x00 },  /* х to kh */
1643   { 0x0446,  0x74, 0x63, 0x00, 0x00 },  /* ц to tc */
1644   { 0x0447,  0x63, 0x68, 0x00, 0x00 },  /* ч to ch */
1645   { 0x0448,  0x73, 0x68, 0x00, 0x00 },  /* ш to sh */
1646   { 0x0449,  0x73, 0x68, 0x63, 0x68 },  /* щ to shch */
1647   { 0x044A,  0x61, 0x00, 0x00, 0x00 },  /*  to a */
1648   { 0x044B,  0x79, 0x00, 0x00, 0x00 },  /* ы to y */
1649   { 0x044C,  0x79, 0x00, 0x00, 0x00 },  /*  to y */
1650   { 0x044D,  0x65, 0x00, 0x00, 0x00 },  /* э to e */
1651   { 0x044E,  0x69, 0x75, 0x00, 0x00 },  /* ю to iu */
1652   { 0x044F,  0x69, 0x61, 0x00, 0x00 },  /* я to ia */
1653   { 0x0450,  0x65, 0x00, 0x00, 0x00 },  /* ѐ to e */
1654   { 0x0451,  0x65, 0x00, 0x00, 0x00 },  /* ё to e */
1655   { 0x0452,  0x64, 0x00, 0x00, 0x00 },  /* ђ to d */
1656   { 0x0453,  0x67, 0x00, 0x00, 0x00 },  /* ѓ to g */
1657   { 0x0454,  0x65, 0x00, 0x00, 0x00 },  /* є to e */
1658   { 0x0455,  0x7a, 0x00, 0x00, 0x00 },  /* ѕ to z */
1659   { 0x0456,  0x69, 0x00, 0x00, 0x00 },  /* і to i */
1660   { 0x0457,  0x69, 0x00, 0x00, 0x00 },  /* ї to i */
1661   { 0x0458,  0x6a, 0x00, 0x00, 0x00 },  /* ј to j */
1662   { 0x0459,  0x69, 0x00, 0x00, 0x00 },  /* љ to i */
1663   { 0x045A,  0x6e, 0x00, 0x00, 0x00 },  /* њ to n */
1664   { 0x045B,  0x64, 0x00, 0x00, 0x00 },  /* ћ to d */
1665   { 0x045C,  0x6b, 0x00, 0x00, 0x00 },  /* ќ to k */
1666   { 0x045D,  0x69, 0x00, 0x00, 0x00 },  /* ѝ to i */
1667   { 0x045E,  0x75, 0x00, 0x00, 0x00 },  /* ў to u */
1668   { 0x045F,  0x64, 0x00, 0x00, 0x00 },  /* џ to d */
1669   { 0x1E02,  0x42, 0x00, 0x00, 0x00 },  /* Ḃ to B */
1670   { 0x1E03,  0x62, 0x00, 0x00, 0x00 },  /* ḃ to b */
1671   { 0x1E0A,  0x44, 0x00, 0x00, 0x00 },  /* Ḋ to D */
1672   { 0x1E0B,  0x64, 0x00, 0x00, 0x00 },  /* ḋ to d */
1673   { 0x1E1E,  0x46, 0x00, 0x00, 0x00 },  /* Ḟ to F */
1674   { 0x1E1F,  0x66, 0x00, 0x00, 0x00 },  /* ḟ to f */
1675   { 0x1E40,  0x4D, 0x00, 0x00, 0x00 },  /* Ṁ to M */
1676   { 0x1E41,  0x6D, 0x00, 0x00, 0x00 },  /* ṁ to m */
1677   { 0x1E56,  0x50, 0x00, 0x00, 0x00 },  /* Ṗ to P */
1678   { 0x1E57,  0x70, 0x00, 0x00, 0x00 },  /* ṗ to p */
1679   { 0x1E60,  0x53, 0x00, 0x00, 0x00 },  /* Ṡ to S */
1680   { 0x1E61,  0x73, 0x00, 0x00, 0x00 },  /* ṡ to s */
1681   { 0x1E6A,  0x54, 0x00, 0x00, 0x00 },  /* Ṫ to T */
1682   { 0x1E6B,  0x74, 0x00, 0x00, 0x00 },  /* ṫ to t */
1683   { 0x1E80,  0x57, 0x00, 0x00, 0x00 },  /* Ẁ to W */
1684   { 0x1E81,  0x77, 0x00, 0x00, 0x00 },  /* ẁ to w */
1685   { 0x1E82,  0x57, 0x00, 0x00, 0x00 },  /* Ẃ to W */
1686   { 0x1E83,  0x77, 0x00, 0x00, 0x00 },  /* ẃ to w */
1687   { 0x1E84,  0x57, 0x00, 0x00, 0x00 },  /* Ẅ to W */
1688   { 0x1E85,  0x77, 0x00, 0x00, 0x00 },  /* ẅ to w */
1689   { 0x1EF2,  0x59, 0x00, 0x00, 0x00 },  /* Ỳ to Y */
1690   { 0x1EF3,  0x79, 0x00, 0x00, 0x00 },  /* ỳ to y */
1691   { 0xFB00,  0x66, 0x66, 0x00, 0x00 },  /* ff to ff */
1692   { 0xFB01,  0x66, 0x69, 0x00, 0x00 },  /* fi to fi */
1693   { 0xFB02,  0x66, 0x6C, 0x00, 0x00 },  /* fl to fl */
1694   { 0xFB05,  0x73, 0x74, 0x00, 0x00 },  /* ſt to st */
1695   { 0xFB06,  0x73, 0x74, 0x00, 0x00 },  /* st to st */
1696 };
1697 
1698 static const Transliteration *spellfixFindTranslit(int c, int *pxTop){
1699   *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1;
1700   return translit;
1701 }
1702 
1703 /*
1704 ** Convert the input string from UTF-8 into pure ASCII by converting
1705 ** all non-ASCII characters to some combination of characters in the
1706 ** ASCII subset.
1707 **
1708 ** The returned string might contain more characters than the input.
1709 **
1710 ** Space to hold the returned string comes from sqlite3_malloc() and
1711 ** should be freed by the caller.
1712 */
1713 static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1714 #ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1715   unsigned char *zOut = sqlite3_malloc64( nIn*5 + 1 );
1716 #else
1717   unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1718 #endif
1719   int c, sz, nOut;
1720   if( zOut==0 ) return 0;
1721   nOut = 0;
1722   while( nIn>0 ){
1723     c = utf8Read(zIn, nIn, &sz);
1724     zIn += sz;
1725     nIn -= sz;
1726     if( c<=127 ){
1727       zOut[nOut++] = (unsigned char)c;
1728     }else{
1729       int xTop, xBtm, x;
1730       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1731       xBtm = 0;
1732       while( xTop>=xBtm ){
1733         x = (xTop + xBtm)/2;
1734         if( tbl[x].cFrom==c ){
1735           zOut[nOut++] = tbl[x].cTo0;
1736           if( tbl[x].cTo1 ){
1737             zOut[nOut++] = tbl[x].cTo1;
1738             if( tbl[x].cTo2 ){
1739               zOut[nOut++] = tbl[x].cTo2;
1740               if( tbl[x].cTo3 ){
1741                 zOut[nOut++] = tbl[x].cTo3;
1742 #ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
1743                 if( tbl[x].cTo4 ){
1744                   zOut[nOut++] = tbl[x].cTo4;
1745                 }
1746 #endif /* SQLITE_SPELLFIX_5BYTE_MAPPINGS */
1747               }
1748             }
1749           }
1750           c = 0;
1751           break;
1752         }else if( tbl[x].cFrom>c ){
1753           xTop = x-1;
1754         }else{
1755           xBtm = x+1;
1756         }
1757       }
1758       if( c ) zOut[nOut++] = '?';
1759     }
1760   }
1761   zOut[nOut] = 0;
1762   return zOut;
1763 }
1764 
1765 /*
1766 ** Return the number of characters in the shortest prefix of the input
1767 ** string that transliterates to an ASCII string nTrans bytes or longer.
1768 ** Or, if the transliteration of the input string is less than nTrans
1769 ** bytes in size, return the number of characters in the input string.
1770 */
1771 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1772   int i, c, sz, nOut;
1773   int nChar;
1774 
1775   i = nOut = 0;
1776   for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1777     c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1778     i += sz;
1779 
1780     nOut++;
1781     if( c>=128 ){
1782       int xTop, xBtm, x;
1783       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1784       xBtm = 0;
1785       while( xTop>=xBtm ){
1786         x = (xTop + xBtm)/2;
1787         if( tbl[x].cFrom==c ){
1788           if( tbl[x].cTo1 ){
1789             nOut++;
1790             if( tbl[x].cTo2 ){
1791               nOut++;
1792               if( tbl[x].cTo3 ){
1793                 nOut++;
1794               }
1795             }
1796           }
1797           break;
1798         }else if( tbl[x].cFrom>c ){
1799           xTop = x-1;
1800         }else{
1801           xBtm = x+1;
1802         }
1803       }
1804     }
1805   }
1806 
1807   return nChar;
1808 }
1809 
1810 
1811 /*
1812 **    spellfix1_translit(X)
1813 **
1814 ** Convert a string that contains non-ASCII Roman characters into
1815 ** pure ASCII.
1816 */
1817 static void transliterateSqlFunc(
1818   sqlite3_context *context,
1819   int argc,
1820   sqlite3_value **argv
1821 ){
1822   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1823   int nIn = sqlite3_value_bytes(argv[0]);
1824   unsigned char *zOut = transliterate(zIn, nIn);
1825   if( zOut==0 ){
1826     sqlite3_result_error_nomem(context);
1827   }else{
1828     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1829   }
1830 }
1831 
1832 /*
1833 **    spellfix1_scriptcode(X)
1834 **
1835 ** Try to determine the dominant script used by the word X and return
1836 ** its ISO 15924 numeric code.
1837 **
1838 ** The current implementation only understands the following scripts:
1839 **
1840 **    215  (Latin)
1841 **    220  (Cyrillic)
1842 **    200  (Greek)
1843 **
1844 ** This routine will return 998 if the input X contains characters from
1845 ** two or more of the above scripts or 999 if X contains no characters
1846 ** from any of the above scripts.
1847 */
1848 static void scriptCodeSqlFunc(
1849   sqlite3_context *context,
1850   int argc,
1851   sqlite3_value **argv
1852 ){
1853   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1854   int nIn = sqlite3_value_bytes(argv[0]);
1855   int c, sz;
1856   int scriptMask = 0;
1857   int res;
1858   int seenDigit = 0;
1859 # define SCRIPT_LATIN       0x0001
1860 # define SCRIPT_CYRILLIC    0x0002
1861 # define SCRIPT_GREEK       0x0004
1862 # define SCRIPT_HEBREW      0x0008
1863 # define SCRIPT_ARABIC      0x0010
1864 
1865   while( nIn>0 ){
1866     c = utf8Read(zIn, nIn, &sz);
1867     zIn += sz;
1868     nIn -= sz;
1869     if( c<0x02af ){
1870       if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1871         scriptMask |= SCRIPT_LATIN;
1872       }else if( c>='0' && c<='9' ){
1873         seenDigit = 1;
1874       }
1875     }else if( c>=0x0400 && c<=0x04ff ){
1876       scriptMask |= SCRIPT_CYRILLIC;
1877     }else if( c>=0x0386 && c<=0x03ce ){
1878       scriptMask |= SCRIPT_GREEK;
1879     }else if( c>=0x0590 && c<=0x05ff ){
1880       scriptMask |= SCRIPT_HEBREW;
1881     }else if( c>=0x0600 && c<=0x06ff ){
1882       scriptMask |= SCRIPT_ARABIC;
1883     }
1884   }
1885   if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1886   switch( scriptMask ){
1887     case 0:                res = 999; break;
1888     case SCRIPT_LATIN:     res = 215; break;
1889     case SCRIPT_CYRILLIC:  res = 220; break;
1890     case SCRIPT_GREEK:     res = 200; break;
1891     case SCRIPT_HEBREW:    res = 125; break;
1892     case SCRIPT_ARABIC:    res = 160; break;
1893     default:               res = 998; break;
1894   }
1895   sqlite3_result_int(context, res);
1896 }
1897 
1898 /* End transliterate
1899 ******************************************************************************
1900 ******************************************************************************
1901 ** Begin spellfix1 virtual table.
1902 */
1903 
1904 /* Maximum length of a phonehash used for querying the shadow table */
1905 #define SPELLFIX_MX_HASH  32
1906 
1907 /* Maximum number of hash strings to examine per query */
1908 #define SPELLFIX_MX_RUN   1
1909 
1910 typedef struct spellfix1_vtab spellfix1_vtab;
1911 typedef struct spellfix1_cursor spellfix1_cursor;
1912 
1913 /* Fuzzy-search virtual table object */
1914 struct spellfix1_vtab {
1915   sqlite3_vtab base;         /* Base class - must be first */
1916   sqlite3 *db;               /* Database connection */
1917   char *zDbName;             /* Name of database holding this table */
1918   char *zTableName;          /* Name of the virtual table */
1919   char *zCostTable;          /* Table holding edit-distance cost numbers */
1920   EditDist3Config *pConfig3; /* Parsed edit distance costs */
1921 };
1922 
1923 /* Fuzzy-search cursor object */
1924 struct spellfix1_cursor {
1925   sqlite3_vtab_cursor base;    /* Base class - must be first */
1926   spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
1927   char *zPattern;              /* rhs of MATCH clause */
1928   int idxNum;                  /* idxNum value passed to xFilter() */
1929   int nRow;                    /* Number of rows of content */
1930   int nAlloc;                  /* Number of allocated rows */
1931   int iRow;                    /* Current row of content */
1932   int iLang;                   /* Value of the langid= constraint */
1933   int iTop;                    /* Value of the top= constraint */
1934   int iScope;                  /* Value of the scope= constraint */
1935   int nSearch;                 /* Number of vocabulary items checked */
1936   sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
1937   struct spellfix1_row {       /* For each row of content */
1938     sqlite3_int64 iRowid;         /* Rowid for this row */
1939     char *zWord;                  /* Text for this row */
1940     int iRank;                    /* Rank for this row */
1941     int iDistance;                /* Distance from pattern for this row */
1942     int iScore;                   /* Score for sorting */
1943     int iMatchlen;                /* Value of matchlen column (or -1) */
1944     char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1945   } *a;
1946 };
1947 
1948 /*
1949 ** Construct one or more SQL statements from the format string given
1950 ** and then evaluate those statements. The success code is written
1951 ** into *pRc.
1952 **
1953 ** If *pRc is initially non-zero then this routine is a no-op.
1954 */
1955 static void spellfix1DbExec(
1956   int *pRc,              /* Success code */
1957   sqlite3 *db,           /* Database in which to run SQL */
1958   const char *zFormat,   /* Format string for SQL */
1959   ...                    /* Arguments to the format string */
1960 ){
1961   va_list ap;
1962   char *zSql;
1963   if( *pRc ) return;
1964   va_start(ap, zFormat);
1965   zSql = sqlite3_vmprintf(zFormat, ap);
1966   va_end(ap);
1967   if( zSql==0 ){
1968     *pRc = SQLITE_NOMEM;
1969   }else{
1970     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1971     sqlite3_free(zSql);
1972   }
1973 }
1974 
1975 /*
1976 ** xDisconnect/xDestroy method for the fuzzy-search module.
1977 */
1978 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1979   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1980   int rc = SQLITE_OK;
1981   if( isDestroy ){
1982     sqlite3 *db = p->db;
1983     spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1984                   p->zDbName, p->zTableName);
1985   }
1986   if( rc==SQLITE_OK ){
1987     sqlite3_free(p->zTableName);
1988     editDist3ConfigDelete(p->pConfig3);
1989     sqlite3_free(p->zCostTable);
1990     sqlite3_free(p);
1991   }
1992   return rc;
1993 }
1994 static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1995   return spellfix1Uninit(0, pVTab);
1996 }
1997 static int spellfix1Destroy(sqlite3_vtab *pVTab){
1998   return spellfix1Uninit(1, pVTab);
1999 }
2000 
2001 /*
2002 ** Make a copy of a string.  Remove leading and trailing whitespace
2003 ** and dequote it.
2004 */
2005 static char *spellfix1Dequote(const char *zIn){
2006   char *zOut;
2007   int i, j;
2008   char c;
2009   while( isspace((unsigned char)zIn[0]) ) zIn++;
2010   zOut = sqlite3_mprintf("%s", zIn);
2011   if( zOut==0 ) return 0;
2012   i = (int)strlen(zOut);
2013 #if 0  /* The parser will never leave spaces at the end */
2014   while( i>0 && isspace(zOut[i-1]) ){ i--; }
2015 #endif
2016   zOut[i] = 0;
2017   c = zOut[0];
2018   if( c=='\'' || c=='"' ){
2019     for(i=1, j=0; ALWAYS(zOut[i]); i++){
2020       zOut[j++] = zOut[i];
2021       if( zOut[i]==c ){
2022         if( zOut[i+1]==c ){
2023           i++;
2024         }else{
2025           zOut[j-1] = 0;
2026           break;
2027         }
2028       }
2029     }
2030   }
2031   return zOut;
2032 }
2033 
2034 
2035 /*
2036 ** xConnect/xCreate method for the spellfix1 module. Arguments are:
2037 **
2038 **   argv[0]   -> module name  ("spellfix1")
2039 **   argv[1]   -> database name
2040 **   argv[2]   -> table name
2041 **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
2042 */
2043 static int spellfix1Init(
2044   int isCreate,
2045   sqlite3 *db,
2046   void *pAux,
2047   int argc, const char *const*argv,
2048   sqlite3_vtab **ppVTab,
2049   char **pzErr
2050 ){
2051   spellfix1_vtab *pNew = 0;
2052   /* const char *zModule = argv[0]; // not used */
2053   const char *zDbName = argv[1];
2054   const char *zTableName = argv[2];
2055   int nDbName;
2056   int rc = SQLITE_OK;
2057   int i;
2058 
2059   nDbName = (int)strlen(zDbName);
2060   pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
2061   if( pNew==0 ){
2062     rc = SQLITE_NOMEM;
2063   }else{
2064     memset(pNew, 0, sizeof(*pNew));
2065     pNew->zDbName = (char*)&pNew[1];
2066     memcpy(pNew->zDbName, zDbName, nDbName+1);
2067     pNew->zTableName = sqlite3_mprintf("%s", zTableName);
2068     pNew->db = db;
2069     if( pNew->zTableName==0 ){
2070       rc = SQLITE_NOMEM;
2071     }else{
2072       rc = sqlite3_declare_vtab(db,
2073            "CREATE TABLE x(word,rank,distance,langid, "
2074            "score, matchlen, phonehash HIDDEN, "
2075            "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
2076            "soundslike HIDDEN, command HIDDEN)"
2077       );
2078 #define SPELLFIX_COL_WORD            0
2079 #define SPELLFIX_COL_RANK            1
2080 #define SPELLFIX_COL_DISTANCE        2
2081 #define SPELLFIX_COL_LANGID          3
2082 #define SPELLFIX_COL_SCORE           4
2083 #define SPELLFIX_COL_MATCHLEN        5
2084 #define SPELLFIX_COL_PHONEHASH       6
2085 #define SPELLFIX_COL_TOP             7
2086 #define SPELLFIX_COL_SCOPE           8
2087 #define SPELLFIX_COL_SRCHCNT         9
2088 #define SPELLFIX_COL_SOUNDSLIKE     10
2089 #define SPELLFIX_COL_COMMAND        11
2090     }
2091     if( rc==SQLITE_OK && isCreate ){
2092       spellfix1DbExec(&rc, db,
2093          "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
2094          "  id INTEGER PRIMARY KEY,\n"
2095          "  rank INT,\n"
2096          "  langid INT,\n"
2097          "  word TEXT,\n"
2098          "  k1 TEXT,\n"
2099          "  k2 TEXT\n"
2100          ");\n",
2101          zDbName, zTableName
2102       );
2103       spellfix1DbExec(&rc, db,
2104          "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
2105             "ON \"%w_vocab\"(langid,k2);",
2106          zDbName, zTableName, zTableName
2107       );
2108     }
2109     for(i=3; rc==SQLITE_OK && i<argc; i++){
2110       if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
2111         pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
2112         if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
2113         continue;
2114       }
2115       *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
2116       rc = SQLITE_ERROR;
2117     }
2118   }
2119 
2120   if( rc && pNew ){
2121     *ppVTab = 0;
2122     spellfix1Uninit(0, &pNew->base);
2123   }else{
2124     *ppVTab = (sqlite3_vtab *)pNew;
2125   }
2126   return rc;
2127 }
2128 
2129 /*
2130 ** The xConnect and xCreate methods
2131 */
2132 static int spellfix1Connect(
2133   sqlite3 *db,
2134   void *pAux,
2135   int argc, const char *const*argv,
2136   sqlite3_vtab **ppVTab,
2137   char **pzErr
2138 ){
2139   return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2140 }
2141 static int spellfix1Create(
2142   sqlite3 *db,
2143   void *pAux,
2144   int argc, const char *const*argv,
2145   sqlite3_vtab **ppVTab,
2146   char **pzErr
2147 ){
2148   return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2149 }
2150 
2151 /*
2152 ** Clear all of the content from a cursor.
2153 */
2154 static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2155   int i;
2156   for(i=0; i<pCur->nRow; i++){
2157     sqlite3_free(pCur->a[i].zWord);
2158   }
2159   pCur->nRow = 0;
2160   pCur->iRow = 0;
2161   pCur->nSearch = 0;
2162   if( pCur->pFullScan ){
2163     sqlite3_finalize(pCur->pFullScan);
2164     pCur->pFullScan = 0;
2165   }
2166 }
2167 
2168 /*
2169 ** Resize the cursor to hold up to N rows of content
2170 */
2171 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2172   struct spellfix1_row *aNew;
2173   assert( N>=pCur->nRow );
2174   aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2175   if( aNew==0 && N>0 ){
2176     spellfix1ResetCursor(pCur);
2177     sqlite3_free(pCur->a);
2178     pCur->nAlloc = 0;
2179     pCur->a = 0;
2180   }else{
2181     pCur->nAlloc = N;
2182     pCur->a = aNew;
2183   }
2184 }
2185 
2186 
2187 /*
2188 ** Close a fuzzy-search cursor.
2189 */
2190 static int spellfix1Close(sqlite3_vtab_cursor *cur){
2191   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2192   spellfix1ResetCursor(pCur);
2193   spellfix1ResizeCursor(pCur, 0);
2194   sqlite3_free(pCur->zPattern);
2195   sqlite3_free(pCur);
2196   return SQLITE_OK;
2197 }
2198 
2199 #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
2200 #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
2201 #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
2202 #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
2203 #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
2204 #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
2205 #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
2206 #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2207 
2208 /*
2209 **
2210 ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2211 ** above.
2212 **
2213 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2214 ** if specified and in that order.
2215 */
2216 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2217   int iPlan = 0;
2218   int iLangTerm = -1;
2219   int iTopTerm = -1;
2220   int iScopeTerm = -1;
2221   int iDistTerm = -1;
2222   int iRowidTerm = -1;
2223   int i;
2224   const struct sqlite3_index_constraint *pConstraint;
2225   pConstraint = pIdxInfo->aConstraint;
2226   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2227     if( pConstraint->usable==0 ) continue;
2228 
2229     /* Terms of the form:  word MATCH $str */
2230     if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2231      && pConstraint->iColumn==SPELLFIX_COL_WORD
2232      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2233     ){
2234       iPlan |= SPELLFIX_IDXNUM_MATCH;
2235       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2236       pIdxInfo->aConstraintUsage[i].omit = 1;
2237     }
2238 
2239     /* Terms of the form:  langid = $langid  */
2240     if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2241      && pConstraint->iColumn==SPELLFIX_COL_LANGID
2242      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2243     ){
2244       iPlan |= SPELLFIX_IDXNUM_LANGID;
2245       iLangTerm = i;
2246     }
2247 
2248     /* Terms of the form:  top = $top */
2249     if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2250      && pConstraint->iColumn==SPELLFIX_COL_TOP
2251      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2252     ){
2253       iPlan |= SPELLFIX_IDXNUM_TOP;
2254       iTopTerm = i;
2255     }
2256 
2257     /* Terms of the form:  scope = $scope */
2258     if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2259      && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2260      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2261     ){
2262       iPlan |= SPELLFIX_IDXNUM_SCOPE;
2263       iScopeTerm = i;
2264     }
2265 
2266     /* Terms of the form:  distance < $dist or distance <= $dist */
2267     if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2268      && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2269      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2270           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2271     ){
2272       if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2273         iPlan |= SPELLFIX_IDXNUM_DISTLT;
2274       }else{
2275         iPlan |= SPELLFIX_IDXNUM_DISTLE;
2276       }
2277       iDistTerm = i;
2278     }
2279 
2280     /* Terms of the form:  distance < $dist or distance <= $dist */
2281     if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2282      && pConstraint->iColumn<0
2283      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2284     ){
2285       iPlan |= SPELLFIX_IDXNUM_ROWID;
2286       iRowidTerm = i;
2287     }
2288   }
2289   if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2290     int idx = 2;
2291     pIdxInfo->idxNum = iPlan;
2292     if( pIdxInfo->nOrderBy==1
2293      && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2294      && pIdxInfo->aOrderBy[0].desc==0
2295     ){
2296       pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2297     }
2298     if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2299       pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2300       pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2301     }
2302     if( iPlan&SPELLFIX_IDXNUM_TOP ){
2303       pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2304       pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2305     }
2306     if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2307       pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2308       pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2309     }
2310     if( iPlan&SPELLFIX_IDXNUM_DIST ){
2311       pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2312       pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2313     }
2314     pIdxInfo->estimatedCost = 1e5;
2315   }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2316     pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2317     pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2318     pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2319     pIdxInfo->estimatedCost = 5;
2320   }else{
2321     pIdxInfo->idxNum = 0;
2322     pIdxInfo->estimatedCost = 1e50;
2323   }
2324   return SQLITE_OK;
2325 }
2326 
2327 /*
2328 ** Open a new fuzzy-search cursor.
2329 */
2330 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2331   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2332   spellfix1_cursor *pCur;
2333   pCur = sqlite3_malloc64( sizeof(*pCur) );
2334   if( pCur==0 ) return SQLITE_NOMEM;
2335   memset(pCur, 0, sizeof(*pCur));
2336   pCur->pVTab = p;
2337   *ppCursor = &pCur->base;
2338   return SQLITE_OK;
2339 }
2340 
2341 /*
2342 ** Adjust a distance measurement by the words rank in order to show
2343 ** preference to common words.
2344 */
2345 static int spellfix1Score(int iDistance, int iRank){
2346   int iLog2;
2347   for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2348   return iDistance + 32 - iLog2;
2349 }
2350 
2351 /*
2352 ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2353 ** that they sort in order of increasing distance.
2354 */
2355 static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2356   const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2357   const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2358   return a->iScore - b->iScore;
2359 }
2360 
2361 /*
2362 ** A structure used to pass information from spellfix1FilterForMatch()
2363 ** into spellfix1RunQuery().
2364 */
2365 typedef struct MatchQuery {
2366   spellfix1_cursor *pCur;          /* The cursor being queried */
2367   sqlite3_stmt *pStmt;             /* shadow table query statment */
2368   char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2369   const char *zPattern;            /* Transliterated input string */
2370   int nPattern;                    /* Length of zPattern */
2371   EditDist3FromString *pMatchStr3; /* Original unicode string */
2372   EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2373   const EditDist3Lang *pLang;      /* The selected language coefficients */
2374   int iLang;                       /* The language id */
2375   int iScope;                      /* Default scope */
2376   int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2377   int rc;                          /* Error code */
2378   int nRun;                  /* Number of prior runs for the same zPattern */
2379   char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2380 } MatchQuery;
2381 
2382 /*
2383 ** Run a query looking for the best matches against zPattern using
2384 ** zHash as the character class seed hash.
2385 */
2386 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2387   const char *zK1;
2388   const char *zWord;
2389   int iDist;
2390   int iRank;
2391   int iScore;
2392   int iWorst = 0;
2393   int idx;
2394   int idxWorst = -1;
2395   int i;
2396   int iScope = p->iScope;
2397   spellfix1_cursor *pCur = p->pCur;
2398   sqlite3_stmt *pStmt = p->pStmt;
2399   char zHash1[SPELLFIX_MX_HASH];
2400   char zHash2[SPELLFIX_MX_HASH];
2401   char *zClass;
2402   int nClass;
2403   int rc;
2404 
2405   if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2406   zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2407   if( zClass==0 ){
2408     p->rc = SQLITE_NOMEM;
2409     return;
2410   }
2411   nClass = (int)strlen(zClass);
2412   if( nClass>SPELLFIX_MX_HASH-2 ){
2413     nClass = SPELLFIX_MX_HASH-2;
2414     zClass[nClass] = 0;
2415   }
2416   if( nClass<=iScope ){
2417     if( nClass>2 ){
2418       iScope = nClass-1;
2419     }else{
2420       iScope = nClass;
2421     }
2422   }
2423   memcpy(zHash1, zClass, iScope);
2424   sqlite3_free(zClass);
2425   zHash1[iScope] = 0;
2426   memcpy(zHash2, zHash1, iScope);
2427   zHash2[iScope] = 'Z';
2428   zHash2[iScope+1] = 0;
2429 #if SPELLFIX_MX_RUN>1
2430   for(i=0; i<p->nRun; i++){
2431     if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2432   }
2433 #endif
2434   assert( p->nRun<SPELLFIX_MX_RUN );
2435   memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2436   if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2437    || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2438   ){
2439     p->rc = SQLITE_NOMEM;
2440     return;
2441   }
2442 #if SPELLFIX_MX_RUN>1
2443   for(i=0; i<pCur->nRow; i++){
2444     if( pCur->a[i].iScore>iWorst ){
2445       iWorst = pCur->a[i].iScore;
2446       idxWorst = i;
2447     }
2448   }
2449 #endif
2450   while( sqlite3_step(pStmt)==SQLITE_ROW ){
2451     int iMatchlen = -1;
2452     iRank = sqlite3_column_int(pStmt, 2);
2453     if( p->pMatchStr3 ){
2454       int nWord = sqlite3_column_bytes(pStmt, 1);
2455       zWord = (const char*)sqlite3_column_text(pStmt, 1);
2456       iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2457     }else{
2458       zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2459       if( zK1==0 ) continue;
2460       iDist = editdist1(p->zPattern, zK1, 0);
2461     }
2462     if( iDist<0 ){
2463       p->rc = SQLITE_NOMEM;
2464       break;
2465     }
2466     pCur->nSearch++;
2467 
2468     /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2469     ** check if this row meets it. If not, jump back up to the top of the
2470     ** loop to process the next row. Otherwise, if the row does match the
2471     ** distance constraint, check if the pCur->a[] array is already full.
2472     ** If it is and no explicit "top = ?" constraint was present in the
2473     ** query, grow the array to ensure there is room for the new entry. */
2474     assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2475     if( p->iMaxDist>=0 ){
2476       if( iDist>p->iMaxDist ) continue;
2477       if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2478         spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2479         if( pCur->a==0 ) break;
2480       }
2481     }
2482 
2483     iScore = spellfix1Score(iDist,iRank);
2484     if( pCur->nRow<pCur->nAlloc ){
2485       idx = pCur->nRow;
2486     }else if( iScore<iWorst ){
2487       idx = idxWorst;
2488       sqlite3_free(pCur->a[idx].zWord);
2489     }else{
2490       continue;
2491     }
2492 
2493     pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2494     if( pCur->a[idx].zWord==0 ){
2495       p->rc = SQLITE_NOMEM;
2496       break;
2497     }
2498     pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2499     pCur->a[idx].iRank = iRank;
2500     pCur->a[idx].iDistance = iDist;
2501     pCur->a[idx].iScore = iScore;
2502     pCur->a[idx].iMatchlen = iMatchlen;
2503     memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2504     if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2505     if( pCur->nRow==pCur->nAlloc ){
2506       iWorst = pCur->a[0].iScore;
2507       idxWorst = 0;
2508       for(i=1; i<pCur->nRow; i++){
2509         iScore = pCur->a[i].iScore;
2510         if( iWorst<iScore ){
2511           iWorst = iScore;
2512           idxWorst = i;
2513         }
2514       }
2515     }
2516   }
2517   rc = sqlite3_reset(pStmt);
2518   if( rc ) p->rc = rc;
2519 }
2520 
2521 /*
2522 ** This version of the xFilter method work if the MATCH term is present
2523 ** and we are doing a scan.
2524 */
2525 static int spellfix1FilterForMatch(
2526   spellfix1_cursor *pCur,
2527   int argc,
2528   sqlite3_value **argv
2529 ){
2530   int idxNum = pCur->idxNum;
2531   const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2532   EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2533   char *zPattern;                    /* Transliteration of zMatchThis */
2534   int nPattern;                      /* Length of zPattern */
2535   int iLimit = 20;                   /* Max number of rows of output */
2536   int iScope = 3;                    /* Use this many characters of zClass */
2537   int iLang = 0;                     /* Language code */
2538   char *zSql;                        /* SQL of shadow table query */
2539   sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2540   int rc;                            /* Result code */
2541   int idx = 1;                       /* Next available filter parameter */
2542   spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2543   MatchQuery x;                      /* For passing info to RunQuery() */
2544 
2545   /* Load the cost table if we have not already done so */
2546   if( p->zCostTable!=0 && p->pConfig3==0 ){
2547     p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2548     if( p->pConfig3==0 ) return SQLITE_NOMEM;
2549     memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2550     rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2551     if( rc ) return rc;
2552   }
2553   memset(&x, 0, sizeof(x));
2554   x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2555   x.iMaxDist = -1;   /* Maximum allowed edit distance */
2556 
2557   if( idxNum&2 ){
2558     iLang = sqlite3_value_int(argv[idx++]);
2559   }
2560   if( idxNum&4 ){
2561     iLimit = sqlite3_value_int(argv[idx++]);
2562     if( iLimit<1 ) iLimit = 1;
2563   }
2564   if( idxNum&8 ){
2565     x.iScope = sqlite3_value_int(argv[idx++]);
2566     if( x.iScope<1 ) x.iScope = 1;
2567     if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2568   }
2569   if( idxNum&(16|32) ){
2570     x.iMaxDist = sqlite3_value_int(argv[idx++]);
2571     if( idxNum&16 ) x.iMaxDist--;
2572     if( x.iMaxDist<0 ) x.iMaxDist = 0;
2573   }
2574   spellfix1ResetCursor(pCur);
2575   spellfix1ResizeCursor(pCur, iLimit);
2576   zMatchThis = sqlite3_value_text(argv[0]);
2577   if( zMatchThis==0 ) return SQLITE_OK;
2578   if( p->pConfig3 ){
2579     x.pLang = editDist3FindLang(p->pConfig3, iLang);
2580     pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2581     if( pMatchStr3==0 ){
2582       x.rc = SQLITE_NOMEM;
2583       goto filter_exit;
2584     }
2585   }else{
2586     x.pLang = 0;
2587   }
2588   zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2589   sqlite3_free(pCur->zPattern);
2590   pCur->zPattern = zPattern;
2591   if( zPattern==0 ){
2592     x.rc = SQLITE_NOMEM;
2593     goto filter_exit;
2594   }
2595   nPattern = (int)strlen(zPattern);
2596   if( zPattern[nPattern-1]=='*' ) nPattern--;
2597   zSql = sqlite3_mprintf(
2598      "SELECT id, word, rank, coalesce(k1,word)"
2599      "  FROM \"%w\".\"%w_vocab\""
2600      " WHERE langid=%d AND k2>=?1 AND k2<?2",
2601      p->zDbName, p->zTableName, iLang
2602   );
2603   if( zSql==0 ){
2604     x.rc = SQLITE_NOMEM;
2605     pStmt = 0;
2606     goto filter_exit;
2607   }
2608   rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2609   sqlite3_free(zSql);
2610   pCur->iLang = iLang;
2611   x.pCur = pCur;
2612   x.pStmt = pStmt;
2613   x.zPattern = zPattern;
2614   x.nPattern = nPattern;
2615   x.pMatchStr3 = pMatchStr3;
2616   x.iLang = iLang;
2617   x.rc = rc;
2618   x.pConfig3 = p->pConfig3;
2619   if( x.rc==SQLITE_OK ){
2620     spellfix1RunQuery(&x, zPattern, nPattern);
2621   }
2622 
2623   if( pCur->a ){
2624     qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2625     pCur->iTop = iLimit;
2626     pCur->iScope = iScope;
2627   }else{
2628     x.rc = SQLITE_NOMEM;
2629   }
2630 
2631 filter_exit:
2632   sqlite3_finalize(pStmt);
2633   editDist3FromStringDelete(pMatchStr3);
2634   return x.rc;
2635 }
2636 
2637 /*
2638 ** This version of xFilter handles a full-table scan case
2639 */
2640 static int spellfix1FilterForFullScan(
2641   spellfix1_cursor *pCur,
2642   int argc,
2643   sqlite3_value **argv
2644 ){
2645   int rc = SQLITE_OK;
2646   int idxNum = pCur->idxNum;
2647   char *zSql;
2648   spellfix1_vtab *pVTab = pCur->pVTab;
2649   spellfix1ResetCursor(pCur);
2650   assert( idxNum==0 || idxNum==64 );
2651   zSql = sqlite3_mprintf(
2652      "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2653      pVTab->zDbName, pVTab->zTableName,
2654      ((idxNum & 64) ? " WHERE rowid=?" : "")
2655   );
2656   if( zSql==0 ) return SQLITE_NOMEM;
2657   rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2658   sqlite3_free(zSql);
2659   if( rc==SQLITE_OK && (idxNum & 64) ){
2660     assert( argc==1 );
2661     rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2662   }
2663   pCur->nRow = pCur->iRow = 0;
2664   if( rc==SQLITE_OK ){
2665     rc = sqlite3_step(pCur->pFullScan);
2666     if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2667     if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2668   }else{
2669     pCur->iRow = 0;
2670   }
2671   return rc;
2672 }
2673 
2674 
2675 /*
2676 ** Called to "rewind" a cursor back to the beginning so that
2677 ** it starts its output over again.  Always called at least once
2678 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2679 */
2680 static int spellfix1Filter(
2681   sqlite3_vtab_cursor *cur,
2682   int idxNum, const char *idxStr,
2683   int argc, sqlite3_value **argv
2684 ){
2685   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2686   int rc;
2687   pCur->idxNum = idxNum;
2688   if( idxNum & 1 ){
2689     rc = spellfix1FilterForMatch(pCur, argc, argv);
2690   }else{
2691     rc = spellfix1FilterForFullScan(pCur, argc, argv);
2692   }
2693   return rc;
2694 }
2695 
2696 
2697 /*
2698 ** Advance a cursor to its next row of output
2699 */
2700 static int spellfix1Next(sqlite3_vtab_cursor *cur){
2701   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2702   int rc = SQLITE_OK;
2703   if( pCur->iRow < pCur->nRow ){
2704     if( pCur->pFullScan ){
2705       rc = sqlite3_step(pCur->pFullScan);
2706       if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2707       if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2708     }else{
2709       pCur->iRow++;
2710     }
2711   }
2712   return rc;
2713 }
2714 
2715 /*
2716 ** Return TRUE if we are at the end-of-file
2717 */
2718 static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2719   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2720   return pCur->iRow>=pCur->nRow;
2721 }
2722 
2723 /*
2724 ** Return columns from the current row.
2725 */
2726 static int spellfix1Column(
2727   sqlite3_vtab_cursor *cur,
2728   sqlite3_context *ctx,
2729   int i
2730 ){
2731   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2732   if( pCur->pFullScan ){
2733     if( i<=SPELLFIX_COL_LANGID ){
2734       sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2735     }else{
2736       sqlite3_result_null(ctx);
2737     }
2738     return SQLITE_OK;
2739   }
2740   switch( i ){
2741     case SPELLFIX_COL_WORD: {
2742       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2743       break;
2744     }
2745     case SPELLFIX_COL_RANK: {
2746       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2747       break;
2748     }
2749     case SPELLFIX_COL_DISTANCE: {
2750       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2751       break;
2752     }
2753     case SPELLFIX_COL_LANGID: {
2754       sqlite3_result_int(ctx, pCur->iLang);
2755       break;
2756     }
2757     case SPELLFIX_COL_SCORE: {
2758       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2759       break;
2760     }
2761     case SPELLFIX_COL_MATCHLEN: {
2762       int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2763       if( iMatchlen<0 ){
2764         int nPattern = (int)strlen(pCur->zPattern);
2765         char *zWord = pCur->a[pCur->iRow].zWord;
2766         int nWord = (int)strlen(zWord);
2767 
2768         if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2769           char *zTranslit;
2770           int res;
2771           zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2772           if( !zTranslit ) return SQLITE_NOMEM;
2773           res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2774           sqlite3_free(zTranslit);
2775           if( res<0 ) return SQLITE_NOMEM;
2776           iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2777         }else{
2778           iMatchlen = utf8Charlen(zWord, nWord);
2779         }
2780       }
2781 
2782       sqlite3_result_int(ctx, iMatchlen);
2783       break;
2784     }
2785     case SPELLFIX_COL_PHONEHASH: {
2786       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2787       break;
2788     }
2789     case SPELLFIX_COL_TOP: {
2790       sqlite3_result_int(ctx, pCur->iTop);
2791       break;
2792     }
2793     case SPELLFIX_COL_SCOPE: {
2794       sqlite3_result_int(ctx, pCur->iScope);
2795       break;
2796     }
2797     case SPELLFIX_COL_SRCHCNT: {
2798       sqlite3_result_int(ctx, pCur->nSearch);
2799       break;
2800     }
2801     default: {
2802       sqlite3_result_null(ctx);
2803       break;
2804     }
2805   }
2806   return SQLITE_OK;
2807 }
2808 
2809 /*
2810 ** The rowid.
2811 */
2812 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2813   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2814   if( pCur->pFullScan ){
2815     *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2816   }else{
2817     *pRowid = pCur->a[pCur->iRow].iRowid;
2818   }
2819   return SQLITE_OK;
2820 }
2821 
2822 /*
2823 ** This function is called by the xUpdate() method. It returns a string
2824 ** containing the conflict mode that xUpdate() should use for the current
2825 ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2826 */
2827 static const char *spellfix1GetConflict(sqlite3 *db){
2828   static const char *azConflict[] = {
2829     /* Note: Instead of "FAIL" - "ABORT". */
2830     "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2831   };
2832   int eConflict = sqlite3_vtab_on_conflict(db);
2833 
2834   assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2835        || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2836        || eConflict==SQLITE_REPLACE
2837   );
2838   assert( SQLITE_ROLLBACK==1 );
2839   assert( SQLITE_IGNORE==2 );
2840   assert( SQLITE_FAIL==3 );
2841   assert( SQLITE_ABORT==4 );
2842   assert( SQLITE_REPLACE==5 );
2843 
2844   return azConflict[eConflict-1];
2845 }
2846 
2847 /*
2848 ** The xUpdate() method.
2849 */
2850 static int spellfix1Update(
2851   sqlite3_vtab *pVTab,
2852   int argc,
2853   sqlite3_value **argv,
2854   sqlite_int64 *pRowid
2855 ){
2856   int rc = SQLITE_OK;
2857   sqlite3_int64 rowid, newRowid;
2858   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2859   sqlite3 *db = p->db;
2860 
2861   if( argc==1 ){
2862     /* A delete operation on the rowid given by argv[0] */
2863     rowid = *pRowid = sqlite3_value_int64(argv[0]);
2864     spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2865                            " WHERE id=%lld",
2866                   p->zDbName, p->zTableName, rowid);
2867   }else{
2868     const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2869     int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2870     int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2871     int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2872     const unsigned char *zSoundslike =
2873            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2874     int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2875     char *zK1, *zK2;
2876     int i;
2877     char c;
2878     const char *zConflict = spellfix1GetConflict(db);
2879 
2880     if( zWord==0 ){
2881       /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2882       ** cause zWord to be NULL, so we look at the "command" column to see
2883       ** what special actions to take */
2884       const char *zCmd =
2885          (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2886       if( zCmd==0 ){
2887         pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2888                                          p->zTableName);
2889         return SQLITE_CONSTRAINT_NOTNULL;
2890       }
2891       if( strcmp(zCmd,"reset")==0 ){
2892         /* Reset the  edit cost table (if there is one). */
2893         editDist3ConfigDelete(p->pConfig3);
2894         p->pConfig3 = 0;
2895         return SQLITE_OK;
2896       }
2897       if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2898         editDist3ConfigDelete(p->pConfig3);
2899         p->pConfig3 = 0;
2900         sqlite3_free(p->zCostTable);
2901         p->zCostTable = spellfix1Dequote(zCmd+16);
2902         if( p->zCostTable==0 ) return SQLITE_NOMEM;
2903         if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2904           sqlite3_free(p->zCostTable);
2905           p->zCostTable = 0;
2906         }
2907         return SQLITE_OK;
2908       }
2909       pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2910                                        p->zTableName, zCmd);
2911       return SQLITE_ERROR;
2912     }
2913     if( iRank<1 ) iRank = 1;
2914     if( zSoundslike ){
2915       zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2916     }else{
2917       zK1 = (char*)transliterate(zWord, nWord);
2918     }
2919     if( zK1==0 ) return SQLITE_NOMEM;
2920     for(i=0; (c = zK1[i])!=0; i++){
2921        if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2922     }
2923     zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2924     if( zK2==0 ){
2925       sqlite3_free(zK1);
2926       return SQLITE_NOMEM;
2927     }
2928     if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2929       if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2930         spellfix1DbExec(&rc, db,
2931                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2932                "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)",
2933                p->zDbName, p->zTableName,
2934                iRank, iLang, zWord, zK1, zWord, zK2
2935         );
2936       }else{
2937         newRowid = sqlite3_value_int64(argv[1]);
2938         spellfix1DbExec(&rc, db,
2939             "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2940             "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)",
2941             zConflict, p->zDbName, p->zTableName,
2942             newRowid, iRank, iLang, zWord, zK1, zWord, zK2
2943         );
2944       }
2945       *pRowid = sqlite3_last_insert_rowid(db);
2946     }else{
2947       rowid = sqlite3_value_int64(argv[0]);
2948       newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2949       spellfix1DbExec(&rc, db,
2950              "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2951              " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld",
2952              zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2953              zWord, zK1, zWord, zK2, rowid
2954       );
2955     }
2956     sqlite3_free(zK1);
2957     sqlite3_free(zK2);
2958   }
2959   return rc;
2960 }
2961 
2962 /*
2963 ** Rename the spellfix1 table.
2964 */
2965 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2966   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2967   sqlite3 *db = p->db;
2968   int rc = SQLITE_OK;
2969   char *zNewName = sqlite3_mprintf("%s", zNew);
2970   if( zNewName==0 ){
2971     return SQLITE_NOMEM;
2972   }
2973   spellfix1DbExec(&rc, db,
2974      "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2975      p->zDbName, p->zTableName, zNewName
2976   );
2977   if( rc==SQLITE_OK ){
2978     sqlite3_free(p->zTableName);
2979     p->zTableName = zNewName;
2980   }else{
2981     sqlite3_free(zNewName);
2982   }
2983   return rc;
2984 }
2985 
2986 
2987 /*
2988 ** A virtual table module that provides fuzzy search.
2989 */
2990 static sqlite3_module spellfix1Module = {
2991   0,                       /* iVersion */
2992   spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
2993   spellfix1Connect,        /* xConnect - reconnected to an existing table */
2994   spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
2995   spellfix1Disconnect,     /* xDisconnect - close a connection */
2996   spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
2997   spellfix1Open,           /* xOpen - open a cursor */
2998   spellfix1Close,          /* xClose - close a cursor */
2999   spellfix1Filter,         /* xFilter - configure scan constraints */
3000   spellfix1Next,           /* xNext - advance a cursor */
3001   spellfix1Eof,            /* xEof - check for end of scan */
3002   spellfix1Column,         /* xColumn - read data */
3003   spellfix1Rowid,          /* xRowid - read data */
3004   spellfix1Update,         /* xUpdate */
3005   0,                       /* xBegin */
3006   0,                       /* xSync */
3007   0,                       /* xCommit */
3008   0,                       /* xRollback */
3009   0,                       /* xFindMethod */
3010   spellfix1Rename,         /* xRename */
3011 };
3012 
3013 /*
3014 ** Register the various functions and the virtual table.
3015 */
3016 static int spellfix1Register(sqlite3 *db){
3017   int rc = SQLITE_OK;
3018   int i;
3019   rc = sqlite3_create_function(db, "spellfix1_translit", 1,
3020                                SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3021                                 transliterateSqlFunc, 0, 0);
3022   if( rc==SQLITE_OK ){
3023     rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
3024                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3025                                   editdistSqlFunc, 0, 0);
3026   }
3027   if( rc==SQLITE_OK ){
3028     rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
3029                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3030                                   phoneticHashSqlFunc, 0, 0);
3031   }
3032   if( rc==SQLITE_OK ){
3033     rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
3034                                   SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3035                                   scriptCodeSqlFunc, 0, 0);
3036   }
3037   if( rc==SQLITE_OK ){
3038     rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
3039   }
3040   if( rc==SQLITE_OK ){
3041     rc = editDist3Install(db);
3042   }
3043 
3044   /* Verify sanity of the translit[] table */
3045   for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
3046     assert( translit[i].cFrom<translit[i+1].cFrom );
3047   }
3048 
3049   return rc;
3050 }
3051 
3052 #endif /* SQLITE_OMIT_VIRTUALTABLE */
3053 
3054 /*
3055 ** Extension load function.
3056 */
3057 #ifdef _WIN32
3058 __declspec(dllexport)
3059 #endif
3060 int sqlite3_spellfix_init(
3061   sqlite3 *db,
3062   char **pzErrMsg,
3063   const sqlite3_api_routines *pApi
3064 ){
3065   SQLITE_EXTENSION_INIT2(pApi);
3066 #ifndef SQLITE_OMIT_VIRTUALTABLE
3067   return spellfix1Register(db);
3068 #endif
3069   return SQLITE_OK;
3070 }
3071