xref: /sqlite-3.40.0/ext/misc/spellfix.c (revision b80bb6ce)
1 /*
2 ** 2012 April 10
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used
14 ** to search a large vocabulary for close matches.  See separate
15 ** documentation (http://www.sqlite.org/spellfix1.html) for details.
16 */
17 #include "sqlite3ext.h"
18 SQLITE_EXTENSION_INIT1
19 
20 #ifndef SQLITE_AMALGAMATION
21 # if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
22 #  define NDEBUG 1
23 # endif
24 # if defined(NDEBUG) && defined(SQLITE_DEBUG)
25 #  undef NDEBUG
26 # endif
27 # include <string.h>
28 # include <stdio.h>
29 # include <stdlib.h>
30 # include <assert.h>
31 # define ALWAYS(X)  1
32 # define NEVER(X)   0
33   typedef unsigned char u8;
34   typedef unsigned short u16;
35 #endif
36 #include <ctype.h>
37 
38 #ifndef SQLITE_OMIT_VIRTUALTABLE
39 
40 /*
41 ** Character classes for ASCII characters:
42 **
43 **   0   ''        Silent letters:   H W
44 **   1   'A'       Any vowel:   A E I O U (Y)
45 **   2   'B'       A bilabeal stop or fricative:  B F P V W
46 **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
47 **   4   'D'       Alveolar stops:  D T
48 **   5   'H'       Letter H at the beginning of a word
49 **   6   'L'       Glide:  L
50 **   7   'R'       Semivowel:  R
51 **   8   'M'       Nasals:  M N
52 **   9   'Y'       Letter Y at the beginning of a word.
53 **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
54 **   11  ' '       White space
55 **   12  '?'       Other.
56 */
57 #define CCLASS_SILENT         0
58 #define CCLASS_VOWEL          1
59 #define CCLASS_B              2
60 #define CCLASS_C              3
61 #define CCLASS_D              4
62 #define CCLASS_H              5
63 #define CCLASS_L              6
64 #define CCLASS_R              7
65 #define CCLASS_M              8
66 #define CCLASS_Y              9
67 #define CCLASS_DIGIT         10
68 #define CCLASS_SPACE         11
69 #define CCLASS_OTHER         12
70 
71 /*
72 ** The following table gives the character class for non-initial ASCII
73 ** characters.
74 */
75 static const unsigned char midClass[] = {
76  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
77  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
78  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
79  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
80  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
81  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
82  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
83  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
84  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
85  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
86  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
87  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
88  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
89  /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
90  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
91  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
92  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
93  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
94  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
95  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
96  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
97  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
98  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
99  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
100  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
101  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
102  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
103  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
104  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
105  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
106  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
107  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
108  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
109  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
110  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
111  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
112  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
113  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
114  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
115  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
116  /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
117  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
118  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
119 };
120 /*
121 ** This tables gives the character class for ASCII characters that form the
122 ** initial character of a word.  The only difference from midClass is with
123 ** the letters H, W, and Y.
124 */
125 static const unsigned char initClass[] = {
126  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
127  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
128  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
129  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
130  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
131  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
132  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
133  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
134  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
135  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
136  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
137  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
138  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
139  /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
140  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
141  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
142  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
143  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
144  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
145  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
146  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
147  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
148  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
149  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
150  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
151  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
152  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
153  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
154  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
155  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
156  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
157  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
158  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
159  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
160  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
161  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
162  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
163  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
164  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
165  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
166  /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
167  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
168  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
169 };
170 
171 /*
172 ** Mapping from the character class number (0-13) to a symbol for each
173 ** character class.  Note that initClass[] can be used to map the class
174 ** symbol back into the class number.
175 */
176 static const unsigned char className[] = ".ABCDHLRMY9 ?";
177 
178 /*
179 ** Generate a "phonetic hash" from a string of ASCII characters
180 ** in zIn[0..nIn-1].
181 **
182 **   * Map characters by character class as defined above.
183 **   * Omit double-letters
184 **   * Omit vowels beside R and L
185 **   * Omit T when followed by CH
186 **   * Omit W when followed by R
187 **   * Omit D when followed by J or G
188 **   * Omit K in KN or G in GN at the beginning of a word
189 **
190 ** Space to hold the result is obtained from sqlite3_malloc()
191 **
192 ** Return NULL if memory allocation fails.
193 */
194 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
195   unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
196   int i;
197   int nOut = 0;
198   char cPrev = 0x77;
199   char cPrevX = 0x77;
200   const unsigned char *aClass = initClass;
201 
202   if( zOut==0 ) return 0;
203   if( nIn>2 ){
204     switch( zIn[0] ){
205       case 'g':
206       case 'k': {
207         if( zIn[1]=='n' ){ zIn++; nIn--; }
208         break;
209       }
210     }
211   }
212   for(i=0; i<nIn; i++){
213     unsigned char c = zIn[i];
214     if( i+1<nIn ){
215       if( c=='w' && zIn[i+1]=='r' ) continue;
216       if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
217       if( i+2<nIn ){
218         if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
219       }
220     }
221     c = aClass[c&0x7f];
222     if( c==CCLASS_SPACE ) continue;
223     if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
224     aClass = midClass;
225     if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
226        continue; /* No vowels beside L or R */
227     }
228     if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
229        nOut--;   /* No vowels beside L or R */
230     }
231     cPrev = c;
232     if( c==CCLASS_SILENT ) continue;
233     cPrevX = c;
234     c = className[c];
235     assert( nOut>=0 );
236     if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
237   }
238   zOut[nOut] = 0;
239   return zOut;
240 }
241 
242 /*
243 ** This is an SQL function wrapper around phoneticHash().  See
244 ** the description of phoneticHash() for additional information.
245 */
246 static void phoneticHashSqlFunc(
247   sqlite3_context *context,
248   int argc,
249   sqlite3_value **argv
250 ){
251   const unsigned char *zIn;
252   unsigned char *zOut;
253 
254   zIn = sqlite3_value_text(argv[0]);
255   if( zIn==0 ) return;
256   zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
257   if( zOut==0 ){
258     sqlite3_result_error_nomem(context);
259   }else{
260     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
261   }
262 }
263 
264 /*
265 ** Return the character class number for a character given its
266 ** context.
267 */
268 static char characterClass(char cPrev, char c){
269   return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
270 }
271 
272 /*
273 ** Return the cost of inserting or deleting character c immediately
274 ** following character cPrev.  If cPrev==0, that means c is the first
275 ** character of the word.
276 */
277 static int insertOrDeleteCost(char cPrev, char c, char cNext){
278   char classC = characterClass(cPrev, c);
279   char classCprev;
280 
281   if( classC==CCLASS_SILENT ){
282     /* Insert or delete "silent" characters such as H or W */
283     return 1;
284   }
285   if( cPrev==c ){
286     /* Repeated characters, or miss a repeat */
287     return 10;
288   }
289   if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
290     return 20;  /* Insert a vowel before or after 'r' */
291   }
292   classCprev = characterClass(cPrev, cPrev);
293   if( classC==classCprev ){
294     if( classC==CCLASS_VOWEL ){
295       /* Remove or add a new vowel to a vowel cluster */
296       return 15;
297     }else{
298       /* Remove or add a consonant not in the same class */
299       return 50;
300     }
301   }
302 
303   /* any other character insertion or deletion */
304   return 100;
305 }
306 
307 /*
308 ** Divide the insertion cost by this factor when appending to the
309 ** end of the word.
310 */
311 #define FINAL_INS_COST_DIV  4
312 
313 /*
314 ** Return the cost of substituting cTo in place of cFrom assuming
315 ** the previous character is cPrev.  If cPrev==0 then cTo is the first
316 ** character of the word.
317 */
318 static int substituteCost(char cPrev, char cFrom, char cTo){
319   char classFrom, classTo;
320   if( cFrom==cTo ){
321     /* Exact match */
322     return 0;
323   }
324   if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
325     /* differ only in case */
326     return 0;
327   }
328   classFrom = characterClass(cPrev, cFrom);
329   classTo = characterClass(cPrev, cTo);
330   if( classFrom==classTo ){
331     /* Same character class */
332     return 40;
333   }
334   if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
335       && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
336     /* Convert from one consonant to another, but in a different class */
337     return 75;
338   }
339   /* Any other subsitution */
340   return 100;
341 }
342 
343 /*
344 ** Given two strings zA and zB which are pure ASCII, return the cost
345 ** of transforming zA into zB.  If zA ends with '*' assume that it is
346 ** a prefix of zB and give only minimal penalty for extra characters
347 ** on the end of zB.
348 **
349 ** Smaller numbers mean a closer match.
350 **
351 ** Negative values indicate an error:
352 **    -1  One of the inputs is NULL
353 **    -2  Non-ASCII characters on input
354 **    -3  Unable to allocate memory
355 **
356 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
357 ** of zB that matched the pattern in zA. If zA does not end with a '*',
358 ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
359 ** If zA does end in a '*', then it is the number of bytes in the prefix
360 ** of zB that was deemed to match zA.
361 */
362 static int editdist1(const char *zA, const char *zB, int *pnMatch){
363   int nA, nB;            /* Number of characters in zA[] and zB[] */
364   int xA, xB;            /* Loop counters for zA[] and zB[] */
365   char cA = 0, cB;       /* Current character of zA and zB */
366   char cAprev, cBprev;   /* Previous character of zA and zB */
367   char cAnext, cBnext;   /* Next character in zA and zB */
368   int d;                 /* North-west cost value */
369   int dc = 0;            /* North-west character value */
370   int res;               /* Final result */
371   int *m;                /* The cost matrix */
372   char *cx;              /* Corresponding character values */
373   int *toFree = 0;       /* Malloced space */
374   int nMatch = 0;
375   int mStack[60+15];     /* Stack space to use if not too much is needed */
376 
377   /* Early out if either input is NULL */
378   if( zA==0 || zB==0 ) return -1;
379 
380   /* Skip any common prefix */
381   while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
382   if( pnMatch ) *pnMatch = nMatch;
383   if( zA[0]==0 && zB[0]==0 ) return 0;
384 
385 #if 0
386   printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
387 #endif
388 
389   /* Verify input strings and measure their lengths */
390   for(nA=0; zA[nA]; nA++){
391     if( zA[nA]&0x80 ) return -2;
392   }
393   for(nB=0; zB[nB]; nB++){
394     if( zB[nB]&0x80 ) return -2;
395   }
396 
397   /* Special processing if either string is empty */
398   if( nA==0 ){
399     cBprev = (char)dc;
400     for(xB=res=0; (cB = zB[xB])!=0; xB++){
401       res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
402       cBprev = cB;
403     }
404     return res;
405   }
406   if( nB==0 ){
407     cAprev = (char)dc;
408     for(xA=res=0; (cA = zA[xA])!=0; xA++){
409       res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
410       cAprev = cA;
411     }
412     return res;
413   }
414 
415   /* A is a prefix of B */
416   if( zA[0]=='*' && zA[1]==0 ) return 0;
417 
418   /* Allocate and initialize the Wagner matrix */
419   if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
420     m = mStack;
421   }else{
422     m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
423     if( m==0 ) return -3;
424   }
425   cx = (char*)&m[nB+1];
426 
427   /* Compute the Wagner edit distance */
428   m[0] = 0;
429   cx[0] = (char)dc;
430   cBprev = (char)dc;
431   for(xB=1; xB<=nB; xB++){
432     cBnext = zB[xB];
433     cB = zB[xB-1];
434     cx[xB] = cB;
435     m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
436     cBprev = cB;
437   }
438   cAprev = (char)dc;
439   for(xA=1; xA<=nA; xA++){
440     int lastA = (xA==nA);
441     cA = zA[xA-1];
442     cAnext = zA[xA];
443     if( cA=='*' && lastA ) break;
444     d = m[0];
445     dc = cx[0];
446     m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
447     cBprev = 0;
448     for(xB=1; xB<=nB; xB++){
449       int totalCost, insCost, delCost, subCost, ncx;
450       cB = zB[xB-1];
451       cBnext = zB[xB];
452 
453       /* Cost to insert cB */
454       insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
455       if( lastA ) insCost /= FINAL_INS_COST_DIV;
456 
457       /* Cost to delete cA */
458       delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
459 
460       /* Cost to substitute cA->cB */
461       subCost = substituteCost(cx[xB-1], cA, cB);
462 
463       /* Best cost */
464       totalCost = insCost + m[xB-1];
465       ncx = cB;
466       if( (delCost + m[xB])<totalCost ){
467         totalCost = delCost + m[xB];
468         ncx = cA;
469       }
470       if( (subCost + d)<totalCost ){
471         totalCost = subCost + d;
472       }
473 
474 #if 0
475       printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
476              " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
477              xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
478              insCost, delCost, subCost, totalCost, ncx?ncx:' ');
479 #endif
480 
481       /* Update the matrix */
482       d = m[xB];
483       dc = cx[xB];
484       m[xB] = totalCost;
485       cx[xB] = (char)ncx;
486       cBprev = cB;
487     }
488     cAprev = cA;
489   }
490 
491   /* Free the wagner matrix and return the result */
492   if( cA=='*' ){
493     res = m[1];
494     for(xB=1; xB<=nB; xB++){
495       if( m[xB]<res ){
496         res = m[xB];
497         if( pnMatch ) *pnMatch = xB+nMatch;
498       }
499     }
500   }else{
501     res = m[nB];
502     /* In the current implementation, pnMatch is always NULL if zA does
503     ** not end in "*" */
504     assert( pnMatch==0 );
505   }
506   sqlite3_free(toFree);
507   return res;
508 }
509 
510 /*
511 ** Function:    editdist(A,B)
512 **
513 ** Return the cost of transforming string A into string B.  Both strings
514 ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
515 ** a prefix of B and extra characters on the end of B have minimal additional
516 ** cost.
517 */
518 static void editdistSqlFunc(
519   sqlite3_context *context,
520   int argc,
521   sqlite3_value **argv
522 ){
523   int res = editdist1(
524                     (const char*)sqlite3_value_text(argv[0]),
525                     (const char*)sqlite3_value_text(argv[1]),
526                     0);
527   if( res<0 ){
528     if( res==(-3) ){
529       sqlite3_result_error_nomem(context);
530     }else if( res==(-2) ){
531       sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
532     }else{
533       sqlite3_result_error(context, "NULL input to editdist()", -1);
534     }
535   }else{
536     sqlite3_result_int(context, res);
537   }
538 }
539 
540 /* End of the fixed-cost edit distance implementation
541 ******************************************************************************
542 *****************************************************************************
543 ** Begin: Configurable cost unicode edit distance routines
544 */
545 /* Forward declaration of structures */
546 typedef struct EditDist3Cost EditDist3Cost;
547 typedef struct EditDist3Config EditDist3Config;
548 typedef struct EditDist3Point EditDist3Point;
549 typedef struct EditDist3From EditDist3From;
550 typedef struct EditDist3FromString EditDist3FromString;
551 typedef struct EditDist3To EditDist3To;
552 typedef struct EditDist3ToString EditDist3ToString;
553 typedef struct EditDist3Lang EditDist3Lang;
554 
555 
556 /*
557 ** An entry in the edit cost table
558 */
559 struct EditDist3Cost {
560   EditDist3Cost *pNext;     /* Next cost element */
561   u8 nFrom;                 /* Number of bytes in aFrom */
562   u8 nTo;                   /* Number of bytes in aTo */
563   u16 iCost;                /* Cost of this transformation */
564   char a[4]    ;            /* FROM string followed by TO string */
565   /* Additional TO and FROM string bytes appended as necessary */
566 };
567 
568 /*
569 ** Edit costs for a particular language ID
570 */
571 struct EditDist3Lang {
572   int iLang;             /* Language ID */
573   int iInsCost;          /* Default insertion cost */
574   int iDelCost;          /* Default deletion cost */
575   int iSubCost;          /* Default substitution cost */
576   EditDist3Cost *pCost;  /* Costs */
577 };
578 
579 
580 /*
581 ** The default EditDist3Lang object, with default costs.
582 */
583 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
584 
585 /*
586 ** Complete configuration
587 */
588 struct EditDist3Config {
589   int nLang;             /* Number of language IDs.  Size of a[] */
590   EditDist3Lang *a;      /* One for each distinct language ID */
591 };
592 
593 /*
594 ** Extra information about each character in the FROM string.
595 */
596 struct EditDist3From {
597   int nSubst;              /* Number of substitution cost entries */
598   int nDel;                /* Number of deletion cost entries */
599   int nByte;               /* Number of bytes in this character */
600   EditDist3Cost **apSubst; /* Array of substitution costs for this element */
601   EditDist3Cost **apDel;   /* Array of deletion cost entries */
602 };
603 
604 /*
605 ** A precompiled FROM string.
606 *
607 ** In the common case we expect the FROM string to be reused multiple times.
608 ** In other words, the common case will be to measure the edit distance
609 ** from a single origin string to multiple target strings.
610 */
611 struct EditDist3FromString {
612   char *z;                 /* The complete text of the FROM string */
613   int n;                   /* Number of characters in the FROM string */
614   int isPrefix;            /* True if ends with '*' character */
615   EditDist3From *a;        /* Extra info about each char of the FROM string */
616 };
617 
618 /*
619 ** Extra information about each character in the TO string.
620 */
621 struct EditDist3To {
622   int nIns;                /* Number of insertion cost entries */
623   int nByte;               /* Number of bytes in this character */
624   EditDist3Cost **apIns;   /* Array of deletion cost entries */
625 };
626 
627 /*
628 ** A precompiled FROM string
629 */
630 struct EditDist3ToString {
631   char *z;                 /* The complete text of the TO string */
632   int n;                   /* Number of characters in the TO string */
633   EditDist3To *a;          /* Extra info about each char of the TO string */
634 };
635 
636 /*
637 ** Clear or delete an instance of the object that records all edit-distance
638 ** weights.
639 */
640 static void editDist3ConfigClear(EditDist3Config *p){
641   int i;
642   if( p==0 ) return;
643   for(i=0; i<p->nLang; i++){
644     EditDist3Cost *pCost, *pNext;
645     pCost = p->a[i].pCost;
646     while( pCost ){
647       pNext = pCost->pNext;
648       sqlite3_free(pCost);
649       pCost = pNext;
650     }
651   }
652   sqlite3_free(p->a);
653   memset(p, 0, sizeof(*p));
654 }
655 static void editDist3ConfigDelete(void *pIn){
656   EditDist3Config *p = (EditDist3Config*)pIn;
657   editDist3ConfigClear(p);
658   sqlite3_free(p);
659 }
660 
661 /* Compare the FROM values of two EditDist3Cost objects, for sorting.
662 ** Return negative, zero, or positive if the A is less than, equal to,
663 ** or greater than B.
664 */
665 static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){
666   int n = pA->nFrom;
667   int rc;
668   if( n>pB->nFrom ) n = pB->nFrom;
669   rc = strncmp(pA->a, pB->a, n);
670   if( rc==0 ) rc = pA->nFrom - pB->nFrom;
671   return rc;
672 }
673 
674 /*
675 ** Merge together two sorted lists of EditDist3Cost objects, in order
676 ** of increasing FROM.
677 */
678 static EditDist3Cost *editDist3CostMerge(
679   EditDist3Cost *pA,
680   EditDist3Cost *pB
681 ){
682   EditDist3Cost *pHead = 0;
683   EditDist3Cost **ppTail = &pHead;
684   EditDist3Cost *p;
685   while( pA && pB ){
686     if( editDist3CostCompare(pA,pB)<=0 ){
687       p = pA;
688       pA = pA->pNext;
689     }else{
690       p = pB;
691       pB = pB->pNext;
692     }
693     *ppTail = p;
694     ppTail =  &p->pNext;
695   }
696   if( pA ){
697     *ppTail = pA;
698   }else{
699     *ppTail = pB;
700   }
701   return pHead;
702 }
703 
704 /*
705 ** Sort a list of EditDist3Cost objects into order of increasing FROM
706 */
707 static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){
708   EditDist3Cost *ap[60], *p;
709   int i;
710   int mx = 0;
711   ap[0] = 0;
712   ap[1] = 0;
713   while( pList ){
714     p = pList;
715     pList = p->pNext;
716     p->pNext = 0;
717     for(i=0; ap[i]; i++){
718       p = editDist3CostMerge(ap[i],p);
719       ap[i] = 0;
720     }
721     ap[i] = p;
722     if( i>mx ){
723       mx = i;
724       ap[i+1] = 0;
725     }
726   }
727   p = 0;
728   for(i=0; i<=mx; i++){
729     if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
730   }
731   return p;
732 }
733 
734 /*
735 ** Load all edit-distance weights from a table.
736 */
737 static int editDist3ConfigLoad(
738   EditDist3Config *p,      /* The edit distance configuration to load */
739   sqlite3 *db,            /* Load from this database */
740   const char *zTable      /* Name of the table from which to load */
741 ){
742   sqlite3_stmt *pStmt;
743   int rc, rc2;
744   char *zSql;
745   int iLangPrev = -9999;
746   EditDist3Lang *pLang = 0;
747 
748   zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
749                          " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
750   if( zSql==0 ) return SQLITE_NOMEM;
751   rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
752   sqlite3_free(zSql);
753   if( rc ) return rc;
754   editDist3ConfigClear(p);
755   while( sqlite3_step(pStmt)==SQLITE_ROW ){
756     int iLang = sqlite3_column_int(pStmt, 0);
757     const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
758     int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
759     const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
760     int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
761     int iCost = sqlite3_column_int(pStmt, 3);
762 
763     assert( zFrom!=0 || nFrom==0 );
764     assert( zTo!=0 || nTo==0 );
765     if( nFrom>100 || nTo>100 ) continue;
766     if( iCost<0 ) continue;
767     if( iCost>=10000 ) continue;  /* Costs above 10K are considered infinite */
768     if( pLang==0 || iLang!=iLangPrev ){
769       EditDist3Lang *pNew;
770       pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
771       if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
772       p->a = pNew;
773       pLang = &p->a[p->nLang];
774       p->nLang++;
775       pLang->iLang = iLang;
776       pLang->iInsCost = 100;
777       pLang->iDelCost = 100;
778       pLang->iSubCost = 150;
779       pLang->pCost = 0;
780       iLangPrev = iLang;
781     }
782     if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
783       pLang->iDelCost = iCost;
784     }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
785       pLang->iInsCost = iCost;
786     }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
787       pLang->iSubCost = iCost;
788     }else{
789       EditDist3Cost *pCost;
790       int nExtra = nFrom + nTo - 4;
791       if( nExtra<0 ) nExtra = 0;
792       pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
793       if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
794       pCost->nFrom = (u8)nFrom;
795       pCost->nTo = (u8)nTo;
796       pCost->iCost = (u16)iCost;
797       memcpy(pCost->a, zFrom, nFrom);
798       memcpy(pCost->a + nFrom, zTo, nTo);
799       pCost->pNext = pLang->pCost;
800       pLang->pCost = pCost;
801     }
802   }
803   rc2 = sqlite3_finalize(pStmt);
804   if( rc==SQLITE_OK ) rc = rc2;
805   if( rc==SQLITE_OK ){
806     int iLang;
807     for(iLang=0; iLang<p->nLang; iLang++){
808       p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
809     }
810   }
811   return rc;
812 }
813 
814 /*
815 ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
816 ** of N.
817 */
818 static int utf8Len(unsigned char c, int N){
819   int len = 1;
820   if( c>0x7f ){
821     if( (c&0xe0)==0xc0 ){
822       len = 2;
823     }else if( (c&0xf0)==0xe0 ){
824       len = 3;
825     }else{
826       len = 4;
827     }
828   }
829   if( len>N ) len = N;
830   return len;
831 }
832 
833 /*
834 ** Return TRUE (non-zero) if the To side of the given cost matches
835 ** the given string.
836 */
837 static int matchTo(EditDist3Cost *p, const char *z, int n){
838   assert( n>0 );
839   if( p->a[p->nFrom]!=z[0] ) return 0;
840   if( p->nTo>n ) return 0;
841   if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
842   return 1;
843 }
844 
845 /*
846 ** Return TRUE (non-zero) if the From side of the given cost matches
847 ** the given string.
848 */
849 static int matchFrom(EditDist3Cost *p, const char *z, int n){
850   assert( p->nFrom<=n );
851   if( p->nFrom ){
852     if( p->a[0]!=z[0] ) return 0;
853     if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
854   }
855   return 1;
856 }
857 
858 /*
859 ** Return TRUE (non-zero) of the next FROM character and the next TO
860 ** character are the same.
861 */
862 static int matchFromTo(
863   EditDist3FromString *pStr,  /* Left hand string */
864   int n1,                     /* Index of comparison character on the left */
865   const char *z2,             /* Right-handl comparison character */
866   int n2                      /* Bytes remaining in z2[] */
867 ){
868   int b1 = pStr->a[n1].nByte;
869   if( b1>n2 ) return 0;
870   assert( b1>0 );
871   if( pStr->z[n1]!=z2[0] ) return 0;
872   if( strncmp(pStr->z+n1, z2, b1)!=0 ) return 0;
873   return 1;
874 }
875 
876 /*
877 ** Delete an EditDist3FromString objecct
878 */
879 static void editDist3FromStringDelete(EditDist3FromString *p){
880   int i;
881   if( p ){
882     for(i=0; i<p->n; i++){
883       sqlite3_free(p->a[i].apDel);
884       sqlite3_free(p->a[i].apSubst);
885     }
886     sqlite3_free(p);
887   }
888 }
889 
890 /*
891 ** Create a EditDist3FromString object.
892 */
893 static EditDist3FromString *editDist3FromStringNew(
894   const EditDist3Lang *pLang,
895   const char *z,
896   int n
897 ){
898   EditDist3FromString *pStr;
899   EditDist3Cost *p;
900   int i;
901 
902   if( z==0 ) return 0;
903   if( n<0 ) n = (int)strlen(z);
904   pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
905   if( pStr==0 ) return 0;
906   pStr->a = (EditDist3From*)&pStr[1];
907   memset(pStr->a, 0, sizeof(pStr->a[0])*n);
908   pStr->n = n;
909   pStr->z = (char*)&pStr->a[n];
910   memcpy(pStr->z, z, n+1);
911   if( n && z[n-1]=='*' ){
912     pStr->isPrefix = 1;
913     n--;
914     pStr->n--;
915     pStr->z[n] = 0;
916   }else{
917     pStr->isPrefix = 0;
918   }
919 
920   for(i=0; i<n; i++){
921     EditDist3From *pFrom = &pStr->a[i];
922     memset(pFrom, 0, sizeof(*pFrom));
923     pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
924     for(p=pLang->pCost; p; p=p->pNext){
925       EditDist3Cost **apNew;
926       if( i+p->nFrom>n ) continue;
927       if( matchFrom(p, z+i, n-i)==0 ) continue;
928       if( p->nTo==0 ){
929         apNew = sqlite3_realloc64(pFrom->apDel,
930                                 sizeof(*apNew)*(pFrom->nDel+1));
931         if( apNew==0 ) break;
932         pFrom->apDel = apNew;
933         apNew[pFrom->nDel++] = p;
934       }else{
935         apNew = sqlite3_realloc64(pFrom->apSubst,
936                                 sizeof(*apNew)*(pFrom->nSubst+1));
937         if( apNew==0 ) break;
938         pFrom->apSubst = apNew;
939         apNew[pFrom->nSubst++] = p;
940       }
941     }
942     if( p ){
943       editDist3FromStringDelete(pStr);
944       pStr = 0;
945       break;
946     }
947   }
948   return pStr;
949 }
950 
951 /*
952 ** Update entry m[i] such that it is the minimum of its current value
953 ** and m[j]+iCost.
954 */
955 static void updateCost(
956   unsigned int *m,
957   int i,
958   int j,
959   int iCost
960 ){
961   unsigned int b;
962   assert( iCost>=0 );
963   assert( iCost<10000 );
964   b = m[j] + iCost;
965   if( b<m[i] ) m[i] = b;
966 }
967 
968 /*
969 ** How much stack space (int bytes) to use for Wagner matrix in
970 ** editDist3Core().  If more space than this is required, the entire
971 ** matrix is taken from the heap.  To reduce the load on the memory
972 ** allocator, make this value as large as practical for the
973 ** architecture in use.
974 */
975 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
976 # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
977 #endif
978 
979 /* Compute the edit distance between two strings.
980 **
981 ** If an error occurs, return a negative number which is the error code.
982 **
983 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
984 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
985 ** not contain the pattern for a prefix-search, then this is always the number
986 ** of characters in z2. If pFrom does contain a prefix search pattern, then
987 ** it is the number of characters in the prefix of z2 that was deemed to
988 ** match pFrom.
989 */
990 static int editDist3Core(
991   EditDist3FromString *pFrom,  /* The FROM string */
992   const char *z2,              /* The TO string */
993   int n2,                      /* Length of the TO string */
994   const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
995   int *pnMatch                 /* OUT: Characters in matched prefix */
996 ){
997   int k, n;
998   int i1, b1;
999   int i2, b2;
1000   EditDist3FromString f = *pFrom;
1001   EditDist3To *a2;
1002   unsigned int *m;
1003   unsigned int *pToFree;
1004   int szRow;
1005   EditDist3Cost *p;
1006   int res;
1007   sqlite3_uint64 nByte;
1008   unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
1009 
1010   /* allocate the Wagner matrix and the aTo[] array for the TO string */
1011   n = (f.n+1)*(n2+1);
1012   n = (n+1)&~1;
1013   nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
1014   if( nByte<=sizeof(stackSpace) ){
1015     m = stackSpace;
1016     pToFree = 0;
1017   }else{
1018     m = pToFree = sqlite3_malloc64( nByte );
1019     if( m==0 ) return -1;            /* Out of memory */
1020   }
1021   a2 = (EditDist3To*)&m[n];
1022   memset(a2, 0, sizeof(a2[0])*n2);
1023 
1024   /* Fill in the a1[] matrix for all characters of the TO string */
1025   for(i2=0; i2<n2; i2++){
1026     a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
1027     for(p=pLang->pCost; p; p=p->pNext){
1028       EditDist3Cost **apNew;
1029       if( p->nFrom>0 ) break;
1030       if( i2+p->nTo>n2 ) continue;
1031       if( p->a[0]>z2[i2] ) break;
1032       if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
1033       a2[i2].nIns++;
1034       apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
1035       if( apNew==0 ){
1036         res = -1;  /* Out of memory */
1037         goto editDist3Abort;
1038       }
1039       a2[i2].apIns = apNew;
1040       a2[i2].apIns[a2[i2].nIns-1] = p;
1041     }
1042   }
1043 
1044   /* Prepare to compute the minimum edit distance */
1045   szRow = f.n+1;
1046   memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
1047   m[0] = 0;
1048 
1049   /* First fill in the top-row of the matrix with FROM deletion costs */
1050   for(i1=0; i1<f.n; i1 += b1){
1051     b1 = f.a[i1].nByte;
1052     updateCost(m, i1+b1, i1, pLang->iDelCost);
1053     for(k=0; k<f.a[i1].nDel; k++){
1054       p = f.a[i1].apDel[k];
1055       updateCost(m, i1+p->nFrom, i1, p->iCost);
1056     }
1057   }
1058 
1059   /* Fill in all subsequent rows, top-to-bottom, left-to-right */
1060   for(i2=0; i2<n2; i2 += b2){
1061     int rx;      /* Starting index for current row */
1062     int rxp;     /* Starting index for previous row */
1063     b2 = a2[i2].nByte;
1064     rx = szRow*(i2+b2);
1065     rxp = szRow*i2;
1066     updateCost(m, rx, rxp, pLang->iInsCost);
1067     for(k=0; k<a2[i2].nIns; k++){
1068       p = a2[i2].apIns[k];
1069       updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
1070     }
1071     for(i1=0; i1<f.n; i1+=b1){
1072       int cx;    /* Index of current cell */
1073       int cxp;   /* Index of cell immediately to the left */
1074       int cxd;   /* Index of cell to the left and one row above */
1075       int cxu;   /* Index of cell immediately above */
1076       b1 = f.a[i1].nByte;
1077       cxp = rx + i1;
1078       cx = cxp + b1;
1079       cxd = rxp + i1;
1080       cxu = cxd + b1;
1081       updateCost(m, cx, cxp, pLang->iDelCost);
1082       for(k=0; k<f.a[i1].nDel; k++){
1083         p = f.a[i1].apDel[k];
1084         updateCost(m, cxp+p->nFrom, cxp, p->iCost);
1085       }
1086       updateCost(m, cx, cxu, pLang->iInsCost);
1087       if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
1088         updateCost(m, cx, cxd, 0);
1089       }
1090       updateCost(m, cx, cxd, pLang->iSubCost);
1091       for(k=0; k<f.a[i1].nSubst; k++){
1092         p = f.a[i1].apSubst[k];
1093         if( matchTo(p, z2+i2, n2-i2) ){
1094           updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1095         }
1096       }
1097     }
1098   }
1099 
1100 #if 0  /* Enable for debugging */
1101   printf("         ^");
1102   for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1103   printf("\n   ^:");
1104   for(i1=0; i1<szRow; i1++){
1105     int v = m[i1];
1106     if( v>9999 ) printf(" ****");
1107     else         printf(" %4d", v);
1108   }
1109   printf("\n");
1110   for(i2=0; i2<n2; i2++){
1111     printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1112     for(i1=0; i1<szRow; i1++){
1113       int v = m[(i2+1)*szRow+i1];
1114       if( v>9999 ) printf(" ****");
1115       else         printf(" %4d", v);
1116     }
1117     printf("\n");
1118   }
1119 #endif
1120 
1121   /* Free memory allocations and return the result */
1122   res = (int)m[szRow*(n2+1)-1];
1123   n = n2;
1124   if( f.isPrefix ){
1125     for(i2=1; i2<=n2; i2++){
1126       int b = m[szRow*i2-1];
1127       if( b<=res ){
1128         res = b;
1129         n = i2 - 1;
1130       }
1131     }
1132   }
1133   if( pnMatch ){
1134     int nExtra = 0;
1135     for(k=0; k<n; k++){
1136       if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1137     }
1138     *pnMatch = n - nExtra;
1139   }
1140 
1141 editDist3Abort:
1142   for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1143   sqlite3_free(pToFree);
1144   return res;
1145 }
1146 
1147 /*
1148 ** Get an appropriate EditDist3Lang object.
1149 */
1150 static const EditDist3Lang *editDist3FindLang(
1151   EditDist3Config *pConfig,
1152   int iLang
1153 ){
1154   int i;
1155   for(i=0; i<pConfig->nLang; i++){
1156     if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1157   }
1158   return &editDist3Lang;
1159 }
1160 
1161 /*
1162 ** Function:    editdist3(A,B,iLang)
1163 **              editdist3(tablename)
1164 **
1165 ** Return the cost of transforming string A into string B using edit
1166 ** weights for iLang.
1167 **
1168 ** The second form loads edit weights into memory from a table.
1169 */
1170 static void editDist3SqlFunc(
1171   sqlite3_context *context,
1172   int argc,
1173   sqlite3_value **argv
1174 ){
1175   EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1176   sqlite3 *db = sqlite3_context_db_handle(context);
1177   int rc;
1178   if( argc==1 ){
1179     const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1180     rc = editDist3ConfigLoad(pConfig, db, zTable);
1181     if( rc ) sqlite3_result_error_code(context, rc);
1182   }else{
1183     const char *zA = (const char*)sqlite3_value_text(argv[0]);
1184     const char *zB = (const char*)sqlite3_value_text(argv[1]);
1185     int nA = sqlite3_value_bytes(argv[0]);
1186     int nB = sqlite3_value_bytes(argv[1]);
1187     int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1188     const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1189     EditDist3FromString *pFrom;
1190     int dist;
1191 
1192     pFrom = editDist3FromStringNew(pLang, zA, nA);
1193     if( pFrom==0 ){
1194       sqlite3_result_error_nomem(context);
1195       return;
1196     }
1197     dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1198     editDist3FromStringDelete(pFrom);
1199     if( dist==(-1) ){
1200       sqlite3_result_error_nomem(context);
1201     }else{
1202       sqlite3_result_int(context, dist);
1203     }
1204   }
1205 }
1206 
1207 /*
1208 ** Register the editDist3 function with SQLite
1209 */
1210 static int editDist3Install(sqlite3 *db){
1211   int rc;
1212   EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1213   if( pConfig==0 ) return SQLITE_NOMEM;
1214   memset(pConfig, 0, sizeof(*pConfig));
1215   rc = sqlite3_create_function_v2(db, "editdist3",
1216               2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1217               editDist3SqlFunc, 0, 0, 0);
1218   if( rc==SQLITE_OK ){
1219     rc = sqlite3_create_function_v2(db, "editdist3",
1220                 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1221                 editDist3SqlFunc, 0, 0, 0);
1222   }
1223   if( rc==SQLITE_OK ){
1224     rc = sqlite3_create_function_v2(db, "editdist3",
1225                 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1226                 editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1227   }else{
1228     sqlite3_free(pConfig);
1229   }
1230   return rc;
1231 }
1232 /* End configurable cost unicode edit distance routines
1233 ******************************************************************************
1234 ******************************************************************************
1235 ** Begin transliterate unicode-to-ascii implementation
1236 */
1237 
1238 #if !SQLITE_AMALGAMATION
1239 /*
1240 ** This lookup table is used to help decode the first byte of
1241 ** a multi-byte UTF8 character.
1242 */
1243 static const unsigned char sqlite3Utf8Trans1[] = {
1244   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1245   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1246   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1247   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1248   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1249   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1250   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1251   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1252 };
1253 #endif
1254 
1255 /*
1256 ** Return the value of the first UTF-8 character in the string.
1257 */
1258 static int utf8Read(const unsigned char *z, int n, int *pSize){
1259   int c, i;
1260 
1261   /* All callers to this routine (in the current implementation)
1262   ** always have n>0. */
1263   if( NEVER(n==0) ){
1264     c = i = 0;
1265   }else{
1266     c = z[0];
1267     i = 1;
1268     if( c>=0xc0 ){
1269       c = sqlite3Utf8Trans1[c-0xc0];
1270       while( i<n && (z[i] & 0xc0)==0x80 ){
1271         c = (c<<6) + (0x3f & z[i++]);
1272       }
1273     }
1274   }
1275   *pSize = i;
1276   return c;
1277 }
1278 
1279 /*
1280 ** Return the number of characters in the utf-8 string in the nIn byte
1281 ** buffer pointed to by zIn.
1282 */
1283 static int utf8Charlen(const char *zIn, int nIn){
1284   int i;
1285   int nChar = 0;
1286   for(i=0; i<nIn; nChar++){
1287     int sz;
1288     utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1289     i += sz;
1290   }
1291   return nChar;
1292 }
1293 
1294 typedef struct Transliteration Transliteration;
1295 struct Transliteration {
1296  unsigned short int cFrom;
1297  unsigned char cTo0, cTo1, cTo2, cTo3;
1298 };
1299 
1300 /*
1301 ** Table of translations from unicode characters into ASCII.
1302 */
1303 static const Transliteration translit[] = {
1304   { 0x00A0,  0x20, 0x00, 0x00, 0x00 },  /*   to   */
1305   { 0x00B5,  0x75, 0x00, 0x00, 0x00 },  /* µ to u */
1306   { 0x00C0,  0x41, 0x00, 0x00, 0x00 },  /* À to A */
1307   { 0x00C1,  0x41, 0x00, 0x00, 0x00 },  /* Á to A */
1308   { 0x00C2,  0x41, 0x00, 0x00, 0x00 },  /* Â to A */
1309   { 0x00C3,  0x41, 0x00, 0x00, 0x00 },  /* Ã to A */
1310   { 0x00C4,  0x41, 0x65, 0x00, 0x00 },  /* Ä to Ae */
1311   { 0x00C5,  0x41, 0x61, 0x00, 0x00 },  /* Å to Aa */
1312   { 0x00C6,  0x41, 0x45, 0x00, 0x00 },  /* Æ to AE */
1313   { 0x00C7,  0x43, 0x00, 0x00, 0x00 },  /* Ç to C */
1314   { 0x00C8,  0x45, 0x00, 0x00, 0x00 },  /* È to E */
1315   { 0x00C9,  0x45, 0x00, 0x00, 0x00 },  /* É to E */
1316   { 0x00CA,  0x45, 0x00, 0x00, 0x00 },  /* Ê to E */
1317   { 0x00CB,  0x45, 0x00, 0x00, 0x00 },  /* Ë to E */
1318   { 0x00CC,  0x49, 0x00, 0x00, 0x00 },  /* Ì to I */
1319   { 0x00CD,  0x49, 0x00, 0x00, 0x00 },  /* Í to I */
1320   { 0x00CE,  0x49, 0x00, 0x00, 0x00 },  /* Î to I */
1321   { 0x00CF,  0x49, 0x00, 0x00, 0x00 },  /* Ï to I */
1322   { 0x00D0,  0x44, 0x00, 0x00, 0x00 },  /* Ð to D */
1323   { 0x00D1,  0x4E, 0x00, 0x00, 0x00 },  /* Ñ to N */
1324   { 0x00D2,  0x4F, 0x00, 0x00, 0x00 },  /* Ò to O */
1325   { 0x00D3,  0x4F, 0x00, 0x00, 0x00 },  /* Ó to O */
1326   { 0x00D4,  0x4F, 0x00, 0x00, 0x00 },  /* Ô to O */
1327   { 0x00D5,  0x4F, 0x00, 0x00, 0x00 },  /* Õ to O */
1328   { 0x00D6,  0x4F, 0x65, 0x00, 0x00 },  /* Ö to Oe */
1329   { 0x00D7,  0x78, 0x00, 0x00, 0x00 },  /* × to x */
1330   { 0x00D8,  0x4F, 0x00, 0x00, 0x00 },  /* Ø to O */
1331   { 0x00D9,  0x55, 0x00, 0x00, 0x00 },  /* Ù to U */
1332   { 0x00DA,  0x55, 0x00, 0x00, 0x00 },  /* Ú to U */
1333   { 0x00DB,  0x55, 0x00, 0x00, 0x00 },  /* Û to U */
1334   { 0x00DC,  0x55, 0x65, 0x00, 0x00 },  /* Ü to Ue */
1335   { 0x00DD,  0x59, 0x00, 0x00, 0x00 },  /* Ý to Y */
1336   { 0x00DE,  0x54, 0x68, 0x00, 0x00 },  /* Þ to Th */
1337   { 0x00DF,  0x73, 0x73, 0x00, 0x00 },  /* ß to ss */
1338   { 0x00E0,  0x61, 0x00, 0x00, 0x00 },  /* à to a */
1339   { 0x00E1,  0x61, 0x00, 0x00, 0x00 },  /* á to a */
1340   { 0x00E2,  0x61, 0x00, 0x00, 0x00 },  /* â to a */
1341   { 0x00E3,  0x61, 0x00, 0x00, 0x00 },  /* ã to a */
1342   { 0x00E4,  0x61, 0x65, 0x00, 0x00 },  /* ä to ae */
1343   { 0x00E5,  0x61, 0x61, 0x00, 0x00 },  /* å to aa */
1344   { 0x00E6,  0x61, 0x65, 0x00, 0x00 },  /* æ to ae */
1345   { 0x00E7,  0x63, 0x00, 0x00, 0x00 },  /* ç to c */
1346   { 0x00E8,  0x65, 0x00, 0x00, 0x00 },  /* è to e */
1347   { 0x00E9,  0x65, 0x00, 0x00, 0x00 },  /* é to e */
1348   { 0x00EA,  0x65, 0x00, 0x00, 0x00 },  /* ê to e */
1349   { 0x00EB,  0x65, 0x00, 0x00, 0x00 },  /* ë to e */
1350   { 0x00EC,  0x69, 0x00, 0x00, 0x00 },  /* ì to i */
1351   { 0x00ED,  0x69, 0x00, 0x00, 0x00 },  /* í to i */
1352   { 0x00EE,  0x69, 0x00, 0x00, 0x00 },  /* î to i */
1353   { 0x00EF,  0x69, 0x00, 0x00, 0x00 },  /* ï to i */
1354   { 0x00F0,  0x64, 0x00, 0x00, 0x00 },  /* ð to d */
1355   { 0x00F1,  0x6E, 0x00, 0x00, 0x00 },  /* ñ to n */
1356   { 0x00F2,  0x6F, 0x00, 0x00, 0x00 },  /* ò to o */
1357   { 0x00F3,  0x6F, 0x00, 0x00, 0x00 },  /* ó to o */
1358   { 0x00F4,  0x6F, 0x00, 0x00, 0x00 },  /* ô to o */
1359   { 0x00F5,  0x6F, 0x00, 0x00, 0x00 },  /* õ to o */
1360   { 0x00F6,  0x6F, 0x65, 0x00, 0x00 },  /* ö to oe */
1361   { 0x00F7,  0x3A, 0x00, 0x00, 0x00 },  /* ÷ to : */
1362   { 0x00F8,  0x6F, 0x00, 0x00, 0x00 },  /* ø to o */
1363   { 0x00F9,  0x75, 0x00, 0x00, 0x00 },  /* ù to u */
1364   { 0x00FA,  0x75, 0x00, 0x00, 0x00 },  /* ú to u */
1365   { 0x00FB,  0x75, 0x00, 0x00, 0x00 },  /* û to u */
1366   { 0x00FC,  0x75, 0x65, 0x00, 0x00 },  /* ü to ue */
1367   { 0x00FD,  0x79, 0x00, 0x00, 0x00 },  /* ý to y */
1368   { 0x00FE,  0x74, 0x68, 0x00, 0x00 },  /* þ to th */
1369   { 0x00FF,  0x79, 0x00, 0x00, 0x00 },  /* ÿ to y */
1370   { 0x0100,  0x41, 0x00, 0x00, 0x00 },  /* Ā to A */
1371   { 0x0101,  0x61, 0x00, 0x00, 0x00 },  /* ā to a */
1372   { 0x0102,  0x41, 0x00, 0x00, 0x00 },  /* Ă to A */
1373   { 0x0103,  0x61, 0x00, 0x00, 0x00 },  /* ă to a */
1374   { 0x0104,  0x41, 0x00, 0x00, 0x00 },  /* Ą to A */
1375   { 0x0105,  0x61, 0x00, 0x00, 0x00 },  /* ą to a */
1376   { 0x0106,  0x43, 0x00, 0x00, 0x00 },  /* Ć to C */
1377   { 0x0107,  0x63, 0x00, 0x00, 0x00 },  /* ć to c */
1378   { 0x0108,  0x43, 0x68, 0x00, 0x00 },  /* Ĉ to Ch */
1379   { 0x0109,  0x63, 0x68, 0x00, 0x00 },  /* ĉ to ch */
1380   { 0x010A,  0x43, 0x00, 0x00, 0x00 },  /* Ċ to C */
1381   { 0x010B,  0x63, 0x00, 0x00, 0x00 },  /* ċ to c */
1382   { 0x010C,  0x43, 0x00, 0x00, 0x00 },  /* Č to C */
1383   { 0x010D,  0x63, 0x00, 0x00, 0x00 },  /* č to c */
1384   { 0x010E,  0x44, 0x00, 0x00, 0x00 },  /* Ď to D */
1385   { 0x010F,  0x64, 0x00, 0x00, 0x00 },  /* ď to d */
1386   { 0x0110,  0x44, 0x00, 0x00, 0x00 },  /* Đ to D */
1387   { 0x0111,  0x64, 0x00, 0x00, 0x00 },  /* đ to d */
1388   { 0x0112,  0x45, 0x00, 0x00, 0x00 },  /* Ē to E */
1389   { 0x0113,  0x65, 0x00, 0x00, 0x00 },  /* ē to e */
1390   { 0x0114,  0x45, 0x00, 0x00, 0x00 },  /* Ĕ to E */
1391   { 0x0115,  0x65, 0x00, 0x00, 0x00 },  /* ĕ to e */
1392   { 0x0116,  0x45, 0x00, 0x00, 0x00 },  /* Ė to E */
1393   { 0x0117,  0x65, 0x00, 0x00, 0x00 },  /* ė to e */
1394   { 0x0118,  0x45, 0x00, 0x00, 0x00 },  /* Ę to E */
1395   { 0x0119,  0x65, 0x00, 0x00, 0x00 },  /* ę to e */
1396   { 0x011A,  0x45, 0x00, 0x00, 0x00 },  /* Ě to E */
1397   { 0x011B,  0x65, 0x00, 0x00, 0x00 },  /* ě to e */
1398   { 0x011C,  0x47, 0x68, 0x00, 0x00 },  /* Ĝ to Gh */
1399   { 0x011D,  0x67, 0x68, 0x00, 0x00 },  /* ĝ to gh */
1400   { 0x011E,  0x47, 0x00, 0x00, 0x00 },  /* Ğ to G */
1401   { 0x011F,  0x67, 0x00, 0x00, 0x00 },  /* ğ to g */
1402   { 0x0120,  0x47, 0x00, 0x00, 0x00 },  /* Ġ to G */
1403   { 0x0121,  0x67, 0x00, 0x00, 0x00 },  /* ġ to g */
1404   { 0x0122,  0x47, 0x00, 0x00, 0x00 },  /* Ģ to G */
1405   { 0x0123,  0x67, 0x00, 0x00, 0x00 },  /* ģ to g */
1406   { 0x0124,  0x48, 0x68, 0x00, 0x00 },  /* Ĥ to Hh */
1407   { 0x0125,  0x68, 0x68, 0x00, 0x00 },  /* ĥ to hh */
1408   { 0x0126,  0x48, 0x00, 0x00, 0x00 },  /* Ħ to H */
1409   { 0x0127,  0x68, 0x00, 0x00, 0x00 },  /* ħ to h */
1410   { 0x0128,  0x49, 0x00, 0x00, 0x00 },  /* Ĩ to I */
1411   { 0x0129,  0x69, 0x00, 0x00, 0x00 },  /* ĩ to i */
1412   { 0x012A,  0x49, 0x00, 0x00, 0x00 },  /* Ī to I */
1413   { 0x012B,  0x69, 0x00, 0x00, 0x00 },  /* ī to i */
1414   { 0x012C,  0x49, 0x00, 0x00, 0x00 },  /* Ĭ to I */
1415   { 0x012D,  0x69, 0x00, 0x00, 0x00 },  /* ĭ to i */
1416   { 0x012E,  0x49, 0x00, 0x00, 0x00 },  /* Į to I */
1417   { 0x012F,  0x69, 0x00, 0x00, 0x00 },  /* į to i */
1418   { 0x0130,  0x49, 0x00, 0x00, 0x00 },  /* İ to I */
1419   { 0x0131,  0x69, 0x00, 0x00, 0x00 },  /* ı to i */
1420   { 0x0132,  0x49, 0x4A, 0x00, 0x00 },  /* IJ to IJ */
1421   { 0x0133,  0x69, 0x6A, 0x00, 0x00 },  /* ij to ij */
1422   { 0x0134,  0x4A, 0x68, 0x00, 0x00 },  /* Ĵ to Jh */
1423   { 0x0135,  0x6A, 0x68, 0x00, 0x00 },  /* ĵ to jh */
1424   { 0x0136,  0x4B, 0x00, 0x00, 0x00 },  /* Ķ to K */
1425   { 0x0137,  0x6B, 0x00, 0x00, 0x00 },  /* ķ to k */
1426   { 0x0138,  0x6B, 0x00, 0x00, 0x00 },  /* ĸ to k */
1427   { 0x0139,  0x4C, 0x00, 0x00, 0x00 },  /* Ĺ to L */
1428   { 0x013A,  0x6C, 0x00, 0x00, 0x00 },  /* ĺ to l */
1429   { 0x013B,  0x4C, 0x00, 0x00, 0x00 },  /* Ļ to L */
1430   { 0x013C,  0x6C, 0x00, 0x00, 0x00 },  /* ļ to l */
1431   { 0x013D,  0x4C, 0x00, 0x00, 0x00 },  /* Ľ to L */
1432   { 0x013E,  0x6C, 0x00, 0x00, 0x00 },  /* ľ to l */
1433   { 0x013F,  0x4C, 0x2E, 0x00, 0x00 },  /* Ŀ to L. */
1434   { 0x0140,  0x6C, 0x2E, 0x00, 0x00 },  /* ŀ to l. */
1435   { 0x0141,  0x4C, 0x00, 0x00, 0x00 },  /* Ł to L */
1436   { 0x0142,  0x6C, 0x00, 0x00, 0x00 },  /* ł to l */
1437   { 0x0143,  0x4E, 0x00, 0x00, 0x00 },  /* Ń to N */
1438   { 0x0144,  0x6E, 0x00, 0x00, 0x00 },  /* ń to n */
1439   { 0x0145,  0x4E, 0x00, 0x00, 0x00 },  /* Ņ to N */
1440   { 0x0146,  0x6E, 0x00, 0x00, 0x00 },  /* ņ to n */
1441   { 0x0147,  0x4E, 0x00, 0x00, 0x00 },  /* Ň to N */
1442   { 0x0148,  0x6E, 0x00, 0x00, 0x00 },  /* ň to n */
1443   { 0x0149,  0x27, 0x6E, 0x00, 0x00 },  /* ʼn to 'n */
1444   { 0x014A,  0x4E, 0x47, 0x00, 0x00 },  /* Ŋ to NG */
1445   { 0x014B,  0x6E, 0x67, 0x00, 0x00 },  /* ŋ to ng */
1446   { 0x014C,  0x4F, 0x00, 0x00, 0x00 },  /* Ō to O */
1447   { 0x014D,  0x6F, 0x00, 0x00, 0x00 },  /* ō to o */
1448   { 0x014E,  0x4F, 0x00, 0x00, 0x00 },  /* Ŏ to O */
1449   { 0x014F,  0x6F, 0x00, 0x00, 0x00 },  /* ŏ to o */
1450   { 0x0150,  0x4F, 0x00, 0x00, 0x00 },  /* Ő to O */
1451   { 0x0151,  0x6F, 0x00, 0x00, 0x00 },  /* ő to o */
1452   { 0x0152,  0x4F, 0x45, 0x00, 0x00 },  /* Œ to OE */
1453   { 0x0153,  0x6F, 0x65, 0x00, 0x00 },  /* œ to oe */
1454   { 0x0154,  0x52, 0x00, 0x00, 0x00 },  /* Ŕ to R */
1455   { 0x0155,  0x72, 0x00, 0x00, 0x00 },  /* ŕ to r */
1456   { 0x0156,  0x52, 0x00, 0x00, 0x00 },  /* Ŗ to R */
1457   { 0x0157,  0x72, 0x00, 0x00, 0x00 },  /* ŗ to r */
1458   { 0x0158,  0x52, 0x00, 0x00, 0x00 },  /* Ř to R */
1459   { 0x0159,  0x72, 0x00, 0x00, 0x00 },  /* ř to r */
1460   { 0x015A,  0x53, 0x00, 0x00, 0x00 },  /* Ś to S */
1461   { 0x015B,  0x73, 0x00, 0x00, 0x00 },  /* ś to s */
1462   { 0x015C,  0x53, 0x68, 0x00, 0x00 },  /* Ŝ to Sh */
1463   { 0x015D,  0x73, 0x68, 0x00, 0x00 },  /* ŝ to sh */
1464   { 0x015E,  0x53, 0x00, 0x00, 0x00 },  /* Ş to S */
1465   { 0x015F,  0x73, 0x00, 0x00, 0x00 },  /* ş to s */
1466   { 0x0160,  0x53, 0x00, 0x00, 0x00 },  /* Š to S */
1467   { 0x0161,  0x73, 0x00, 0x00, 0x00 },  /* š to s */
1468   { 0x0162,  0x54, 0x00, 0x00, 0x00 },  /* Ţ to T */
1469   { 0x0163,  0x74, 0x00, 0x00, 0x00 },  /* ţ to t */
1470   { 0x0164,  0x54, 0x00, 0x00, 0x00 },  /* Ť to T */
1471   { 0x0165,  0x74, 0x00, 0x00, 0x00 },  /* ť to t */
1472   { 0x0166,  0x54, 0x00, 0x00, 0x00 },  /* Ŧ to T */
1473   { 0x0167,  0x74, 0x00, 0x00, 0x00 },  /* ŧ to t */
1474   { 0x0168,  0x55, 0x00, 0x00, 0x00 },  /* Ũ to U */
1475   { 0x0169,  0x75, 0x00, 0x00, 0x00 },  /* ũ to u */
1476   { 0x016A,  0x55, 0x00, 0x00, 0x00 },  /* Ū to U */
1477   { 0x016B,  0x75, 0x00, 0x00, 0x00 },  /* ū to u */
1478   { 0x016C,  0x55, 0x00, 0x00, 0x00 },  /* Ŭ to U */
1479   { 0x016D,  0x75, 0x00, 0x00, 0x00 },  /* ŭ to u */
1480   { 0x016E,  0x55, 0x00, 0x00, 0x00 },  /* Ů to U */
1481   { 0x016F,  0x75, 0x00, 0x00, 0x00 },  /* ů to u */
1482   { 0x0170,  0x55, 0x00, 0x00, 0x00 },  /* Ű to U */
1483   { 0x0171,  0x75, 0x00, 0x00, 0x00 },  /* ű to u */
1484   { 0x0172,  0x55, 0x00, 0x00, 0x00 },  /* Ų to U */
1485   { 0x0173,  0x75, 0x00, 0x00, 0x00 },  /* ų to u */
1486   { 0x0174,  0x57, 0x00, 0x00, 0x00 },  /* Ŵ to W */
1487   { 0x0175,  0x77, 0x00, 0x00, 0x00 },  /* ŵ to w */
1488   { 0x0176,  0x59, 0x00, 0x00, 0x00 },  /* Ŷ to Y */
1489   { 0x0177,  0x79, 0x00, 0x00, 0x00 },  /* ŷ to y */
1490   { 0x0178,  0x59, 0x00, 0x00, 0x00 },  /* Ÿ to Y */
1491   { 0x0179,  0x5A, 0x00, 0x00, 0x00 },  /* Ź to Z */
1492   { 0x017A,  0x7A, 0x00, 0x00, 0x00 },  /* ź to z */
1493   { 0x017B,  0x5A, 0x00, 0x00, 0x00 },  /* Ż to Z */
1494   { 0x017C,  0x7A, 0x00, 0x00, 0x00 },  /* ż to z */
1495   { 0x017D,  0x5A, 0x00, 0x00, 0x00 },  /* Ž to Z */
1496   { 0x017E,  0x7A, 0x00, 0x00, 0x00 },  /* ž to z */
1497   { 0x017F,  0x73, 0x00, 0x00, 0x00 },  /* ſ to s */
1498   { 0x0192,  0x66, 0x00, 0x00, 0x00 },  /* ƒ to f */
1499   { 0x0218,  0x53, 0x00, 0x00, 0x00 },  /* Ș to S */
1500   { 0x0219,  0x73, 0x00, 0x00, 0x00 },  /* ș to s */
1501   { 0x021A,  0x54, 0x00, 0x00, 0x00 },  /* Ț to T */
1502   { 0x021B,  0x74, 0x00, 0x00, 0x00 },  /* ț to t */
1503   { 0x0386,  0x41, 0x00, 0x00, 0x00 },  /* Ά to A */
1504   { 0x0388,  0x45, 0x00, 0x00, 0x00 },  /* Έ to E */
1505   { 0x0389,  0x49, 0x00, 0x00, 0x00 },  /* Ή to I */
1506   { 0x038A,  0x49, 0x00, 0x00, 0x00 },  /* Ί to I */
1507   { 0x038C,  0x4f, 0x00, 0x00, 0x00 },  /* Ό to O */
1508   { 0x038E,  0x59, 0x00, 0x00, 0x00 },  /* Ύ to Y */
1509   { 0x038F,  0x4f, 0x00, 0x00, 0x00 },  /* Ώ to O */
1510   { 0x0390,  0x69, 0x00, 0x00, 0x00 },  /* ΐ to i */
1511   { 0x0391,  0x41, 0x00, 0x00, 0x00 },  /* Α to A */
1512   { 0x0392,  0x42, 0x00, 0x00, 0x00 },  /* Β to B */
1513   { 0x0393,  0x47, 0x00, 0x00, 0x00 },  /* Γ to G */
1514   { 0x0394,  0x44, 0x00, 0x00, 0x00 },  /* Δ to D */
1515   { 0x0395,  0x45, 0x00, 0x00, 0x00 },  /* Ε to E */
1516   { 0x0396,  0x5a, 0x00, 0x00, 0x00 },  /* Ζ to Z */
1517   { 0x0397,  0x49, 0x00, 0x00, 0x00 },  /* Η to I */
1518   { 0x0398,  0x54, 0x68, 0x00, 0x00 },  /* Θ to Th */
1519   { 0x0399,  0x49, 0x00, 0x00, 0x00 },  /* Ι to I */
1520   { 0x039A,  0x4b, 0x00, 0x00, 0x00 },  /* Κ to K */
1521   { 0x039B,  0x4c, 0x00, 0x00, 0x00 },  /* Λ to L */
1522   { 0x039C,  0x4d, 0x00, 0x00, 0x00 },  /* Μ to M */
1523   { 0x039D,  0x4e, 0x00, 0x00, 0x00 },  /* Ν to N */
1524   { 0x039E,  0x58, 0x00, 0x00, 0x00 },  /* Ξ to X */
1525   { 0x039F,  0x4f, 0x00, 0x00, 0x00 },  /* Ο to O */
1526   { 0x03A0,  0x50, 0x00, 0x00, 0x00 },  /* Π to P */
1527   { 0x03A1,  0x52, 0x00, 0x00, 0x00 },  /* Ρ to R */
1528   { 0x03A3,  0x53, 0x00, 0x00, 0x00 },  /* Σ to S */
1529   { 0x03A4,  0x54, 0x00, 0x00, 0x00 },  /* Τ to T */
1530   { 0x03A5,  0x59, 0x00, 0x00, 0x00 },  /* Υ to Y */
1531   { 0x03A6,  0x46, 0x00, 0x00, 0x00 },  /* Φ to F */
1532   { 0x03A7,  0x43, 0x68, 0x00, 0x00 },  /* Χ to Ch */
1533   { 0x03A8,  0x50, 0x73, 0x00, 0x00 },  /* Ψ to Ps */
1534   { 0x03A9,  0x4f, 0x00, 0x00, 0x00 },  /* Ω to O */
1535   { 0x03AA,  0x49, 0x00, 0x00, 0x00 },  /* Ϊ to I */
1536   { 0x03AB,  0x59, 0x00, 0x00, 0x00 },  /* Ϋ to Y */
1537   { 0x03AC,  0x61, 0x00, 0x00, 0x00 },  /* ά to a */
1538   { 0x03AD,  0x65, 0x00, 0x00, 0x00 },  /* έ to e */
1539   { 0x03AE,  0x69, 0x00, 0x00, 0x00 },  /* ή to i */
1540   { 0x03AF,  0x69, 0x00, 0x00, 0x00 },  /* ί to i */
1541   { 0x03B1,  0x61, 0x00, 0x00, 0x00 },  /* α to a */
1542   { 0x03B2,  0x62, 0x00, 0x00, 0x00 },  /* β to b */
1543   { 0x03B3,  0x67, 0x00, 0x00, 0x00 },  /* γ to g */
1544   { 0x03B4,  0x64, 0x00, 0x00, 0x00 },  /* δ to d */
1545   { 0x03B5,  0x65, 0x00, 0x00, 0x00 },  /* ε to e */
1546   { 0x03B6,  0x7a, 0x00, 0x00, 0x00 },  /* ζ to z */
1547   { 0x03B7,  0x69, 0x00, 0x00, 0x00 },  /* η to i */
1548   { 0x03B8,  0x74, 0x68, 0x00, 0x00 },  /* θ to th */
1549   { 0x03B9,  0x69, 0x00, 0x00, 0x00 },  /* ι to i */
1550   { 0x03BA,  0x6b, 0x00, 0x00, 0x00 },  /* κ to k */
1551   { 0x03BB,  0x6c, 0x00, 0x00, 0x00 },  /* λ to l */
1552   { 0x03BC,  0x6d, 0x00, 0x00, 0x00 },  /* μ to m */
1553   { 0x03BD,  0x6e, 0x00, 0x00, 0x00 },  /* ν to n */
1554   { 0x03BE,  0x78, 0x00, 0x00, 0x00 },  /* ξ to x */
1555   { 0x03BF,  0x6f, 0x00, 0x00, 0x00 },  /* ο to o */
1556   { 0x03C0,  0x70, 0x00, 0x00, 0x00 },  /* π to p */
1557   { 0x03C1,  0x72, 0x00, 0x00, 0x00 },  /* ρ to r */
1558   { 0x03C3,  0x73, 0x00, 0x00, 0x00 },  /* σ to s */
1559   { 0x03C4,  0x74, 0x00, 0x00, 0x00 },  /* τ to t */
1560   { 0x03C5,  0x79, 0x00, 0x00, 0x00 },  /* υ to y */
1561   { 0x03C6,  0x66, 0x00, 0x00, 0x00 },  /* φ to f */
1562   { 0x03C7,  0x63, 0x68, 0x00, 0x00 },  /* χ to ch */
1563   { 0x03C8,  0x70, 0x73, 0x00, 0x00 },  /* ψ to ps */
1564   { 0x03C9,  0x6f, 0x00, 0x00, 0x00 },  /* ω to o */
1565   { 0x03CA,  0x69, 0x00, 0x00, 0x00 },  /* ϊ to i */
1566   { 0x03CB,  0x79, 0x00, 0x00, 0x00 },  /* ϋ to y */
1567   { 0x03CC,  0x6f, 0x00, 0x00, 0x00 },  /* ό to o */
1568   { 0x03CD,  0x79, 0x00, 0x00, 0x00 },  /* ύ to y */
1569   { 0x03CE,  0x69, 0x00, 0x00, 0x00 },  /* ώ to i */
1570   { 0x0400,  0x45, 0x00, 0x00, 0x00 },  /* Ѐ to E */
1571   { 0x0401,  0x45, 0x00, 0x00, 0x00 },  /* Ё to E */
1572   { 0x0402,  0x44, 0x00, 0x00, 0x00 },  /* Ђ to D */
1573   { 0x0403,  0x47, 0x00, 0x00, 0x00 },  /* Ѓ to G */
1574   { 0x0404,  0x45, 0x00, 0x00, 0x00 },  /* Є to E */
1575   { 0x0405,  0x5a, 0x00, 0x00, 0x00 },  /* Ѕ to Z */
1576   { 0x0406,  0x49, 0x00, 0x00, 0x00 },  /* І to I */
1577   { 0x0407,  0x49, 0x00, 0x00, 0x00 },  /* Ї to I */
1578   { 0x0408,  0x4a, 0x00, 0x00, 0x00 },  /* Ј to J */
1579   { 0x0409,  0x49, 0x00, 0x00, 0x00 },  /* Љ to I */
1580   { 0x040A,  0x4e, 0x00, 0x00, 0x00 },  /* Њ to N */
1581   { 0x040B,  0x44, 0x00, 0x00, 0x00 },  /* Ћ to D */
1582   { 0x040C,  0x4b, 0x00, 0x00, 0x00 },  /* Ќ to K */
1583   { 0x040D,  0x49, 0x00, 0x00, 0x00 },  /* Ѝ to I */
1584   { 0x040E,  0x55, 0x00, 0x00, 0x00 },  /* Ў to U */
1585   { 0x040F,  0x44, 0x00, 0x00, 0x00 },  /* Џ to D */
1586   { 0x0410,  0x41, 0x00, 0x00, 0x00 },  /* А to A */
1587   { 0x0411,  0x42, 0x00, 0x00, 0x00 },  /* Б to B */
1588   { 0x0412,  0x56, 0x00, 0x00, 0x00 },  /* В to V */
1589   { 0x0413,  0x47, 0x00, 0x00, 0x00 },  /* Г to G */
1590   { 0x0414,  0x44, 0x00, 0x00, 0x00 },  /* Д to D */
1591   { 0x0415,  0x45, 0x00, 0x00, 0x00 },  /* Е to E */
1592   { 0x0416,  0x5a, 0x68, 0x00, 0x00 },  /* Ж to Zh */
1593   { 0x0417,  0x5a, 0x00, 0x00, 0x00 },  /* З to Z */
1594   { 0x0418,  0x49, 0x00, 0x00, 0x00 },  /* И to I */
1595   { 0x0419,  0x49, 0x00, 0x00, 0x00 },  /* Й to I */
1596   { 0x041A,  0x4b, 0x00, 0x00, 0x00 },  /* К to K */
1597   { 0x041B,  0x4c, 0x00, 0x00, 0x00 },  /* Л to L */
1598   { 0x041C,  0x4d, 0x00, 0x00, 0x00 },  /* М to M */
1599   { 0x041D,  0x4e, 0x00, 0x00, 0x00 },  /* Н to N */
1600   { 0x041E,  0x4f, 0x00, 0x00, 0x00 },  /* О to O */
1601   { 0x041F,  0x50, 0x00, 0x00, 0x00 },  /* П to P */
1602   { 0x0420,  0x52, 0x00, 0x00, 0x00 },  /* Р to R */
1603   { 0x0421,  0x53, 0x00, 0x00, 0x00 },  /* С to S */
1604   { 0x0422,  0x54, 0x00, 0x00, 0x00 },  /* Т to T */
1605   { 0x0423,  0x55, 0x00, 0x00, 0x00 },  /* У to U */
1606   { 0x0424,  0x46, 0x00, 0x00, 0x00 },  /* Ф to F */
1607   { 0x0425,  0x4b, 0x68, 0x00, 0x00 },  /* Х to Kh */
1608   { 0x0426,  0x54, 0x63, 0x00, 0x00 },  /* Ц to Tc */
1609   { 0x0427,  0x43, 0x68, 0x00, 0x00 },  /* Ч to Ch */
1610   { 0x0428,  0x53, 0x68, 0x00, 0x00 },  /* Ш to Sh */
1611   { 0x0429,  0x53, 0x68, 0x63, 0x68 },  /* Щ to Shch */
1612   { 0x042A,  0x61, 0x00, 0x00, 0x00 },  /*  to A */
1613   { 0x042B,  0x59, 0x00, 0x00, 0x00 },  /* Ы to Y */
1614   { 0x042C,  0x59, 0x00, 0x00, 0x00 },  /*  to Y */
1615   { 0x042D,  0x45, 0x00, 0x00, 0x00 },  /* Э to E */
1616   { 0x042E,  0x49, 0x75, 0x00, 0x00 },  /* Ю to Iu */
1617   { 0x042F,  0x49, 0x61, 0x00, 0x00 },  /* Я to Ia */
1618   { 0x0430,  0x61, 0x00, 0x00, 0x00 },  /* а to a */
1619   { 0x0431,  0x62, 0x00, 0x00, 0x00 },  /* б to b */
1620   { 0x0432,  0x76, 0x00, 0x00, 0x00 },  /* в to v */
1621   { 0x0433,  0x67, 0x00, 0x00, 0x00 },  /* г to g */
1622   { 0x0434,  0x64, 0x00, 0x00, 0x00 },  /* д to d */
1623   { 0x0435,  0x65, 0x00, 0x00, 0x00 },  /* е to e */
1624   { 0x0436,  0x7a, 0x68, 0x00, 0x00 },  /* ж to zh */
1625   { 0x0437,  0x7a, 0x00, 0x00, 0x00 },  /* з to z */
1626   { 0x0438,  0x69, 0x00, 0x00, 0x00 },  /* и to i */
1627   { 0x0439,  0x69, 0x00, 0x00, 0x00 },  /* й to i */
1628   { 0x043A,  0x6b, 0x00, 0x00, 0x00 },  /* к to k */
1629   { 0x043B,  0x6c, 0x00, 0x00, 0x00 },  /* л to l */
1630   { 0x043C,  0x6d, 0x00, 0x00, 0x00 },  /* м to m */
1631   { 0x043D,  0x6e, 0x00, 0x00, 0x00 },  /* н to n */
1632   { 0x043E,  0x6f, 0x00, 0x00, 0x00 },  /* о to o */
1633   { 0x043F,  0x70, 0x00, 0x00, 0x00 },  /* п to p */
1634   { 0x0440,  0x72, 0x00, 0x00, 0x00 },  /* р to r */
1635   { 0x0441,  0x73, 0x00, 0x00, 0x00 },  /* с to s */
1636   { 0x0442,  0x74, 0x00, 0x00, 0x00 },  /* т to t */
1637   { 0x0443,  0x75, 0x00, 0x00, 0x00 },  /* у to u */
1638   { 0x0444,  0x66, 0x00, 0x00, 0x00 },  /* ф to f */
1639   { 0x0445,  0x6b, 0x68, 0x00, 0x00 },  /* х to kh */
1640   { 0x0446,  0x74, 0x63, 0x00, 0x00 },  /* ц to tc */
1641   { 0x0447,  0x63, 0x68, 0x00, 0x00 },  /* ч to ch */
1642   { 0x0448,  0x73, 0x68, 0x00, 0x00 },  /* ш to sh */
1643   { 0x0449,  0x73, 0x68, 0x63, 0x68 },  /* щ to shch */
1644   { 0x044A,  0x61, 0x00, 0x00, 0x00 },  /*  to a */
1645   { 0x044B,  0x79, 0x00, 0x00, 0x00 },  /* ы to y */
1646   { 0x044C,  0x79, 0x00, 0x00, 0x00 },  /*  to y */
1647   { 0x044D,  0x65, 0x00, 0x00, 0x00 },  /* э to e */
1648   { 0x044E,  0x69, 0x75, 0x00, 0x00 },  /* ю to iu */
1649   { 0x044F,  0x69, 0x61, 0x00, 0x00 },  /* я to ia */
1650   { 0x0450,  0x65, 0x00, 0x00, 0x00 },  /* ѐ to e */
1651   { 0x0451,  0x65, 0x00, 0x00, 0x00 },  /* ё to e */
1652   { 0x0452,  0x64, 0x00, 0x00, 0x00 },  /* ђ to d */
1653   { 0x0453,  0x67, 0x00, 0x00, 0x00 },  /* ѓ to g */
1654   { 0x0454,  0x65, 0x00, 0x00, 0x00 },  /* є to e */
1655   { 0x0455,  0x7a, 0x00, 0x00, 0x00 },  /* ѕ to z */
1656   { 0x0456,  0x69, 0x00, 0x00, 0x00 },  /* і to i */
1657   { 0x0457,  0x69, 0x00, 0x00, 0x00 },  /* ї to i */
1658   { 0x0458,  0x6a, 0x00, 0x00, 0x00 },  /* ј to j */
1659   { 0x0459,  0x69, 0x00, 0x00, 0x00 },  /* љ to i */
1660   { 0x045A,  0x6e, 0x00, 0x00, 0x00 },  /* њ to n */
1661   { 0x045B,  0x64, 0x00, 0x00, 0x00 },  /* ћ to d */
1662   { 0x045C,  0x6b, 0x00, 0x00, 0x00 },  /* ќ to k */
1663   { 0x045D,  0x69, 0x00, 0x00, 0x00 },  /* ѝ to i */
1664   { 0x045E,  0x75, 0x00, 0x00, 0x00 },  /* ў to u */
1665   { 0x045F,  0x64, 0x00, 0x00, 0x00 },  /* џ to d */
1666   { 0x1E02,  0x42, 0x00, 0x00, 0x00 },  /* Ḃ to B */
1667   { 0x1E03,  0x62, 0x00, 0x00, 0x00 },  /* ḃ to b */
1668   { 0x1E0A,  0x44, 0x00, 0x00, 0x00 },  /* Ḋ to D */
1669   { 0x1E0B,  0x64, 0x00, 0x00, 0x00 },  /* ḋ to d */
1670   { 0x1E1E,  0x46, 0x00, 0x00, 0x00 },  /* Ḟ to F */
1671   { 0x1E1F,  0x66, 0x00, 0x00, 0x00 },  /* ḟ to f */
1672   { 0x1E40,  0x4D, 0x00, 0x00, 0x00 },  /* Ṁ to M */
1673   { 0x1E41,  0x6D, 0x00, 0x00, 0x00 },  /* ṁ to m */
1674   { 0x1E56,  0x50, 0x00, 0x00, 0x00 },  /* Ṗ to P */
1675   { 0x1E57,  0x70, 0x00, 0x00, 0x00 },  /* ṗ to p */
1676   { 0x1E60,  0x53, 0x00, 0x00, 0x00 },  /* Ṡ to S */
1677   { 0x1E61,  0x73, 0x00, 0x00, 0x00 },  /* ṡ to s */
1678   { 0x1E6A,  0x54, 0x00, 0x00, 0x00 },  /* Ṫ to T */
1679   { 0x1E6B,  0x74, 0x00, 0x00, 0x00 },  /* ṫ to t */
1680   { 0x1E80,  0x57, 0x00, 0x00, 0x00 },  /* Ẁ to W */
1681   { 0x1E81,  0x77, 0x00, 0x00, 0x00 },  /* ẁ to w */
1682   { 0x1E82,  0x57, 0x00, 0x00, 0x00 },  /* Ẃ to W */
1683   { 0x1E83,  0x77, 0x00, 0x00, 0x00 },  /* ẃ to w */
1684   { 0x1E84,  0x57, 0x00, 0x00, 0x00 },  /* Ẅ to W */
1685   { 0x1E85,  0x77, 0x00, 0x00, 0x00 },  /* ẅ to w */
1686   { 0x1EF2,  0x59, 0x00, 0x00, 0x00 },  /* Ỳ to Y */
1687   { 0x1EF3,  0x79, 0x00, 0x00, 0x00 },  /* ỳ to y */
1688   { 0xFB00,  0x66, 0x66, 0x00, 0x00 },  /* ff to ff */
1689   { 0xFB01,  0x66, 0x69, 0x00, 0x00 },  /* fi to fi */
1690   { 0xFB02,  0x66, 0x6C, 0x00, 0x00 },  /* fl to fl */
1691   { 0xFB05,  0x73, 0x74, 0x00, 0x00 },  /* ſt to st */
1692   { 0xFB06,  0x73, 0x74, 0x00, 0x00 },  /* st to st */
1693 };
1694 
1695 static const Transliteration *spellfixFindTranslit(int c, int *pxTop){
1696   *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1;
1697   return translit;
1698 }
1699 
1700 /*
1701 ** Convert the input string from UTF-8 into pure ASCII by converting
1702 ** all non-ASCII characters to some combination of characters in the
1703 ** ASCII subset.
1704 **
1705 ** The returned string might contain more characters than the input.
1706 **
1707 ** Space to hold the returned string comes from sqlite3_malloc() and
1708 ** should be freed by the caller.
1709 */
1710 static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1711   unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1712   int c, sz, nOut;
1713   if( zOut==0 ) return 0;
1714   nOut = 0;
1715   while( nIn>0 ){
1716     c = utf8Read(zIn, nIn, &sz);
1717     zIn += sz;
1718     nIn -= sz;
1719     if( c<=127 ){
1720       zOut[nOut++] = (unsigned char)c;
1721     }else{
1722       int xTop, xBtm, x;
1723       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1724       xBtm = 0;
1725       while( xTop>=xBtm ){
1726         x = (xTop + xBtm)/2;
1727         if( tbl[x].cFrom==c ){
1728           zOut[nOut++] = tbl[x].cTo0;
1729           if( tbl[x].cTo1 ){
1730             zOut[nOut++] = tbl[x].cTo1;
1731             if( tbl[x].cTo2 ){
1732               zOut[nOut++] = tbl[x].cTo2;
1733               if( tbl[x].cTo3 ){
1734                 zOut[nOut++] = tbl[x].cTo3;
1735               }
1736             }
1737           }
1738           c = 0;
1739           break;
1740         }else if( tbl[x].cFrom>c ){
1741           xTop = x-1;
1742         }else{
1743           xBtm = x+1;
1744         }
1745       }
1746       if( c ) zOut[nOut++] = '?';
1747     }
1748   }
1749   zOut[nOut] = 0;
1750   return zOut;
1751 }
1752 
1753 /*
1754 ** Return the number of characters in the shortest prefix of the input
1755 ** string that transliterates to an ASCII string nTrans bytes or longer.
1756 ** Or, if the transliteration of the input string is less than nTrans
1757 ** bytes in size, return the number of characters in the input string.
1758 */
1759 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1760   int i, c, sz, nOut;
1761   int nChar;
1762 
1763   i = nOut = 0;
1764   for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1765     c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1766     i += sz;
1767 
1768     nOut++;
1769     if( c>=128 ){
1770       int xTop, xBtm, x;
1771       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1772       xBtm = 0;
1773       while( xTop>=xBtm ){
1774         x = (xTop + xBtm)/2;
1775         if( tbl[x].cFrom==c ){
1776           if( tbl[x].cTo1 ){
1777             nOut++;
1778             if( tbl[x].cTo2 ){
1779               nOut++;
1780               if( tbl[x].cTo3 ){
1781                 nOut++;
1782               }
1783             }
1784           }
1785           break;
1786         }else if( tbl[x].cFrom>c ){
1787           xTop = x-1;
1788         }else{
1789           xBtm = x+1;
1790         }
1791       }
1792     }
1793   }
1794 
1795   return nChar;
1796 }
1797 
1798 
1799 /*
1800 **    spellfix1_translit(X)
1801 **
1802 ** Convert a string that contains non-ASCII Roman characters into
1803 ** pure ASCII.
1804 */
1805 static void transliterateSqlFunc(
1806   sqlite3_context *context,
1807   int argc,
1808   sqlite3_value **argv
1809 ){
1810   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1811   int nIn = sqlite3_value_bytes(argv[0]);
1812   unsigned char *zOut = transliterate(zIn, nIn);
1813   if( zOut==0 ){
1814     sqlite3_result_error_nomem(context);
1815   }else{
1816     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1817   }
1818 }
1819 
1820 /*
1821 **    spellfix1_scriptcode(X)
1822 **
1823 ** Try to determine the dominant script used by the word X and return
1824 ** its ISO 15924 numeric code.
1825 **
1826 ** The current implementation only understands the following scripts:
1827 **
1828 **    215  (Latin)
1829 **    220  (Cyrillic)
1830 **    200  (Greek)
1831 **
1832 ** This routine will return 998 if the input X contains characters from
1833 ** two or more of the above scripts or 999 if X contains no characters
1834 ** from any of the above scripts.
1835 */
1836 static void scriptCodeSqlFunc(
1837   sqlite3_context *context,
1838   int argc,
1839   sqlite3_value **argv
1840 ){
1841   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1842   int nIn = sqlite3_value_bytes(argv[0]);
1843   int c, sz;
1844   int scriptMask = 0;
1845   int res;
1846   int seenDigit = 0;
1847 # define SCRIPT_LATIN       0x0001
1848 # define SCRIPT_CYRILLIC    0x0002
1849 # define SCRIPT_GREEK       0x0004
1850 # define SCRIPT_HEBREW      0x0008
1851 # define SCRIPT_ARABIC      0x0010
1852 
1853   while( nIn>0 ){
1854     c = utf8Read(zIn, nIn, &sz);
1855     zIn += sz;
1856     nIn -= sz;
1857     if( c<0x02af ){
1858       if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1859         scriptMask |= SCRIPT_LATIN;
1860       }else if( c>='0' && c<='9' ){
1861         seenDigit = 1;
1862       }
1863     }else if( c>=0x0400 && c<=0x04ff ){
1864       scriptMask |= SCRIPT_CYRILLIC;
1865     }else if( c>=0x0386 && c<=0x03ce ){
1866       scriptMask |= SCRIPT_GREEK;
1867     }else if( c>=0x0590 && c<=0x05ff ){
1868       scriptMask |= SCRIPT_HEBREW;
1869     }else if( c>=0x0600 && c<=0x06ff ){
1870       scriptMask |= SCRIPT_ARABIC;
1871     }
1872   }
1873   if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1874   switch( scriptMask ){
1875     case 0:                res = 999; break;
1876     case SCRIPT_LATIN:     res = 215; break;
1877     case SCRIPT_CYRILLIC:  res = 220; break;
1878     case SCRIPT_GREEK:     res = 200; break;
1879     case SCRIPT_HEBREW:    res = 125; break;
1880     case SCRIPT_ARABIC:    res = 160; break;
1881     default:               res = 998; break;
1882   }
1883   sqlite3_result_int(context, res);
1884 }
1885 
1886 /* End transliterate
1887 ******************************************************************************
1888 ******************************************************************************
1889 ** Begin spellfix1 virtual table.
1890 */
1891 
1892 /* Maximum length of a phonehash used for querying the shadow table */
1893 #define SPELLFIX_MX_HASH  32
1894 
1895 /* Maximum number of hash strings to examine per query */
1896 #define SPELLFIX_MX_RUN   1
1897 
1898 typedef struct spellfix1_vtab spellfix1_vtab;
1899 typedef struct spellfix1_cursor spellfix1_cursor;
1900 
1901 /* Fuzzy-search virtual table object */
1902 struct spellfix1_vtab {
1903   sqlite3_vtab base;         /* Base class - must be first */
1904   sqlite3 *db;               /* Database connection */
1905   char *zDbName;             /* Name of database holding this table */
1906   char *zTableName;          /* Name of the virtual table */
1907   char *zCostTable;          /* Table holding edit-distance cost numbers */
1908   EditDist3Config *pConfig3; /* Parsed edit distance costs */
1909 };
1910 
1911 /* Fuzzy-search cursor object */
1912 struct spellfix1_cursor {
1913   sqlite3_vtab_cursor base;    /* Base class - must be first */
1914   spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
1915   char *zPattern;              /* rhs of MATCH clause */
1916   int idxNum;                  /* idxNum value passed to xFilter() */
1917   int nRow;                    /* Number of rows of content */
1918   int nAlloc;                  /* Number of allocated rows */
1919   int iRow;                    /* Current row of content */
1920   int iLang;                   /* Value of the langid= constraint */
1921   int iTop;                    /* Value of the top= constraint */
1922   int iScope;                  /* Value of the scope= constraint */
1923   int nSearch;                 /* Number of vocabulary items checked */
1924   sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
1925   struct spellfix1_row {       /* For each row of content */
1926     sqlite3_int64 iRowid;         /* Rowid for this row */
1927     char *zWord;                  /* Text for this row */
1928     int iRank;                    /* Rank for this row */
1929     int iDistance;                /* Distance from pattern for this row */
1930     int iScore;                   /* Score for sorting */
1931     int iMatchlen;                /* Value of matchlen column (or -1) */
1932     char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1933   } *a;
1934 };
1935 
1936 /*
1937 ** Construct one or more SQL statements from the format string given
1938 ** and then evaluate those statements. The success code is written
1939 ** into *pRc.
1940 **
1941 ** If *pRc is initially non-zero then this routine is a no-op.
1942 */
1943 static void spellfix1DbExec(
1944   int *pRc,              /* Success code */
1945   sqlite3 *db,           /* Database in which to run SQL */
1946   const char *zFormat,   /* Format string for SQL */
1947   ...                    /* Arguments to the format string */
1948 ){
1949   va_list ap;
1950   char *zSql;
1951   if( *pRc ) return;
1952   va_start(ap, zFormat);
1953   zSql = sqlite3_vmprintf(zFormat, ap);
1954   va_end(ap);
1955   if( zSql==0 ){
1956     *pRc = SQLITE_NOMEM;
1957   }else{
1958     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1959     sqlite3_free(zSql);
1960   }
1961 }
1962 
1963 /*
1964 ** xDisconnect/xDestroy method for the fuzzy-search module.
1965 */
1966 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1967   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1968   int rc = SQLITE_OK;
1969   if( isDestroy ){
1970     sqlite3 *db = p->db;
1971     spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1972                   p->zDbName, p->zTableName);
1973   }
1974   if( rc==SQLITE_OK ){
1975     sqlite3_free(p->zTableName);
1976     editDist3ConfigDelete(p->pConfig3);
1977     sqlite3_free(p->zCostTable);
1978     sqlite3_free(p);
1979   }
1980   return rc;
1981 }
1982 static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1983   return spellfix1Uninit(0, pVTab);
1984 }
1985 static int spellfix1Destroy(sqlite3_vtab *pVTab){
1986   return spellfix1Uninit(1, pVTab);
1987 }
1988 
1989 /*
1990 ** Make a copy of a string.  Remove leading and trailing whitespace
1991 ** and dequote it.
1992 */
1993 static char *spellfix1Dequote(const char *zIn){
1994   char *zOut;
1995   int i, j;
1996   char c;
1997   while( isspace((unsigned char)zIn[0]) ) zIn++;
1998   zOut = sqlite3_mprintf("%s", zIn);
1999   if( zOut==0 ) return 0;
2000   i = (int)strlen(zOut);
2001 #if 0  /* The parser will never leave spaces at the end */
2002   while( i>0 && isspace(zOut[i-1]) ){ i--; }
2003 #endif
2004   zOut[i] = 0;
2005   c = zOut[0];
2006   if( c=='\'' || c=='"' ){
2007     for(i=1, j=0; ALWAYS(zOut[i]); i++){
2008       zOut[j++] = zOut[i];
2009       if( zOut[i]==c ){
2010         if( zOut[i+1]==c ){
2011           i++;
2012         }else{
2013           zOut[j-1] = 0;
2014           break;
2015         }
2016       }
2017     }
2018   }
2019   return zOut;
2020 }
2021 
2022 
2023 /*
2024 ** xConnect/xCreate method for the spellfix1 module. Arguments are:
2025 **
2026 **   argv[0]   -> module name  ("spellfix1")
2027 **   argv[1]   -> database name
2028 **   argv[2]   -> table name
2029 **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
2030 */
2031 static int spellfix1Init(
2032   int isCreate,
2033   sqlite3 *db,
2034   void *pAux,
2035   int argc, const char *const*argv,
2036   sqlite3_vtab **ppVTab,
2037   char **pzErr
2038 ){
2039   spellfix1_vtab *pNew = 0;
2040   /* const char *zModule = argv[0]; // not used */
2041   const char *zDbName = argv[1];
2042   const char *zTableName = argv[2];
2043   int nDbName;
2044   int rc = SQLITE_OK;
2045   int i;
2046 
2047   nDbName = (int)strlen(zDbName);
2048   pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
2049   if( pNew==0 ){
2050     rc = SQLITE_NOMEM;
2051   }else{
2052     memset(pNew, 0, sizeof(*pNew));
2053     pNew->zDbName = (char*)&pNew[1];
2054     memcpy(pNew->zDbName, zDbName, nDbName+1);
2055     pNew->zTableName = sqlite3_mprintf("%s", zTableName);
2056     pNew->db = db;
2057     if( pNew->zTableName==0 ){
2058       rc = SQLITE_NOMEM;
2059     }else{
2060       rc = sqlite3_declare_vtab(db,
2061            "CREATE TABLE x(word,rank,distance,langid, "
2062            "score, matchlen, phonehash HIDDEN, "
2063            "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
2064            "soundslike HIDDEN, command HIDDEN)"
2065       );
2066 #define SPELLFIX_COL_WORD            0
2067 #define SPELLFIX_COL_RANK            1
2068 #define SPELLFIX_COL_DISTANCE        2
2069 #define SPELLFIX_COL_LANGID          3
2070 #define SPELLFIX_COL_SCORE           4
2071 #define SPELLFIX_COL_MATCHLEN        5
2072 #define SPELLFIX_COL_PHONEHASH       6
2073 #define SPELLFIX_COL_TOP             7
2074 #define SPELLFIX_COL_SCOPE           8
2075 #define SPELLFIX_COL_SRCHCNT         9
2076 #define SPELLFIX_COL_SOUNDSLIKE     10
2077 #define SPELLFIX_COL_COMMAND        11
2078     }
2079     if( rc==SQLITE_OK && isCreate ){
2080       spellfix1DbExec(&rc, db,
2081          "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
2082          "  id INTEGER PRIMARY KEY,\n"
2083          "  rank INT,\n"
2084          "  langid INT,\n"
2085          "  word TEXT,\n"
2086          "  k1 TEXT,\n"
2087          "  k2 TEXT\n"
2088          ");\n",
2089          zDbName, zTableName
2090       );
2091       spellfix1DbExec(&rc, db,
2092          "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
2093             "ON \"%w_vocab\"(langid,k2);",
2094          zDbName, zTableName, zTableName
2095       );
2096     }
2097     for(i=3; rc==SQLITE_OK && i<argc; i++){
2098       if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
2099         pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
2100         if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
2101         continue;
2102       }
2103       *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
2104       rc = SQLITE_ERROR;
2105     }
2106   }
2107 
2108   if( rc && pNew ){
2109     *ppVTab = 0;
2110     spellfix1Uninit(0, &pNew->base);
2111   }else{
2112     *ppVTab = (sqlite3_vtab *)pNew;
2113   }
2114   return rc;
2115 }
2116 
2117 /*
2118 ** The xConnect and xCreate methods
2119 */
2120 static int spellfix1Connect(
2121   sqlite3 *db,
2122   void *pAux,
2123   int argc, const char *const*argv,
2124   sqlite3_vtab **ppVTab,
2125   char **pzErr
2126 ){
2127   return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2128 }
2129 static int spellfix1Create(
2130   sqlite3 *db,
2131   void *pAux,
2132   int argc, const char *const*argv,
2133   sqlite3_vtab **ppVTab,
2134   char **pzErr
2135 ){
2136   return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2137 }
2138 
2139 /*
2140 ** Clear all of the content from a cursor.
2141 */
2142 static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2143   int i;
2144   for(i=0; i<pCur->nRow; i++){
2145     sqlite3_free(pCur->a[i].zWord);
2146   }
2147   pCur->nRow = 0;
2148   pCur->iRow = 0;
2149   pCur->nSearch = 0;
2150   if( pCur->pFullScan ){
2151     sqlite3_finalize(pCur->pFullScan);
2152     pCur->pFullScan = 0;
2153   }
2154 }
2155 
2156 /*
2157 ** Resize the cursor to hold up to N rows of content
2158 */
2159 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2160   struct spellfix1_row *aNew;
2161   assert( N>=pCur->nRow );
2162   aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2163   if( aNew==0 && N>0 ){
2164     spellfix1ResetCursor(pCur);
2165     sqlite3_free(pCur->a);
2166     pCur->nAlloc = 0;
2167     pCur->a = 0;
2168   }else{
2169     pCur->nAlloc = N;
2170     pCur->a = aNew;
2171   }
2172 }
2173 
2174 
2175 /*
2176 ** Close a fuzzy-search cursor.
2177 */
2178 static int spellfix1Close(sqlite3_vtab_cursor *cur){
2179   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2180   spellfix1ResetCursor(pCur);
2181   spellfix1ResizeCursor(pCur, 0);
2182   sqlite3_free(pCur->zPattern);
2183   sqlite3_free(pCur);
2184   return SQLITE_OK;
2185 }
2186 
2187 #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
2188 #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
2189 #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
2190 #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
2191 #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
2192 #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
2193 #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
2194 #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2195 
2196 /*
2197 **
2198 ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2199 ** above.
2200 **
2201 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2202 ** if specified and in that order.
2203 */
2204 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2205   int iPlan = 0;
2206   int iLangTerm = -1;
2207   int iTopTerm = -1;
2208   int iScopeTerm = -1;
2209   int iDistTerm = -1;
2210   int iRowidTerm = -1;
2211   int i;
2212   const struct sqlite3_index_constraint *pConstraint;
2213   pConstraint = pIdxInfo->aConstraint;
2214   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2215     if( pConstraint->usable==0 ) continue;
2216 
2217     /* Terms of the form:  word MATCH $str */
2218     if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2219      && pConstraint->iColumn==SPELLFIX_COL_WORD
2220      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2221     ){
2222       iPlan |= SPELLFIX_IDXNUM_MATCH;
2223       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2224       pIdxInfo->aConstraintUsage[i].omit = 1;
2225     }
2226 
2227     /* Terms of the form:  langid = $langid  */
2228     if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2229      && pConstraint->iColumn==SPELLFIX_COL_LANGID
2230      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2231     ){
2232       iPlan |= SPELLFIX_IDXNUM_LANGID;
2233       iLangTerm = i;
2234     }
2235 
2236     /* Terms of the form:  top = $top */
2237     if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2238      && pConstraint->iColumn==SPELLFIX_COL_TOP
2239      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2240     ){
2241       iPlan |= SPELLFIX_IDXNUM_TOP;
2242       iTopTerm = i;
2243     }
2244 
2245     /* Terms of the form:  scope = $scope */
2246     if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2247      && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2248      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2249     ){
2250       iPlan |= SPELLFIX_IDXNUM_SCOPE;
2251       iScopeTerm = i;
2252     }
2253 
2254     /* Terms of the form:  distance < $dist or distance <= $dist */
2255     if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2256      && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2257      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2258           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2259     ){
2260       if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2261         iPlan |= SPELLFIX_IDXNUM_DISTLT;
2262       }else{
2263         iPlan |= SPELLFIX_IDXNUM_DISTLE;
2264       }
2265       iDistTerm = i;
2266     }
2267 
2268     /* Terms of the form:  distance < $dist or distance <= $dist */
2269     if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2270      && pConstraint->iColumn<0
2271      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2272     ){
2273       iPlan |= SPELLFIX_IDXNUM_ROWID;
2274       iRowidTerm = i;
2275     }
2276   }
2277   if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2278     int idx = 2;
2279     pIdxInfo->idxNum = iPlan;
2280     if( pIdxInfo->nOrderBy==1
2281      && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2282      && pIdxInfo->aOrderBy[0].desc==0
2283     ){
2284       pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2285     }
2286     if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2287       pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2288       pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2289     }
2290     if( iPlan&SPELLFIX_IDXNUM_TOP ){
2291       pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2292       pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2293     }
2294     if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2295       pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2296       pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2297     }
2298     if( iPlan&SPELLFIX_IDXNUM_DIST ){
2299       pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2300       pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2301     }
2302     pIdxInfo->estimatedCost = 1e5;
2303   }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2304     pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2305     pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2306     pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2307     pIdxInfo->estimatedCost = 5;
2308   }else{
2309     pIdxInfo->idxNum = 0;
2310     pIdxInfo->estimatedCost = 1e50;
2311   }
2312   return SQLITE_OK;
2313 }
2314 
2315 /*
2316 ** Open a new fuzzy-search cursor.
2317 */
2318 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2319   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2320   spellfix1_cursor *pCur;
2321   pCur = sqlite3_malloc64( sizeof(*pCur) );
2322   if( pCur==0 ) return SQLITE_NOMEM;
2323   memset(pCur, 0, sizeof(*pCur));
2324   pCur->pVTab = p;
2325   *ppCursor = &pCur->base;
2326   return SQLITE_OK;
2327 }
2328 
2329 /*
2330 ** Adjust a distance measurement by the words rank in order to show
2331 ** preference to common words.
2332 */
2333 static int spellfix1Score(int iDistance, int iRank){
2334   int iLog2;
2335   for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2336   return iDistance + 32 - iLog2;
2337 }
2338 
2339 /*
2340 ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2341 ** that they sort in order of increasing distance.
2342 */
2343 static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2344   const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2345   const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2346   return a->iScore - b->iScore;
2347 }
2348 
2349 /*
2350 ** A structure used to pass information from spellfix1FilterForMatch()
2351 ** into spellfix1RunQuery().
2352 */
2353 typedef struct MatchQuery {
2354   spellfix1_cursor *pCur;          /* The cursor being queried */
2355   sqlite3_stmt *pStmt;             /* shadow table query statment */
2356   char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2357   const char *zPattern;            /* Transliterated input string */
2358   int nPattern;                    /* Length of zPattern */
2359   EditDist3FromString *pMatchStr3; /* Original unicode string */
2360   EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2361   const EditDist3Lang *pLang;      /* The selected language coefficients */
2362   int iLang;                       /* The language id */
2363   int iScope;                      /* Default scope */
2364   int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2365   int rc;                          /* Error code */
2366   int nRun;                  /* Number of prior runs for the same zPattern */
2367   char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2368 } MatchQuery;
2369 
2370 /*
2371 ** Run a query looking for the best matches against zPattern using
2372 ** zHash as the character class seed hash.
2373 */
2374 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2375   const char *zK1;
2376   const char *zWord;
2377   int iDist;
2378   int iRank;
2379   int iScore;
2380   int iWorst = 0;
2381   int idx;
2382   int idxWorst = -1;
2383   int i;
2384   int iScope = p->iScope;
2385   spellfix1_cursor *pCur = p->pCur;
2386   sqlite3_stmt *pStmt = p->pStmt;
2387   char zHash1[SPELLFIX_MX_HASH];
2388   char zHash2[SPELLFIX_MX_HASH];
2389   char *zClass;
2390   int nClass;
2391   int rc;
2392 
2393   if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2394   zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2395   if( zClass==0 ){
2396     p->rc = SQLITE_NOMEM;
2397     return;
2398   }
2399   nClass = (int)strlen(zClass);
2400   if( nClass>SPELLFIX_MX_HASH-2 ){
2401     nClass = SPELLFIX_MX_HASH-2;
2402     zClass[nClass] = 0;
2403   }
2404   if( nClass<=iScope ){
2405     if( nClass>2 ){
2406       iScope = nClass-1;
2407     }else{
2408       iScope = nClass;
2409     }
2410   }
2411   memcpy(zHash1, zClass, iScope);
2412   sqlite3_free(zClass);
2413   zHash1[iScope] = 0;
2414   memcpy(zHash2, zHash1, iScope);
2415   zHash2[iScope] = 'Z';
2416   zHash2[iScope+1] = 0;
2417 #if SPELLFIX_MX_RUN>1
2418   for(i=0; i<p->nRun; i++){
2419     if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2420   }
2421 #endif
2422   assert( p->nRun<SPELLFIX_MX_RUN );
2423   memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2424   if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2425    || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2426   ){
2427     p->rc = SQLITE_NOMEM;
2428     return;
2429   }
2430 #if SPELLFIX_MX_RUN>1
2431   for(i=0; i<pCur->nRow; i++){
2432     if( pCur->a[i].iScore>iWorst ){
2433       iWorst = pCur->a[i].iScore;
2434       idxWorst = i;
2435     }
2436   }
2437 #endif
2438   while( sqlite3_step(pStmt)==SQLITE_ROW ){
2439     int iMatchlen = -1;
2440     iRank = sqlite3_column_int(pStmt, 2);
2441     if( p->pMatchStr3 ){
2442       int nWord = sqlite3_column_bytes(pStmt, 1);
2443       zWord = (const char*)sqlite3_column_text(pStmt, 1);
2444       iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2445     }else{
2446       zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2447       if( zK1==0 ) continue;
2448       iDist = editdist1(p->zPattern, zK1, 0);
2449     }
2450     if( iDist<0 ){
2451       p->rc = SQLITE_NOMEM;
2452       break;
2453     }
2454     pCur->nSearch++;
2455 
2456     /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2457     ** check if this row meets it. If not, jump back up to the top of the
2458     ** loop to process the next row. Otherwise, if the row does match the
2459     ** distance constraint, check if the pCur->a[] array is already full.
2460     ** If it is and no explicit "top = ?" constraint was present in the
2461     ** query, grow the array to ensure there is room for the new entry. */
2462     assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2463     if( p->iMaxDist>=0 ){
2464       if( iDist>p->iMaxDist ) continue;
2465       if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2466         spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2467         if( pCur->a==0 ) break;
2468       }
2469     }
2470 
2471     iScore = spellfix1Score(iDist,iRank);
2472     if( pCur->nRow<pCur->nAlloc ){
2473       idx = pCur->nRow;
2474     }else if( iScore<iWorst ){
2475       idx = idxWorst;
2476       sqlite3_free(pCur->a[idx].zWord);
2477     }else{
2478       continue;
2479     }
2480 
2481     pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2482     if( pCur->a[idx].zWord==0 ){
2483       p->rc = SQLITE_NOMEM;
2484       break;
2485     }
2486     pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2487     pCur->a[idx].iRank = iRank;
2488     pCur->a[idx].iDistance = iDist;
2489     pCur->a[idx].iScore = iScore;
2490     pCur->a[idx].iMatchlen = iMatchlen;
2491     memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2492     if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2493     if( pCur->nRow==pCur->nAlloc ){
2494       iWorst = pCur->a[0].iScore;
2495       idxWorst = 0;
2496       for(i=1; i<pCur->nRow; i++){
2497         iScore = pCur->a[i].iScore;
2498         if( iWorst<iScore ){
2499           iWorst = iScore;
2500           idxWorst = i;
2501         }
2502       }
2503     }
2504   }
2505   rc = sqlite3_reset(pStmt);
2506   if( rc ) p->rc = rc;
2507 }
2508 
2509 /*
2510 ** This version of the xFilter method work if the MATCH term is present
2511 ** and we are doing a scan.
2512 */
2513 static int spellfix1FilterForMatch(
2514   spellfix1_cursor *pCur,
2515   int argc,
2516   sqlite3_value **argv
2517 ){
2518   int idxNum = pCur->idxNum;
2519   const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2520   EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2521   char *zPattern;                    /* Transliteration of zMatchThis */
2522   int nPattern;                      /* Length of zPattern */
2523   int iLimit = 20;                   /* Max number of rows of output */
2524   int iScope = 3;                    /* Use this many characters of zClass */
2525   int iLang = 0;                     /* Language code */
2526   char *zSql;                        /* SQL of shadow table query */
2527   sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2528   int rc;                            /* Result code */
2529   int idx = 1;                       /* Next available filter parameter */
2530   spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2531   MatchQuery x;                      /* For passing info to RunQuery() */
2532 
2533   /* Load the cost table if we have not already done so */
2534   if( p->zCostTable!=0 && p->pConfig3==0 ){
2535     p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2536     if( p->pConfig3==0 ) return SQLITE_NOMEM;
2537     memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2538     rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2539     if( rc ) return rc;
2540   }
2541   memset(&x, 0, sizeof(x));
2542   x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2543   x.iMaxDist = -1;   /* Maximum allowed edit distance */
2544 
2545   if( idxNum&2 ){
2546     iLang = sqlite3_value_int(argv[idx++]);
2547   }
2548   if( idxNum&4 ){
2549     iLimit = sqlite3_value_int(argv[idx++]);
2550     if( iLimit<1 ) iLimit = 1;
2551   }
2552   if( idxNum&8 ){
2553     x.iScope = sqlite3_value_int(argv[idx++]);
2554     if( x.iScope<1 ) x.iScope = 1;
2555     if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2556   }
2557   if( idxNum&(16|32) ){
2558     x.iMaxDist = sqlite3_value_int(argv[idx++]);
2559     if( idxNum&16 ) x.iMaxDist--;
2560     if( x.iMaxDist<0 ) x.iMaxDist = 0;
2561   }
2562   spellfix1ResetCursor(pCur);
2563   spellfix1ResizeCursor(pCur, iLimit);
2564   zMatchThis = sqlite3_value_text(argv[0]);
2565   if( zMatchThis==0 ) return SQLITE_OK;
2566   if( p->pConfig3 ){
2567     x.pLang = editDist3FindLang(p->pConfig3, iLang);
2568     pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2569     if( pMatchStr3==0 ){
2570       x.rc = SQLITE_NOMEM;
2571       goto filter_exit;
2572     }
2573   }else{
2574     x.pLang = 0;
2575   }
2576   zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2577   sqlite3_free(pCur->zPattern);
2578   pCur->zPattern = zPattern;
2579   if( zPattern==0 ){
2580     x.rc = SQLITE_NOMEM;
2581     goto filter_exit;
2582   }
2583   nPattern = (int)strlen(zPattern);
2584   if( zPattern[nPattern-1]=='*' ) nPattern--;
2585   zSql = sqlite3_mprintf(
2586      "SELECT id, word, rank, coalesce(k1,word)"
2587      "  FROM \"%w\".\"%w_vocab\""
2588      " WHERE langid=%d AND k2>=?1 AND k2<?2",
2589      p->zDbName, p->zTableName, iLang
2590   );
2591   if( zSql==0 ){
2592     x.rc = SQLITE_NOMEM;
2593     pStmt = 0;
2594     goto filter_exit;
2595   }
2596   rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2597   sqlite3_free(zSql);
2598   pCur->iLang = iLang;
2599   x.pCur = pCur;
2600   x.pStmt = pStmt;
2601   x.zPattern = zPattern;
2602   x.nPattern = nPattern;
2603   x.pMatchStr3 = pMatchStr3;
2604   x.iLang = iLang;
2605   x.rc = rc;
2606   x.pConfig3 = p->pConfig3;
2607   if( x.rc==SQLITE_OK ){
2608     spellfix1RunQuery(&x, zPattern, nPattern);
2609   }
2610 
2611   if( pCur->a ){
2612     qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2613     pCur->iTop = iLimit;
2614     pCur->iScope = iScope;
2615   }else{
2616     x.rc = SQLITE_NOMEM;
2617   }
2618 
2619 filter_exit:
2620   sqlite3_finalize(pStmt);
2621   editDist3FromStringDelete(pMatchStr3);
2622   return x.rc;
2623 }
2624 
2625 /*
2626 ** This version of xFilter handles a full-table scan case
2627 */
2628 static int spellfix1FilterForFullScan(
2629   spellfix1_cursor *pCur,
2630   int argc,
2631   sqlite3_value **argv
2632 ){
2633   int rc = SQLITE_OK;
2634   int idxNum = pCur->idxNum;
2635   char *zSql;
2636   spellfix1_vtab *pVTab = pCur->pVTab;
2637   spellfix1ResetCursor(pCur);
2638   assert( idxNum==0 || idxNum==64 );
2639   zSql = sqlite3_mprintf(
2640      "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2641      pVTab->zDbName, pVTab->zTableName,
2642      ((idxNum & 64) ? " WHERE rowid=?" : "")
2643   );
2644   if( zSql==0 ) return SQLITE_NOMEM;
2645   rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2646   sqlite3_free(zSql);
2647   if( rc==SQLITE_OK && (idxNum & 64) ){
2648     assert( argc==1 );
2649     rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2650   }
2651   pCur->nRow = pCur->iRow = 0;
2652   if( rc==SQLITE_OK ){
2653     rc = sqlite3_step(pCur->pFullScan);
2654     if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2655     if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2656   }else{
2657     pCur->iRow = 0;
2658   }
2659   return rc;
2660 }
2661 
2662 
2663 /*
2664 ** Called to "rewind" a cursor back to the beginning so that
2665 ** it starts its output over again.  Always called at least once
2666 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2667 */
2668 static int spellfix1Filter(
2669   sqlite3_vtab_cursor *cur,
2670   int idxNum, const char *idxStr,
2671   int argc, sqlite3_value **argv
2672 ){
2673   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2674   int rc;
2675   pCur->idxNum = idxNum;
2676   if( idxNum & 1 ){
2677     rc = spellfix1FilterForMatch(pCur, argc, argv);
2678   }else{
2679     rc = spellfix1FilterForFullScan(pCur, argc, argv);
2680   }
2681   return rc;
2682 }
2683 
2684 
2685 /*
2686 ** Advance a cursor to its next row of output
2687 */
2688 static int spellfix1Next(sqlite3_vtab_cursor *cur){
2689   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2690   int rc = SQLITE_OK;
2691   if( pCur->iRow < pCur->nRow ){
2692     if( pCur->pFullScan ){
2693       rc = sqlite3_step(pCur->pFullScan);
2694       if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2695       if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2696     }else{
2697       pCur->iRow++;
2698     }
2699   }
2700   return rc;
2701 }
2702 
2703 /*
2704 ** Return TRUE if we are at the end-of-file
2705 */
2706 static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2707   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2708   return pCur->iRow>=pCur->nRow;
2709 }
2710 
2711 /*
2712 ** Return columns from the current row.
2713 */
2714 static int spellfix1Column(
2715   sqlite3_vtab_cursor *cur,
2716   sqlite3_context *ctx,
2717   int i
2718 ){
2719   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2720   if( pCur->pFullScan ){
2721     if( i<=SPELLFIX_COL_LANGID ){
2722       sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2723     }else{
2724       sqlite3_result_null(ctx);
2725     }
2726     return SQLITE_OK;
2727   }
2728   switch( i ){
2729     case SPELLFIX_COL_WORD: {
2730       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2731       break;
2732     }
2733     case SPELLFIX_COL_RANK: {
2734       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2735       break;
2736     }
2737     case SPELLFIX_COL_DISTANCE: {
2738       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2739       break;
2740     }
2741     case SPELLFIX_COL_LANGID: {
2742       sqlite3_result_int(ctx, pCur->iLang);
2743       break;
2744     }
2745     case SPELLFIX_COL_SCORE: {
2746       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2747       break;
2748     }
2749     case SPELLFIX_COL_MATCHLEN: {
2750       int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2751       if( iMatchlen<0 ){
2752         int nPattern = (int)strlen(pCur->zPattern);
2753         char *zWord = pCur->a[pCur->iRow].zWord;
2754         int nWord = (int)strlen(zWord);
2755 
2756         if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2757           char *zTranslit;
2758           int res;
2759           zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2760           if( !zTranslit ) return SQLITE_NOMEM;
2761           res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2762           sqlite3_free(zTranslit);
2763           if( res<0 ) return SQLITE_NOMEM;
2764           iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2765         }else{
2766           iMatchlen = utf8Charlen(zWord, nWord);
2767         }
2768       }
2769 
2770       sqlite3_result_int(ctx, iMatchlen);
2771       break;
2772     }
2773     case SPELLFIX_COL_PHONEHASH: {
2774       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2775       break;
2776     }
2777     case SPELLFIX_COL_TOP: {
2778       sqlite3_result_int(ctx, pCur->iTop);
2779       break;
2780     }
2781     case SPELLFIX_COL_SCOPE: {
2782       sqlite3_result_int(ctx, pCur->iScope);
2783       break;
2784     }
2785     case SPELLFIX_COL_SRCHCNT: {
2786       sqlite3_result_int(ctx, pCur->nSearch);
2787       break;
2788     }
2789     default: {
2790       sqlite3_result_null(ctx);
2791       break;
2792     }
2793   }
2794   return SQLITE_OK;
2795 }
2796 
2797 /*
2798 ** The rowid.
2799 */
2800 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2801   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2802   if( pCur->pFullScan ){
2803     *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2804   }else{
2805     *pRowid = pCur->a[pCur->iRow].iRowid;
2806   }
2807   return SQLITE_OK;
2808 }
2809 
2810 /*
2811 ** This function is called by the xUpdate() method. It returns a string
2812 ** containing the conflict mode that xUpdate() should use for the current
2813 ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2814 */
2815 static const char *spellfix1GetConflict(sqlite3 *db){
2816   static const char *azConflict[] = {
2817     /* Note: Instead of "FAIL" - "ABORT". */
2818     "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2819   };
2820   int eConflict = sqlite3_vtab_on_conflict(db);
2821 
2822   assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2823        || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2824        || eConflict==SQLITE_REPLACE
2825   );
2826   assert( SQLITE_ROLLBACK==1 );
2827   assert( SQLITE_IGNORE==2 );
2828   assert( SQLITE_FAIL==3 );
2829   assert( SQLITE_ABORT==4 );
2830   assert( SQLITE_REPLACE==5 );
2831 
2832   return azConflict[eConflict-1];
2833 }
2834 
2835 /*
2836 ** The xUpdate() method.
2837 */
2838 static int spellfix1Update(
2839   sqlite3_vtab *pVTab,
2840   int argc,
2841   sqlite3_value **argv,
2842   sqlite_int64 *pRowid
2843 ){
2844   int rc = SQLITE_OK;
2845   sqlite3_int64 rowid, newRowid;
2846   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2847   sqlite3 *db = p->db;
2848 
2849   if( argc==1 ){
2850     /* A delete operation on the rowid given by argv[0] */
2851     rowid = *pRowid = sqlite3_value_int64(argv[0]);
2852     spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2853                            " WHERE id=%lld",
2854                   p->zDbName, p->zTableName, rowid);
2855   }else{
2856     const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2857     int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2858     int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2859     int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2860     const unsigned char *zSoundslike =
2861            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2862     int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2863     char *zK1, *zK2;
2864     int i;
2865     char c;
2866     const char *zConflict = spellfix1GetConflict(db);
2867 
2868     if( zWord==0 ){
2869       /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2870       ** cause zWord to be NULL, so we look at the "command" column to see
2871       ** what special actions to take */
2872       const char *zCmd =
2873          (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2874       if( zCmd==0 ){
2875         pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2876                                          p->zTableName);
2877         return SQLITE_CONSTRAINT_NOTNULL;
2878       }
2879       if( strcmp(zCmd,"reset")==0 ){
2880         /* Reset the  edit cost table (if there is one). */
2881         editDist3ConfigDelete(p->pConfig3);
2882         p->pConfig3 = 0;
2883         return SQLITE_OK;
2884       }
2885       if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2886         editDist3ConfigDelete(p->pConfig3);
2887         p->pConfig3 = 0;
2888         sqlite3_free(p->zCostTable);
2889         p->zCostTable = spellfix1Dequote(zCmd+16);
2890         if( p->zCostTable==0 ) return SQLITE_NOMEM;
2891         if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2892           sqlite3_free(p->zCostTable);
2893           p->zCostTable = 0;
2894         }
2895         return SQLITE_OK;
2896       }
2897       pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2898                                        p->zTableName, zCmd);
2899       return SQLITE_ERROR;
2900     }
2901     if( iRank<1 ) iRank = 1;
2902     if( zSoundslike ){
2903       zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2904     }else{
2905       zK1 = (char*)transliterate(zWord, nWord);
2906     }
2907     if( zK1==0 ) return SQLITE_NOMEM;
2908     for(i=0; (c = zK1[i])!=0; i++){
2909        if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2910     }
2911     zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2912     if( zK2==0 ){
2913       sqlite3_free(zK1);
2914       return SQLITE_NOMEM;
2915     }
2916     if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2917       if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2918         spellfix1DbExec(&rc, db,
2919                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2920                "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)",
2921                p->zDbName, p->zTableName,
2922                iRank, iLang, zWord, zK1, zWord, zK2
2923         );
2924       }else{
2925         newRowid = sqlite3_value_int64(argv[1]);
2926         spellfix1DbExec(&rc, db,
2927             "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2928             "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)",
2929             zConflict, p->zDbName, p->zTableName,
2930             newRowid, iRank, iLang, zWord, zK1, zWord, zK2
2931         );
2932       }
2933       *pRowid = sqlite3_last_insert_rowid(db);
2934     }else{
2935       rowid = sqlite3_value_int64(argv[0]);
2936       newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2937       spellfix1DbExec(&rc, db,
2938              "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2939              " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld",
2940              zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2941              zWord, zK1, zWord, zK2, rowid
2942       );
2943     }
2944     sqlite3_free(zK1);
2945     sqlite3_free(zK2);
2946   }
2947   return rc;
2948 }
2949 
2950 /*
2951 ** Rename the spellfix1 table.
2952 */
2953 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2954   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2955   sqlite3 *db = p->db;
2956   int rc = SQLITE_OK;
2957   char *zNewName = sqlite3_mprintf("%s", zNew);
2958   if( zNewName==0 ){
2959     return SQLITE_NOMEM;
2960   }
2961   spellfix1DbExec(&rc, db,
2962      "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2963      p->zDbName, p->zTableName, zNewName
2964   );
2965   if( rc==SQLITE_OK ){
2966     sqlite3_free(p->zTableName);
2967     p->zTableName = zNewName;
2968   }else{
2969     sqlite3_free(zNewName);
2970   }
2971   return rc;
2972 }
2973 
2974 
2975 /*
2976 ** A virtual table module that provides fuzzy search.
2977 */
2978 static sqlite3_module spellfix1Module = {
2979   0,                       /* iVersion */
2980   spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
2981   spellfix1Connect,        /* xConnect - reconnected to an existing table */
2982   spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
2983   spellfix1Disconnect,     /* xDisconnect - close a connection */
2984   spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
2985   spellfix1Open,           /* xOpen - open a cursor */
2986   spellfix1Close,          /* xClose - close a cursor */
2987   spellfix1Filter,         /* xFilter - configure scan constraints */
2988   spellfix1Next,           /* xNext - advance a cursor */
2989   spellfix1Eof,            /* xEof - check for end of scan */
2990   spellfix1Column,         /* xColumn - read data */
2991   spellfix1Rowid,          /* xRowid - read data */
2992   spellfix1Update,         /* xUpdate */
2993   0,                       /* xBegin */
2994   0,                       /* xSync */
2995   0,                       /* xCommit */
2996   0,                       /* xRollback */
2997   0,                       /* xFindMethod */
2998   spellfix1Rename,         /* xRename */
2999 };
3000 
3001 /*
3002 ** Register the various functions and the virtual table.
3003 */
3004 static int spellfix1Register(sqlite3 *db){
3005   int rc = SQLITE_OK;
3006   int i;
3007   rc = sqlite3_create_function(db, "spellfix1_translit", 1,
3008                                SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3009                                 transliterateSqlFunc, 0, 0);
3010   if( rc==SQLITE_OK ){
3011     rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
3012                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3013                                   editdistSqlFunc, 0, 0);
3014   }
3015   if( rc==SQLITE_OK ){
3016     rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
3017                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3018                                   phoneticHashSqlFunc, 0, 0);
3019   }
3020   if( rc==SQLITE_OK ){
3021     rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
3022                                   SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3023                                   scriptCodeSqlFunc, 0, 0);
3024   }
3025   if( rc==SQLITE_OK ){
3026     rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
3027   }
3028   if( rc==SQLITE_OK ){
3029     rc = editDist3Install(db);
3030   }
3031 
3032   /* Verify sanity of the translit[] table */
3033   for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
3034     assert( translit[i].cFrom<translit[i+1].cFrom );
3035   }
3036 
3037   return rc;
3038 }
3039 
3040 #endif /* SQLITE_OMIT_VIRTUALTABLE */
3041 
3042 /*
3043 ** Extension load function.
3044 */
3045 #ifdef _WIN32
3046 __declspec(dllexport)
3047 #endif
3048 int sqlite3_spellfix_init(
3049   sqlite3 *db,
3050   char **pzErrMsg,
3051   const sqlite3_api_routines *pApi
3052 ){
3053   SQLITE_EXTENSION_INIT2(pApi);
3054 #ifndef SQLITE_OMIT_VIRTUALTABLE
3055   return spellfix1Register(db);
3056 #endif
3057   return SQLITE_OK;
3058 }
3059