xref: /sqlite-3.40.0/ext/misc/spellfix.c (revision e2d27e02)
1 /*
2 ** 2012 April 10
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used
14 ** to search a large vocabulary for close matches.  See separate
15 ** documentation (http://www.sqlite.org/spellfix1.html) for details.
16 */
17 #include "sqlite3ext.h"
18 SQLITE_EXTENSION_INIT1
19 
20 #ifndef SQLITE_AMALGAMATION
21 # include <string.h>
22 # include <stdio.h>
23 # include <stdlib.h>
24 # include <assert.h>
25 # define ALWAYS(X)  1
26 # define NEVER(X)   0
27   typedef unsigned char u8;
28   typedef unsigned short u16;
29 #endif
30 #include <ctype.h>
31 
32 #ifndef SQLITE_OMIT_VIRTUALTABLE
33 
34 /*
35 ** Character classes for ASCII characters:
36 **
37 **   0   ''        Silent letters:   H W
38 **   1   'A'       Any vowel:   A E I O U (Y)
39 **   2   'B'       A bilabeal stop or fricative:  B F P V W
40 **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
41 **   4   'D'       Alveolar stops:  D T
42 **   5   'H'       Letter H at the beginning of a word
43 **   6   'L'       Glide:  L
44 **   7   'R'       Semivowel:  R
45 **   8   'M'       Nasals:  M N
46 **   9   'Y'       Letter Y at the beginning of a word.
47 **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
48 **   11  ' '       White space
49 **   12  '?'       Other.
50 */
51 #define CCLASS_SILENT         0
52 #define CCLASS_VOWEL          1
53 #define CCLASS_B              2
54 #define CCLASS_C              3
55 #define CCLASS_D              4
56 #define CCLASS_H              5
57 #define CCLASS_L              6
58 #define CCLASS_R              7
59 #define CCLASS_M              8
60 #define CCLASS_Y              9
61 #define CCLASS_DIGIT         10
62 #define CCLASS_SPACE         11
63 #define CCLASS_OTHER         12
64 
65 /*
66 ** The following table gives the character class for non-initial ASCII
67 ** characters.
68 */
69 static const unsigned char midClass[] = {
70  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
71  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
72  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
73  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
74  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
75  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
76  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
77  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
78  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
79  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
80  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
81  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
82  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
83  /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
84  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
85  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
86  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
87  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
88  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
89  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
90  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
91  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
92  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
93  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
94  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
95  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
96  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
97  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
98  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
99  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
100  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
101  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
102  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
103  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
104  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
105  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
106  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
107  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
108  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
109  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
110  /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
111  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
112  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
113 };
114 /*
115 ** This tables gives the character class for ASCII characters that form the
116 ** initial character of a word.  The only difference from midClass is with
117 ** the letters H, W, and Y.
118 */
119 static const unsigned char initClass[] = {
120  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
121  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
122  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
123  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
124  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
125  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
126  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
127  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
128  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
129  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
130  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
131  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
132  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
133  /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
134  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
135  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
136  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
137  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
138  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
139  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
140  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
141  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
142  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
143  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
144  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
145  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
146  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
147  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
148  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
149  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
150  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
151  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
152  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
153  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
154  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
155  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
156  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
157  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
158  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
159  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
160  /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
161  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
162  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
163 };
164 
165 /*
166 ** Mapping from the character class number (0-13) to a symbol for each
167 ** character class.  Note that initClass[] can be used to map the class
168 ** symbol back into the class number.
169 */
170 static const unsigned char className[] = ".ABCDHLRMY9 ?";
171 
172 /*
173 ** Generate a "phonetic hash" from a string of ASCII characters
174 ** in zIn[0..nIn-1].
175 **
176 **   * Map characters by character class as defined above.
177 **   * Omit double-letters
178 **   * Omit vowels beside R and L
179 **   * Omit T when followed by CH
180 **   * Omit W when followed by R
181 **   * Omit D when followed by J or G
182 **   * Omit K in KN or G in GN at the beginning of a word
183 **
184 ** Space to hold the result is obtained from sqlite3_malloc()
185 **
186 ** Return NULL if memory allocation fails.
187 */
188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
189   unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
190   int i;
191   int nOut = 0;
192   char cPrev = 0x77;
193   char cPrevX = 0x77;
194   const unsigned char *aClass = initClass;
195 
196   if( zOut==0 ) return 0;
197   if( nIn>2 ){
198     switch( zIn[0] ){
199       case 'g':
200       case 'k': {
201         if( zIn[1]=='n' ){ zIn++; nIn--; }
202         break;
203       }
204     }
205   }
206   for(i=0; i<nIn; i++){
207     unsigned char c = zIn[i];
208     if( i+1<nIn ){
209       if( c=='w' && zIn[i+1]=='r' ) continue;
210       if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
211       if( i+2<nIn ){
212         if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
213       }
214     }
215     c = aClass[c&0x7f];
216     if( c==CCLASS_SPACE ) continue;
217     if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
218     aClass = midClass;
219     if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
220        continue; /* No vowels beside L or R */
221     }
222     if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
223        nOut--;   /* No vowels beside L or R */
224     }
225     cPrev = c;
226     if( c==CCLASS_SILENT ) continue;
227     cPrevX = c;
228     c = className[c];
229     assert( nOut>=0 );
230     if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
231   }
232   zOut[nOut] = 0;
233   return zOut;
234 }
235 
236 /*
237 ** This is an SQL function wrapper around phoneticHash().  See
238 ** the description of phoneticHash() for additional information.
239 */
240 static void phoneticHashSqlFunc(
241   sqlite3_context *context,
242   int argc,
243   sqlite3_value **argv
244 ){
245   const unsigned char *zIn;
246   unsigned char *zOut;
247 
248   zIn = sqlite3_value_text(argv[0]);
249   if( zIn==0 ) return;
250   zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
251   if( zOut==0 ){
252     sqlite3_result_error_nomem(context);
253   }else{
254     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
255   }
256 }
257 
258 /*
259 ** Return the character class number for a character given its
260 ** context.
261 */
262 static char characterClass(char cPrev, char c){
263   return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
264 }
265 
266 /*
267 ** Return the cost of inserting or deleting character c immediately
268 ** following character cPrev.  If cPrev==0, that means c is the first
269 ** character of the word.
270 */
271 static int insertOrDeleteCost(char cPrev, char c, char cNext){
272   char classC = characterClass(cPrev, c);
273   char classCprev;
274 
275   if( classC==CCLASS_SILENT ){
276     /* Insert or delete "silent" characters such as H or W */
277     return 1;
278   }
279   if( cPrev==c ){
280     /* Repeated characters, or miss a repeat */
281     return 10;
282   }
283   if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
284     return 20;  /* Insert a vowel before or after 'r' */
285   }
286   classCprev = characterClass(cPrev, cPrev);
287   if( classC==classCprev ){
288     if( classC==CCLASS_VOWEL ){
289       /* Remove or add a new vowel to a vowel cluster */
290       return 15;
291     }else{
292       /* Remove or add a consonant not in the same class */
293       return 50;
294     }
295   }
296 
297   /* any other character insertion or deletion */
298   return 100;
299 }
300 
301 /*
302 ** Divide the insertion cost by this factor when appending to the
303 ** end of the word.
304 */
305 #define FINAL_INS_COST_DIV  4
306 
307 /*
308 ** Return the cost of substituting cTo in place of cFrom assuming
309 ** the previous character is cPrev.  If cPrev==0 then cTo is the first
310 ** character of the word.
311 */
312 static int substituteCost(char cPrev, char cFrom, char cTo){
313   char classFrom, classTo;
314   if( cFrom==cTo ){
315     /* Exact match */
316     return 0;
317   }
318   if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
319     /* differ only in case */
320     return 0;
321   }
322   classFrom = characterClass(cPrev, cFrom);
323   classTo = characterClass(cPrev, cTo);
324   if( classFrom==classTo ){
325     /* Same character class */
326     return 40;
327   }
328   if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
329       && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
330     /* Convert from one consonant to another, but in a different class */
331     return 75;
332   }
333   /* Any other subsitution */
334   return 100;
335 }
336 
337 /*
338 ** Given two strings zA and zB which are pure ASCII, return the cost
339 ** of transforming zA into zB.  If zA ends with '*' assume that it is
340 ** a prefix of zB and give only minimal penalty for extra characters
341 ** on the end of zB.
342 **
343 ** Smaller numbers mean a closer match.
344 **
345 ** Negative values indicate an error:
346 **    -1  One of the inputs is NULL
347 **    -2  Non-ASCII characters on input
348 **    -3  Unable to allocate memory
349 **
350 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
351 ** of zB that matched the pattern in zA. If zA does not end with a '*',
352 ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
353 ** If zA does end in a '*', then it is the number of bytes in the prefix
354 ** of zB that was deemed to match zA.
355 */
356 static int editdist1(const char *zA, const char *zB, int *pnMatch){
357   int nA, nB;            /* Number of characters in zA[] and zB[] */
358   int xA, xB;            /* Loop counters for zA[] and zB[] */
359   char cA = 0, cB;       /* Current character of zA and zB */
360   char cAprev, cBprev;   /* Previous character of zA and zB */
361   char cAnext, cBnext;   /* Next character in zA and zB */
362   int d;                 /* North-west cost value */
363   int dc = 0;            /* North-west character value */
364   int res;               /* Final result */
365   int *m;                /* The cost matrix */
366   char *cx;              /* Corresponding character values */
367   int *toFree = 0;       /* Malloced space */
368   int nMatch = 0;
369   int mStack[60+15];     /* Stack space to use if not too much is needed */
370 
371   /* Early out if either input is NULL */
372   if( zA==0 || zB==0 ) return -1;
373 
374   /* Skip any common prefix */
375   while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
376   if( pnMatch ) *pnMatch = nMatch;
377   if( zA[0]==0 && zB[0]==0 ) return 0;
378 
379 #if 0
380   printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
381 #endif
382 
383   /* Verify input strings and measure their lengths */
384   for(nA=0; zA[nA]; nA++){
385     if( zA[nA]&0x80 ) return -2;
386   }
387   for(nB=0; zB[nB]; nB++){
388     if( zB[nB]&0x80 ) return -2;
389   }
390 
391   /* Special processing if either string is empty */
392   if( nA==0 ){
393     cBprev = (char)dc;
394     for(xB=res=0; (cB = zB[xB])!=0; xB++){
395       res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
396       cBprev = cB;
397     }
398     return res;
399   }
400   if( nB==0 ){
401     cAprev = (char)dc;
402     for(xA=res=0; (cA = zA[xA])!=0; xA++){
403       res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
404       cAprev = cA;
405     }
406     return res;
407   }
408 
409   /* A is a prefix of B */
410   if( zA[0]=='*' && zA[1]==0 ) return 0;
411 
412   /* Allocate and initialize the Wagner matrix */
413   if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
414     m = mStack;
415   }else{
416     m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
417     if( m==0 ) return -3;
418   }
419   cx = (char*)&m[nB+1];
420 
421   /* Compute the Wagner edit distance */
422   m[0] = 0;
423   cx[0] = (char)dc;
424   cBprev = (char)dc;
425   for(xB=1; xB<=nB; xB++){
426     cBnext = zB[xB];
427     cB = zB[xB-1];
428     cx[xB] = cB;
429     m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
430     cBprev = cB;
431   }
432   cAprev = (char)dc;
433   for(xA=1; xA<=nA; xA++){
434     int lastA = (xA==nA);
435     cA = zA[xA-1];
436     cAnext = zA[xA];
437     if( cA=='*' && lastA ) break;
438     d = m[0];
439     dc = cx[0];
440     m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
441     cBprev = 0;
442     for(xB=1; xB<=nB; xB++){
443       int totalCost, insCost, delCost, subCost, ncx;
444       cB = zB[xB-1];
445       cBnext = zB[xB];
446 
447       /* Cost to insert cB */
448       insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
449       if( lastA ) insCost /= FINAL_INS_COST_DIV;
450 
451       /* Cost to delete cA */
452       delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
453 
454       /* Cost to substitute cA->cB */
455       subCost = substituteCost(cx[xB-1], cA, cB);
456 
457       /* Best cost */
458       totalCost = insCost + m[xB-1];
459       ncx = cB;
460       if( (delCost + m[xB])<totalCost ){
461         totalCost = delCost + m[xB];
462         ncx = cA;
463       }
464       if( (subCost + d)<totalCost ){
465         totalCost = subCost + d;
466       }
467 
468 #if 0
469       printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
470              " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
471              xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
472              insCost, delCost, subCost, totalCost, ncx?ncx:' ');
473 #endif
474 
475       /* Update the matrix */
476       d = m[xB];
477       dc = cx[xB];
478       m[xB] = totalCost;
479       cx[xB] = (char)ncx;
480       cBprev = cB;
481     }
482     cAprev = cA;
483   }
484 
485   /* Free the wagner matrix and return the result */
486   if( cA=='*' ){
487     res = m[1];
488     for(xB=1; xB<=nB; xB++){
489       if( m[xB]<res ){
490         res = m[xB];
491         if( pnMatch ) *pnMatch = xB+nMatch;
492       }
493     }
494   }else{
495     res = m[nB];
496     /* In the current implementation, pnMatch is always NULL if zA does
497     ** not end in "*" */
498     assert( pnMatch==0 );
499   }
500   sqlite3_free(toFree);
501   return res;
502 }
503 
504 /*
505 ** Function:    editdist(A,B)
506 **
507 ** Return the cost of transforming string A into string B.  Both strings
508 ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
509 ** a prefix of B and extra characters on the end of B have minimal additional
510 ** cost.
511 */
512 static void editdistSqlFunc(
513   sqlite3_context *context,
514   int argc,
515   sqlite3_value **argv
516 ){
517   int res = editdist1(
518                     (const char*)sqlite3_value_text(argv[0]),
519                     (const char*)sqlite3_value_text(argv[1]),
520                     0);
521   if( res<0 ){
522     if( res==(-3) ){
523       sqlite3_result_error_nomem(context);
524     }else if( res==(-2) ){
525       sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
526     }else{
527       sqlite3_result_error(context, "NULL input to editdist()", -1);
528     }
529   }else{
530     sqlite3_result_int(context, res);
531   }
532 }
533 
534 /* End of the fixed-cost edit distance implementation
535 ******************************************************************************
536 *****************************************************************************
537 ** Begin: Configurable cost unicode edit distance routines
538 */
539 /* Forward declaration of structures */
540 typedef struct EditDist3Cost EditDist3Cost;
541 typedef struct EditDist3Config EditDist3Config;
542 typedef struct EditDist3Point EditDist3Point;
543 typedef struct EditDist3From EditDist3From;
544 typedef struct EditDist3FromString EditDist3FromString;
545 typedef struct EditDist3To EditDist3To;
546 typedef struct EditDist3ToString EditDist3ToString;
547 typedef struct EditDist3Lang EditDist3Lang;
548 
549 
550 /*
551 ** An entry in the edit cost table
552 */
553 struct EditDist3Cost {
554   EditDist3Cost *pNext;     /* Next cost element */
555   u8 nFrom;                 /* Number of bytes in aFrom */
556   u8 nTo;                   /* Number of bytes in aTo */
557   u16 iCost;                /* Cost of this transformation */
558   char a[4]    ;            /* FROM string followed by TO string */
559   /* Additional TO and FROM string bytes appended as necessary */
560 };
561 
562 /*
563 ** Edit costs for a particular language ID
564 */
565 struct EditDist3Lang {
566   int iLang;             /* Language ID */
567   int iInsCost;          /* Default insertion cost */
568   int iDelCost;          /* Default deletion cost */
569   int iSubCost;          /* Default substitution cost */
570   EditDist3Cost *pCost;  /* Costs */
571 };
572 
573 
574 /*
575 ** The default EditDist3Lang object, with default costs.
576 */
577 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
578 
579 /*
580 ** Complete configuration
581 */
582 struct EditDist3Config {
583   int nLang;             /* Number of language IDs.  Size of a[] */
584   EditDist3Lang *a;      /* One for each distinct language ID */
585 };
586 
587 /*
588 ** Extra information about each character in the FROM string.
589 */
590 struct EditDist3From {
591   int nSubst;              /* Number of substitution cost entries */
592   int nDel;                /* Number of deletion cost entries */
593   int nByte;               /* Number of bytes in this character */
594   EditDist3Cost **apSubst; /* Array of substitution costs for this element */
595   EditDist3Cost **apDel;   /* Array of deletion cost entries */
596 };
597 
598 /*
599 ** A precompiled FROM string.
600 *
601 ** In the common case we expect the FROM string to be reused multiple times.
602 ** In other words, the common case will be to measure the edit distance
603 ** from a single origin string to multiple target strings.
604 */
605 struct EditDist3FromString {
606   char *z;                 /* The complete text of the FROM string */
607   int n;                   /* Number of characters in the FROM string */
608   int isPrefix;            /* True if ends with '*' character */
609   EditDist3From *a;        /* Extra info about each char of the FROM string */
610 };
611 
612 /*
613 ** Extra information about each character in the TO string.
614 */
615 struct EditDist3To {
616   int nIns;                /* Number of insertion cost entries */
617   int nByte;               /* Number of bytes in this character */
618   EditDist3Cost **apIns;   /* Array of deletion cost entries */
619 };
620 
621 /*
622 ** A precompiled FROM string
623 */
624 struct EditDist3ToString {
625   char *z;                 /* The complete text of the TO string */
626   int n;                   /* Number of characters in the TO string */
627   EditDist3To *a;          /* Extra info about each char of the TO string */
628 };
629 
630 /*
631 ** Clear or delete an instance of the object that records all edit-distance
632 ** weights.
633 */
634 static void editDist3ConfigClear(EditDist3Config *p){
635   int i;
636   if( p==0 ) return;
637   for(i=0; i<p->nLang; i++){
638     EditDist3Cost *pCost, *pNext;
639     pCost = p->a[i].pCost;
640     while( pCost ){
641       pNext = pCost->pNext;
642       sqlite3_free(pCost);
643       pCost = pNext;
644     }
645   }
646   sqlite3_free(p->a);
647   memset(p, 0, sizeof(*p));
648 }
649 static void editDist3ConfigDelete(void *pIn){
650   EditDist3Config *p = (EditDist3Config*)pIn;
651   editDist3ConfigClear(p);
652   sqlite3_free(p);
653 }
654 
655 /*
656 ** Load all edit-distance weights from a table.
657 */
658 static int editDist3ConfigLoad(
659   EditDist3Config *p,      /* The edit distance configuration to load */
660   sqlite3 *db,            /* Load from this database */
661   const char *zTable      /* Name of the table from which to load */
662 ){
663   sqlite3_stmt *pStmt;
664   int rc, rc2;
665   char *zSql;
666   int iLangPrev = -9999;
667   EditDist3Lang *pLang = 0;
668 
669   zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
670                          " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
671   if( zSql==0 ) return SQLITE_NOMEM;
672   rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
673   sqlite3_free(zSql);
674   if( rc ) return rc;
675   editDist3ConfigClear(p);
676   while( sqlite3_step(pStmt)==SQLITE_ROW ){
677     int iLang = sqlite3_column_int(pStmt, 0);
678     const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
679     int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
680     const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
681     int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
682     int iCost = sqlite3_column_int(pStmt, 3);
683 
684     assert( zFrom!=0 || nFrom==0 );
685     assert( zTo!=0 || nTo==0 );
686     if( nFrom>100 || nTo>100 ) continue;
687     if( iCost<0 ) continue;
688     if( pLang==0 || iLang!=iLangPrev ){
689       EditDist3Lang *pNew;
690       pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
691       if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
692       p->a = pNew;
693       pLang = &p->a[p->nLang];
694       p->nLang++;
695       pLang->iLang = iLang;
696       pLang->iInsCost = 100;
697       pLang->iDelCost = 100;
698       pLang->iSubCost = 150;
699       pLang->pCost = 0;
700       iLangPrev = iLang;
701     }
702     if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
703       pLang->iDelCost = iCost;
704     }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
705       pLang->iInsCost = iCost;
706     }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
707       pLang->iSubCost = iCost;
708     }else{
709       EditDist3Cost *pCost;
710       int nExtra = nFrom + nTo - 4;
711       if( nExtra<0 ) nExtra = 0;
712       pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
713       if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
714       pCost->nFrom = (u8)nFrom;
715       pCost->nTo = (u8)nTo;
716       pCost->iCost = (u16)iCost;
717       memcpy(pCost->a, zFrom, nFrom);
718       memcpy(pCost->a + nFrom, zTo, nTo);
719       pCost->pNext = pLang->pCost;
720       pLang->pCost = pCost;
721     }
722   }
723   rc2 = sqlite3_finalize(pStmt);
724   if( rc==SQLITE_OK ) rc = rc2;
725   return rc;
726 }
727 
728 /*
729 ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
730 ** of N.
731 */
732 static int utf8Len(unsigned char c, int N){
733   int len = 1;
734   if( c>0x7f ){
735     if( (c&0xe0)==0xc0 ){
736       len = 2;
737     }else if( (c&0xf0)==0xe0 ){
738       len = 3;
739     }else{
740       len = 4;
741     }
742   }
743   if( len>N ) len = N;
744   return len;
745 }
746 
747 /*
748 ** Return TRUE (non-zero) if the To side of the given cost matches
749 ** the given string.
750 */
751 static int matchTo(EditDist3Cost *p, const char *z, int n){
752   if( p->nTo>n ) return 0;
753   if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
754   return 1;
755 }
756 
757 /*
758 ** Return TRUE (non-zero) if the From side of the given cost matches
759 ** the given string.
760 */
761 static int matchFrom(EditDist3Cost *p, const char *z, int n){
762   assert( p->nFrom<=n );
763   if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
764   return 1;
765 }
766 
767 /*
768 ** Return TRUE (non-zero) of the next FROM character and the next TO
769 ** character are the same.
770 */
771 static int matchFromTo(
772   EditDist3FromString *pStr,  /* Left hand string */
773   int n1,                     /* Index of comparison character on the left */
774   const char *z2,             /* Right-handl comparison character */
775   int n2                      /* Bytes remaining in z2[] */
776 ){
777   int b1 = pStr->a[n1].nByte;
778   if( b1>n2 ) return 0;
779   if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0;
780   return 1;
781 }
782 
783 /*
784 ** Delete an EditDist3FromString objecct
785 */
786 static void editDist3FromStringDelete(EditDist3FromString *p){
787   int i;
788   if( p ){
789     for(i=0; i<p->n; i++){
790       sqlite3_free(p->a[i].apDel);
791       sqlite3_free(p->a[i].apSubst);
792     }
793     sqlite3_free(p);
794   }
795 }
796 
797 /*
798 ** Create a EditDist3FromString object.
799 */
800 static EditDist3FromString *editDist3FromStringNew(
801   const EditDist3Lang *pLang,
802   const char *z,
803   int n
804 ){
805   EditDist3FromString *pStr;
806   EditDist3Cost *p;
807   int i;
808 
809   if( z==0 ) return 0;
810   if( n<0 ) n = (int)strlen(z);
811   pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
812   if( pStr==0 ) return 0;
813   pStr->a = (EditDist3From*)&pStr[1];
814   memset(pStr->a, 0, sizeof(pStr->a[0])*n);
815   pStr->n = n;
816   pStr->z = (char*)&pStr->a[n];
817   memcpy(pStr->z, z, n+1);
818   if( n && z[n-1]=='*' ){
819     pStr->isPrefix = 1;
820     n--;
821     pStr->n--;
822     pStr->z[n] = 0;
823   }else{
824     pStr->isPrefix = 0;
825   }
826 
827   for(i=0; i<n; i++){
828     EditDist3From *pFrom = &pStr->a[i];
829     memset(pFrom, 0, sizeof(*pFrom));
830     pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
831     for(p=pLang->pCost; p; p=p->pNext){
832       EditDist3Cost **apNew;
833       if( i+p->nFrom>n ) continue;
834       if( matchFrom(p, z+i, n-i)==0 ) continue;
835       if( p->nTo==0 ){
836         apNew = sqlite3_realloc64(pFrom->apDel,
837                                 sizeof(*apNew)*(pFrom->nDel+1));
838         if( apNew==0 ) break;
839         pFrom->apDel = apNew;
840         apNew[pFrom->nDel++] = p;
841       }else{
842         apNew = sqlite3_realloc64(pFrom->apSubst,
843                                 sizeof(*apNew)*(pFrom->nSubst+1));
844         if( apNew==0 ) break;
845         pFrom->apSubst = apNew;
846         apNew[pFrom->nSubst++] = p;
847       }
848     }
849     if( p ){
850       editDist3FromStringDelete(pStr);
851       pStr = 0;
852       break;
853     }
854   }
855   return pStr;
856 }
857 
858 /*
859 ** Update entry m[i] such that it is the minimum of its current value
860 ** and m[j]+iCost.
861 **
862 ** If the iCost is 1,000,000 or greater, then consider the cost to be
863 ** infinite and skip the update.
864 */
865 static void updateCost(
866   unsigned int *m,
867   int i,
868   int j,
869   int iCost
870 ){
871   assert( iCost>=0 );
872   if( iCost<10000 ){
873     unsigned int b = m[j] + iCost;
874     if( b<m[i] ) m[i] = b;
875   }
876 }
877 
878 /*
879 ** How much stack space (int bytes) to use for Wagner matrix in
880 ** editDist3Core().  If more space than this is required, the entire
881 ** matrix is taken from the heap.  To reduce the load on the memory
882 ** allocator, make this value as large as practical for the
883 ** architecture in use.
884 */
885 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
886 # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
887 #endif
888 
889 /* Compute the edit distance between two strings.
890 **
891 ** If an error occurs, return a negative number which is the error code.
892 **
893 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
894 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
895 ** not contain the pattern for a prefix-search, then this is always the number
896 ** of characters in z2. If pFrom does contain a prefix search pattern, then
897 ** it is the number of characters in the prefix of z2 that was deemed to
898 ** match pFrom.
899 */
900 static int editDist3Core(
901   EditDist3FromString *pFrom,  /* The FROM string */
902   const char *z2,              /* The TO string */
903   int n2,                      /* Length of the TO string */
904   const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
905   int *pnMatch                 /* OUT: Characters in matched prefix */
906 ){
907   int k, n;
908   int i1, b1;
909   int i2, b2;
910   EditDist3FromString f = *pFrom;
911   EditDist3To *a2;
912   unsigned int *m;
913   unsigned int *pToFree;
914   int szRow;
915   EditDist3Cost *p;
916   int res;
917   sqlite3_uint64 nByte;
918   unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
919 
920   /* allocate the Wagner matrix and the aTo[] array for the TO string */
921   n = (f.n+1)*(n2+1);
922   n = (n+1)&~1;
923   nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
924   if( nByte<=sizeof(stackSpace) ){
925     m = stackSpace;
926     pToFree = 0;
927   }else{
928     m = pToFree = sqlite3_malloc64( nByte );
929     if( m==0 ) return -1;            /* Out of memory */
930   }
931   a2 = (EditDist3To*)&m[n];
932   memset(a2, 0, sizeof(a2[0])*n2);
933 
934   /* Fill in the a1[] matrix for all characters of the TO string */
935   for(i2=0; i2<n2; i2++){
936     a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
937     for(p=pLang->pCost; p; p=p->pNext){
938       EditDist3Cost **apNew;
939       if( p->nFrom>0 ) continue;
940       if( i2+p->nTo>n2 ) continue;
941       if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
942       a2[i2].nIns++;
943       apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
944       if( apNew==0 ){
945         res = -1;  /* Out of memory */
946         goto editDist3Abort;
947       }
948       a2[i2].apIns = apNew;
949       a2[i2].apIns[a2[i2].nIns-1] = p;
950     }
951   }
952 
953   /* Prepare to compute the minimum edit distance */
954   szRow = f.n+1;
955   memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
956   m[0] = 0;
957 
958   /* First fill in the top-row of the matrix with FROM deletion costs */
959   for(i1=0; i1<f.n; i1 += b1){
960     b1 = f.a[i1].nByte;
961     updateCost(m, i1+b1, i1, pLang->iDelCost);
962     for(k=0; k<f.a[i1].nDel; k++){
963       p = f.a[i1].apDel[k];
964       updateCost(m, i1+p->nFrom, i1, p->iCost);
965     }
966   }
967 
968   /* Fill in all subsequent rows, top-to-bottom, left-to-right */
969   for(i2=0; i2<n2; i2 += b2){
970     int rx;      /* Starting index for current row */
971     int rxp;     /* Starting index for previous row */
972     b2 = a2[i2].nByte;
973     rx = szRow*(i2+b2);
974     rxp = szRow*i2;
975     updateCost(m, rx, rxp, pLang->iInsCost);
976     for(k=0; k<a2[i2].nIns; k++){
977       p = a2[i2].apIns[k];
978       updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
979     }
980     for(i1=0; i1<f.n; i1+=b1){
981       int cx;    /* Index of current cell */
982       int cxp;   /* Index of cell immediately to the left */
983       int cxd;   /* Index of cell to the left and one row above */
984       int cxu;   /* Index of cell immediately above */
985       b1 = f.a[i1].nByte;
986       cxp = rx + i1;
987       cx = cxp + b1;
988       cxd = rxp + i1;
989       cxu = cxd + b1;
990       updateCost(m, cx, cxp, pLang->iDelCost);
991       for(k=0; k<f.a[i1].nDel; k++){
992         p = f.a[i1].apDel[k];
993         updateCost(m, cxp+p->nFrom, cxp, p->iCost);
994       }
995       updateCost(m, cx, cxu, pLang->iInsCost);
996       if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
997         updateCost(m, cx, cxd, 0);
998       }
999       updateCost(m, cx, cxd, pLang->iSubCost);
1000       for(k=0; k<f.a[i1].nSubst; k++){
1001         p = f.a[i1].apSubst[k];
1002         if( matchTo(p, z2+i2, n2-i2) ){
1003           updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1004         }
1005       }
1006     }
1007   }
1008 
1009 #if 0  /* Enable for debugging */
1010   printf("         ^");
1011   for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1012   printf("\n   ^:");
1013   for(i1=0; i1<szRow; i1++){
1014     int v = m[i1];
1015     if( v>9999 ) printf(" ****");
1016     else         printf(" %4d", v);
1017   }
1018   printf("\n");
1019   for(i2=0; i2<n2; i2++){
1020     printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1021     for(i1=0; i1<szRow; i1++){
1022       int v = m[(i2+1)*szRow+i1];
1023       if( v>9999 ) printf(" ****");
1024       else         printf(" %4d", v);
1025     }
1026     printf("\n");
1027   }
1028 #endif
1029 
1030   /* Free memory allocations and return the result */
1031   res = (int)m[szRow*(n2+1)-1];
1032   n = n2;
1033   if( f.isPrefix ){
1034     for(i2=1; i2<=n2; i2++){
1035       int b = m[szRow*i2-1];
1036       if( b<=res ){
1037         res = b;
1038         n = i2 - 1;
1039       }
1040     }
1041   }
1042   if( pnMatch ){
1043     int nExtra = 0;
1044     for(k=0; k<n; k++){
1045       if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1046     }
1047     *pnMatch = n - nExtra;
1048   }
1049 
1050 editDist3Abort:
1051   for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1052   sqlite3_free(pToFree);
1053   return res;
1054 }
1055 
1056 /*
1057 ** Get an appropriate EditDist3Lang object.
1058 */
1059 static const EditDist3Lang *editDist3FindLang(
1060   EditDist3Config *pConfig,
1061   int iLang
1062 ){
1063   int i;
1064   for(i=0; i<pConfig->nLang; i++){
1065     if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1066   }
1067   return &editDist3Lang;
1068 }
1069 
1070 /*
1071 ** Function:    editdist3(A,B,iLang)
1072 **              editdist3(tablename)
1073 **
1074 ** Return the cost of transforming string A into string B using edit
1075 ** weights for iLang.
1076 **
1077 ** The second form loads edit weights into memory from a table.
1078 */
1079 static void editDist3SqlFunc(
1080   sqlite3_context *context,
1081   int argc,
1082   sqlite3_value **argv
1083 ){
1084   EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1085   sqlite3 *db = sqlite3_context_db_handle(context);
1086   int rc;
1087   if( argc==1 ){
1088     const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1089     rc = editDist3ConfigLoad(pConfig, db, zTable);
1090     if( rc ) sqlite3_result_error_code(context, rc);
1091   }else{
1092     const char *zA = (const char*)sqlite3_value_text(argv[0]);
1093     const char *zB = (const char*)sqlite3_value_text(argv[1]);
1094     int nA = sqlite3_value_bytes(argv[0]);
1095     int nB = sqlite3_value_bytes(argv[1]);
1096     int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1097     const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1098     EditDist3FromString *pFrom;
1099     int dist;
1100 
1101     pFrom = editDist3FromStringNew(pLang, zA, nA);
1102     if( pFrom==0 ){
1103       sqlite3_result_error_nomem(context);
1104       return;
1105     }
1106     dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1107     editDist3FromStringDelete(pFrom);
1108     if( dist==(-1) ){
1109       sqlite3_result_error_nomem(context);
1110     }else{
1111       sqlite3_result_int(context, dist);
1112     }
1113   }
1114 }
1115 
1116 /*
1117 ** Register the editDist3 function with SQLite
1118 */
1119 static int editDist3Install(sqlite3 *db){
1120   int rc;
1121   EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1122   if( pConfig==0 ) return SQLITE_NOMEM;
1123   memset(pConfig, 0, sizeof(*pConfig));
1124   rc = sqlite3_create_function_v2(db, "editdist3",
1125               2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1126               editDist3SqlFunc, 0, 0, 0);
1127   if( rc==SQLITE_OK ){
1128     rc = sqlite3_create_function_v2(db, "editdist3",
1129                 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1130                 editDist3SqlFunc, 0, 0, 0);
1131   }
1132   if( rc==SQLITE_OK ){
1133     rc = sqlite3_create_function_v2(db, "editdist3",
1134                 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1135                 editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1136   }else{
1137     sqlite3_free(pConfig);
1138   }
1139   return rc;
1140 }
1141 /* End configurable cost unicode edit distance routines
1142 ******************************************************************************
1143 ******************************************************************************
1144 ** Begin transliterate unicode-to-ascii implementation
1145 */
1146 
1147 #if !SQLITE_AMALGAMATION
1148 /*
1149 ** This lookup table is used to help decode the first byte of
1150 ** a multi-byte UTF8 character.
1151 */
1152 static const unsigned char sqlite3Utf8Trans1[] = {
1153   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1154   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1155   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1156   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1157   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1158   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1159   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1160   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1161 };
1162 #endif
1163 
1164 /*
1165 ** Return the value of the first UTF-8 character in the string.
1166 */
1167 static int utf8Read(const unsigned char *z, int n, int *pSize){
1168   int c, i;
1169 
1170   /* All callers to this routine (in the current implementation)
1171   ** always have n>0. */
1172   if( NEVER(n==0) ){
1173     c = i = 0;
1174   }else{
1175     c = z[0];
1176     i = 1;
1177     if( c>=0xc0 ){
1178       c = sqlite3Utf8Trans1[c-0xc0];
1179       while( i<n && (z[i] & 0xc0)==0x80 ){
1180         c = (c<<6) + (0x3f & z[i++]);
1181       }
1182     }
1183   }
1184   *pSize = i;
1185   return c;
1186 }
1187 
1188 /*
1189 ** Return the number of characters in the utf-8 string in the nIn byte
1190 ** buffer pointed to by zIn.
1191 */
1192 static int utf8Charlen(const char *zIn, int nIn){
1193   int i;
1194   int nChar = 0;
1195   for(i=0; i<nIn; nChar++){
1196     int sz;
1197     utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1198     i += sz;
1199   }
1200   return nChar;
1201 }
1202 
1203 typedef struct Transliteration Transliteration;
1204 struct Transliteration {
1205  unsigned short int cFrom;
1206  unsigned char cTo0, cTo1, cTo2, cTo3;
1207 };
1208 
1209 /*
1210 ** Table of translations from unicode characters into ASCII.
1211 */
1212 static const Transliteration translit[] = {
1213   { 0x00A0,  0x20, 0x00, 0x00, 0x00 },  /*   to   */
1214   { 0x00B5,  0x75, 0x00, 0x00, 0x00 },  /* µ to u */
1215   { 0x00C0,  0x41, 0x00, 0x00, 0x00 },  /* À to A */
1216   { 0x00C1,  0x41, 0x00, 0x00, 0x00 },  /* Á to A */
1217   { 0x00C2,  0x41, 0x00, 0x00, 0x00 },  /* Â to A */
1218   { 0x00C3,  0x41, 0x00, 0x00, 0x00 },  /* Ã to A */
1219   { 0x00C4,  0x41, 0x65, 0x00, 0x00 },  /* Ä to Ae */
1220   { 0x00C5,  0x41, 0x61, 0x00, 0x00 },  /* Å to Aa */
1221   { 0x00C6,  0x41, 0x45, 0x00, 0x00 },  /* Æ to AE */
1222   { 0x00C7,  0x43, 0x00, 0x00, 0x00 },  /* Ç to C */
1223   { 0x00C8,  0x45, 0x00, 0x00, 0x00 },  /* È to E */
1224   { 0x00C9,  0x45, 0x00, 0x00, 0x00 },  /* É to E */
1225   { 0x00CA,  0x45, 0x00, 0x00, 0x00 },  /* Ê to E */
1226   { 0x00CB,  0x45, 0x00, 0x00, 0x00 },  /* Ë to E */
1227   { 0x00CC,  0x49, 0x00, 0x00, 0x00 },  /* Ì to I */
1228   { 0x00CD,  0x49, 0x00, 0x00, 0x00 },  /* Í to I */
1229   { 0x00CE,  0x49, 0x00, 0x00, 0x00 },  /* Î to I */
1230   { 0x00CF,  0x49, 0x00, 0x00, 0x00 },  /* Ï to I */
1231   { 0x00D0,  0x44, 0x00, 0x00, 0x00 },  /* Ð to D */
1232   { 0x00D1,  0x4E, 0x00, 0x00, 0x00 },  /* Ñ to N */
1233   { 0x00D2,  0x4F, 0x00, 0x00, 0x00 },  /* Ò to O */
1234   { 0x00D3,  0x4F, 0x00, 0x00, 0x00 },  /* Ó to O */
1235   { 0x00D4,  0x4F, 0x00, 0x00, 0x00 },  /* Ô to O */
1236   { 0x00D5,  0x4F, 0x00, 0x00, 0x00 },  /* Õ to O */
1237   { 0x00D6,  0x4F, 0x65, 0x00, 0x00 },  /* Ö to Oe */
1238   { 0x00D7,  0x78, 0x00, 0x00, 0x00 },  /* × to x */
1239   { 0x00D8,  0x4F, 0x00, 0x00, 0x00 },  /* Ø to O */
1240   { 0x00D9,  0x55, 0x00, 0x00, 0x00 },  /* Ù to U */
1241   { 0x00DA,  0x55, 0x00, 0x00, 0x00 },  /* Ú to U */
1242   { 0x00DB,  0x55, 0x00, 0x00, 0x00 },  /* Û to U */
1243   { 0x00DC,  0x55, 0x65, 0x00, 0x00 },  /* Ü to Ue */
1244   { 0x00DD,  0x59, 0x00, 0x00, 0x00 },  /* Ý to Y */
1245   { 0x00DE,  0x54, 0x68, 0x00, 0x00 },  /* Þ to Th */
1246   { 0x00DF,  0x73, 0x73, 0x00, 0x00 },  /* ß to ss */
1247   { 0x00E0,  0x61, 0x00, 0x00, 0x00 },  /* à to a */
1248   { 0x00E1,  0x61, 0x00, 0x00, 0x00 },  /* á to a */
1249   { 0x00E2,  0x61, 0x00, 0x00, 0x00 },  /* â to a */
1250   { 0x00E3,  0x61, 0x00, 0x00, 0x00 },  /* ã to a */
1251   { 0x00E4,  0x61, 0x65, 0x00, 0x00 },  /* ä to ae */
1252   { 0x00E5,  0x61, 0x61, 0x00, 0x00 },  /* å to aa */
1253   { 0x00E6,  0x61, 0x65, 0x00, 0x00 },  /* æ to ae */
1254   { 0x00E7,  0x63, 0x00, 0x00, 0x00 },  /* ç to c */
1255   { 0x00E8,  0x65, 0x00, 0x00, 0x00 },  /* è to e */
1256   { 0x00E9,  0x65, 0x00, 0x00, 0x00 },  /* é to e */
1257   { 0x00EA,  0x65, 0x00, 0x00, 0x00 },  /* ê to e */
1258   { 0x00EB,  0x65, 0x00, 0x00, 0x00 },  /* ë to e */
1259   { 0x00EC,  0x69, 0x00, 0x00, 0x00 },  /* ì to i */
1260   { 0x00ED,  0x69, 0x00, 0x00, 0x00 },  /* í to i */
1261   { 0x00EE,  0x69, 0x00, 0x00, 0x00 },  /* î to i */
1262   { 0x00EF,  0x69, 0x00, 0x00, 0x00 },  /* ï to i */
1263   { 0x00F0,  0x64, 0x00, 0x00, 0x00 },  /* ð to d */
1264   { 0x00F1,  0x6E, 0x00, 0x00, 0x00 },  /* ñ to n */
1265   { 0x00F2,  0x6F, 0x00, 0x00, 0x00 },  /* ò to o */
1266   { 0x00F3,  0x6F, 0x00, 0x00, 0x00 },  /* ó to o */
1267   { 0x00F4,  0x6F, 0x00, 0x00, 0x00 },  /* ô to o */
1268   { 0x00F5,  0x6F, 0x00, 0x00, 0x00 },  /* õ to o */
1269   { 0x00F6,  0x6F, 0x65, 0x00, 0x00 },  /* ö to oe */
1270   { 0x00F7,  0x3A, 0x00, 0x00, 0x00 },  /* ÷ to : */
1271   { 0x00F8,  0x6F, 0x00, 0x00, 0x00 },  /* ø to o */
1272   { 0x00F9,  0x75, 0x00, 0x00, 0x00 },  /* ù to u */
1273   { 0x00FA,  0x75, 0x00, 0x00, 0x00 },  /* ú to u */
1274   { 0x00FB,  0x75, 0x00, 0x00, 0x00 },  /* û to u */
1275   { 0x00FC,  0x75, 0x65, 0x00, 0x00 },  /* ü to ue */
1276   { 0x00FD,  0x79, 0x00, 0x00, 0x00 },  /* ý to y */
1277   { 0x00FE,  0x74, 0x68, 0x00, 0x00 },  /* þ to th */
1278   { 0x00FF,  0x79, 0x00, 0x00, 0x00 },  /* ÿ to y */
1279   { 0x0100,  0x41, 0x00, 0x00, 0x00 },  /* Ā to A */
1280   { 0x0101,  0x61, 0x00, 0x00, 0x00 },  /* ā to a */
1281   { 0x0102,  0x41, 0x00, 0x00, 0x00 },  /* Ă to A */
1282   { 0x0103,  0x61, 0x00, 0x00, 0x00 },  /* ă to a */
1283   { 0x0104,  0x41, 0x00, 0x00, 0x00 },  /* Ą to A */
1284   { 0x0105,  0x61, 0x00, 0x00, 0x00 },  /* ą to a */
1285   { 0x0106,  0x43, 0x00, 0x00, 0x00 },  /* Ć to C */
1286   { 0x0107,  0x63, 0x00, 0x00, 0x00 },  /* ć to c */
1287   { 0x0108,  0x43, 0x68, 0x00, 0x00 },  /* Ĉ to Ch */
1288   { 0x0109,  0x63, 0x68, 0x00, 0x00 },  /* ĉ to ch */
1289   { 0x010A,  0x43, 0x00, 0x00, 0x00 },  /* Ċ to C */
1290   { 0x010B,  0x63, 0x00, 0x00, 0x00 },  /* ċ to c */
1291   { 0x010C,  0x43, 0x00, 0x00, 0x00 },  /* Č to C */
1292   { 0x010D,  0x63, 0x00, 0x00, 0x00 },  /* č to c */
1293   { 0x010E,  0x44, 0x00, 0x00, 0x00 },  /* Ď to D */
1294   { 0x010F,  0x64, 0x00, 0x00, 0x00 },  /* ď to d */
1295   { 0x0110,  0x44, 0x00, 0x00, 0x00 },  /* Đ to D */
1296   { 0x0111,  0x64, 0x00, 0x00, 0x00 },  /* đ to d */
1297   { 0x0112,  0x45, 0x00, 0x00, 0x00 },  /* Ē to E */
1298   { 0x0113,  0x65, 0x00, 0x00, 0x00 },  /* ē to e */
1299   { 0x0114,  0x45, 0x00, 0x00, 0x00 },  /* Ĕ to E */
1300   { 0x0115,  0x65, 0x00, 0x00, 0x00 },  /* ĕ to e */
1301   { 0x0116,  0x45, 0x00, 0x00, 0x00 },  /* Ė to E */
1302   { 0x0117,  0x65, 0x00, 0x00, 0x00 },  /* ė to e */
1303   { 0x0118,  0x45, 0x00, 0x00, 0x00 },  /* Ę to E */
1304   { 0x0119,  0x65, 0x00, 0x00, 0x00 },  /* ę to e */
1305   { 0x011A,  0x45, 0x00, 0x00, 0x00 },  /* Ě to E */
1306   { 0x011B,  0x65, 0x00, 0x00, 0x00 },  /* ě to e */
1307   { 0x011C,  0x47, 0x68, 0x00, 0x00 },  /* Ĝ to Gh */
1308   { 0x011D,  0x67, 0x68, 0x00, 0x00 },  /* ĝ to gh */
1309   { 0x011E,  0x47, 0x00, 0x00, 0x00 },  /* Ğ to G */
1310   { 0x011F,  0x67, 0x00, 0x00, 0x00 },  /* ğ to g */
1311   { 0x0120,  0x47, 0x00, 0x00, 0x00 },  /* Ġ to G */
1312   { 0x0121,  0x67, 0x00, 0x00, 0x00 },  /* ġ to g */
1313   { 0x0122,  0x47, 0x00, 0x00, 0x00 },  /* Ģ to G */
1314   { 0x0123,  0x67, 0x00, 0x00, 0x00 },  /* ģ to g */
1315   { 0x0124,  0x48, 0x68, 0x00, 0x00 },  /* Ĥ to Hh */
1316   { 0x0125,  0x68, 0x68, 0x00, 0x00 },  /* ĥ to hh */
1317   { 0x0126,  0x48, 0x00, 0x00, 0x00 },  /* Ħ to H */
1318   { 0x0127,  0x68, 0x00, 0x00, 0x00 },  /* ħ to h */
1319   { 0x0128,  0x49, 0x00, 0x00, 0x00 },  /* Ĩ to I */
1320   { 0x0129,  0x69, 0x00, 0x00, 0x00 },  /* ĩ to i */
1321   { 0x012A,  0x49, 0x00, 0x00, 0x00 },  /* Ī to I */
1322   { 0x012B,  0x69, 0x00, 0x00, 0x00 },  /* ī to i */
1323   { 0x012C,  0x49, 0x00, 0x00, 0x00 },  /* Ĭ to I */
1324   { 0x012D,  0x69, 0x00, 0x00, 0x00 },  /* ĭ to i */
1325   { 0x012E,  0x49, 0x00, 0x00, 0x00 },  /* Į to I */
1326   { 0x012F,  0x69, 0x00, 0x00, 0x00 },  /* į to i */
1327   { 0x0130,  0x49, 0x00, 0x00, 0x00 },  /* İ to I */
1328   { 0x0131,  0x69, 0x00, 0x00, 0x00 },  /* ı to i */
1329   { 0x0132,  0x49, 0x4A, 0x00, 0x00 },  /* IJ to IJ */
1330   { 0x0133,  0x69, 0x6A, 0x00, 0x00 },  /* ij to ij */
1331   { 0x0134,  0x4A, 0x68, 0x00, 0x00 },  /* Ĵ to Jh */
1332   { 0x0135,  0x6A, 0x68, 0x00, 0x00 },  /* ĵ to jh */
1333   { 0x0136,  0x4B, 0x00, 0x00, 0x00 },  /* Ķ to K */
1334   { 0x0137,  0x6B, 0x00, 0x00, 0x00 },  /* ķ to k */
1335   { 0x0138,  0x6B, 0x00, 0x00, 0x00 },  /* ĸ to k */
1336   { 0x0139,  0x4C, 0x00, 0x00, 0x00 },  /* Ĺ to L */
1337   { 0x013A,  0x6C, 0x00, 0x00, 0x00 },  /* ĺ to l */
1338   { 0x013B,  0x4C, 0x00, 0x00, 0x00 },  /* Ļ to L */
1339   { 0x013C,  0x6C, 0x00, 0x00, 0x00 },  /* ļ to l */
1340   { 0x013D,  0x4C, 0x00, 0x00, 0x00 },  /* Ľ to L */
1341   { 0x013E,  0x6C, 0x00, 0x00, 0x00 },  /* ľ to l */
1342   { 0x013F,  0x4C, 0x2E, 0x00, 0x00 },  /* Ŀ to L. */
1343   { 0x0140,  0x6C, 0x2E, 0x00, 0x00 },  /* ŀ to l. */
1344   { 0x0141,  0x4C, 0x00, 0x00, 0x00 },  /* Ł to L */
1345   { 0x0142,  0x6C, 0x00, 0x00, 0x00 },  /* ł to l */
1346   { 0x0143,  0x4E, 0x00, 0x00, 0x00 },  /* Ń to N */
1347   { 0x0144,  0x6E, 0x00, 0x00, 0x00 },  /* ń to n */
1348   { 0x0145,  0x4E, 0x00, 0x00, 0x00 },  /* Ņ to N */
1349   { 0x0146,  0x6E, 0x00, 0x00, 0x00 },  /* ņ to n */
1350   { 0x0147,  0x4E, 0x00, 0x00, 0x00 },  /* Ň to N */
1351   { 0x0148,  0x6E, 0x00, 0x00, 0x00 },  /* ň to n */
1352   { 0x0149,  0x27, 0x6E, 0x00, 0x00 },  /* ʼn to 'n */
1353   { 0x014A,  0x4E, 0x47, 0x00, 0x00 },  /* Ŋ to NG */
1354   { 0x014B,  0x6E, 0x67, 0x00, 0x00 },  /* ŋ to ng */
1355   { 0x014C,  0x4F, 0x00, 0x00, 0x00 },  /* Ō to O */
1356   { 0x014D,  0x6F, 0x00, 0x00, 0x00 },  /* ō to o */
1357   { 0x014E,  0x4F, 0x00, 0x00, 0x00 },  /* Ŏ to O */
1358   { 0x014F,  0x6F, 0x00, 0x00, 0x00 },  /* ŏ to o */
1359   { 0x0150,  0x4F, 0x00, 0x00, 0x00 },  /* Ő to O */
1360   { 0x0151,  0x6F, 0x00, 0x00, 0x00 },  /* ő to o */
1361   { 0x0152,  0x4F, 0x45, 0x00, 0x00 },  /* Œ to OE */
1362   { 0x0153,  0x6F, 0x65, 0x00, 0x00 },  /* œ to oe */
1363   { 0x0154,  0x52, 0x00, 0x00, 0x00 },  /* Ŕ to R */
1364   { 0x0155,  0x72, 0x00, 0x00, 0x00 },  /* ŕ to r */
1365   { 0x0156,  0x52, 0x00, 0x00, 0x00 },  /* Ŗ to R */
1366   { 0x0157,  0x72, 0x00, 0x00, 0x00 },  /* ŗ to r */
1367   { 0x0158,  0x52, 0x00, 0x00, 0x00 },  /* Ř to R */
1368   { 0x0159,  0x72, 0x00, 0x00, 0x00 },  /* ř to r */
1369   { 0x015A,  0x53, 0x00, 0x00, 0x00 },  /* Ś to S */
1370   { 0x015B,  0x73, 0x00, 0x00, 0x00 },  /* ś to s */
1371   { 0x015C,  0x53, 0x68, 0x00, 0x00 },  /* Ŝ to Sh */
1372   { 0x015D,  0x73, 0x68, 0x00, 0x00 },  /* ŝ to sh */
1373   { 0x015E,  0x53, 0x00, 0x00, 0x00 },  /* Ş to S */
1374   { 0x015F,  0x73, 0x00, 0x00, 0x00 },  /* ş to s */
1375   { 0x0160,  0x53, 0x00, 0x00, 0x00 },  /* Š to S */
1376   { 0x0161,  0x73, 0x00, 0x00, 0x00 },  /* š to s */
1377   { 0x0162,  0x54, 0x00, 0x00, 0x00 },  /* Ţ to T */
1378   { 0x0163,  0x74, 0x00, 0x00, 0x00 },  /* ţ to t */
1379   { 0x0164,  0x54, 0x00, 0x00, 0x00 },  /* Ť to T */
1380   { 0x0165,  0x74, 0x00, 0x00, 0x00 },  /* ť to t */
1381   { 0x0166,  0x54, 0x00, 0x00, 0x00 },  /* Ŧ to T */
1382   { 0x0167,  0x74, 0x00, 0x00, 0x00 },  /* ŧ to t */
1383   { 0x0168,  0x55, 0x00, 0x00, 0x00 },  /* Ũ to U */
1384   { 0x0169,  0x75, 0x00, 0x00, 0x00 },  /* ũ to u */
1385   { 0x016A,  0x55, 0x00, 0x00, 0x00 },  /* Ū to U */
1386   { 0x016B,  0x75, 0x00, 0x00, 0x00 },  /* ū to u */
1387   { 0x016C,  0x55, 0x00, 0x00, 0x00 },  /* Ŭ to U */
1388   { 0x016D,  0x75, 0x00, 0x00, 0x00 },  /* ŭ to u */
1389   { 0x016E,  0x55, 0x00, 0x00, 0x00 },  /* Ů to U */
1390   { 0x016F,  0x75, 0x00, 0x00, 0x00 },  /* ů to u */
1391   { 0x0170,  0x55, 0x00, 0x00, 0x00 },  /* Ű to U */
1392   { 0x0171,  0x75, 0x00, 0x00, 0x00 },  /* ű to u */
1393   { 0x0172,  0x55, 0x00, 0x00, 0x00 },  /* Ų to U */
1394   { 0x0173,  0x75, 0x00, 0x00, 0x00 },  /* ų to u */
1395   { 0x0174,  0x57, 0x00, 0x00, 0x00 },  /* Ŵ to W */
1396   { 0x0175,  0x77, 0x00, 0x00, 0x00 },  /* ŵ to w */
1397   { 0x0176,  0x59, 0x00, 0x00, 0x00 },  /* Ŷ to Y */
1398   { 0x0177,  0x79, 0x00, 0x00, 0x00 },  /* ŷ to y */
1399   { 0x0178,  0x59, 0x00, 0x00, 0x00 },  /* Ÿ to Y */
1400   { 0x0179,  0x5A, 0x00, 0x00, 0x00 },  /* Ź to Z */
1401   { 0x017A,  0x7A, 0x00, 0x00, 0x00 },  /* ź to z */
1402   { 0x017B,  0x5A, 0x00, 0x00, 0x00 },  /* Ż to Z */
1403   { 0x017C,  0x7A, 0x00, 0x00, 0x00 },  /* ż to z */
1404   { 0x017D,  0x5A, 0x00, 0x00, 0x00 },  /* Ž to Z */
1405   { 0x017E,  0x7A, 0x00, 0x00, 0x00 },  /* ž to z */
1406   { 0x017F,  0x73, 0x00, 0x00, 0x00 },  /* ſ to s */
1407   { 0x0192,  0x66, 0x00, 0x00, 0x00 },  /* ƒ to f */
1408   { 0x0218,  0x53, 0x00, 0x00, 0x00 },  /* Ș to S */
1409   { 0x0219,  0x73, 0x00, 0x00, 0x00 },  /* ș to s */
1410   { 0x021A,  0x54, 0x00, 0x00, 0x00 },  /* Ț to T */
1411   { 0x021B,  0x74, 0x00, 0x00, 0x00 },  /* ț to t */
1412   { 0x0386,  0x41, 0x00, 0x00, 0x00 },  /* Ά to A */
1413   { 0x0388,  0x45, 0x00, 0x00, 0x00 },  /* Έ to E */
1414   { 0x0389,  0x49, 0x00, 0x00, 0x00 },  /* Ή to I */
1415   { 0x038A,  0x49, 0x00, 0x00, 0x00 },  /* Ί to I */
1416   { 0x038C,  0x4f, 0x00, 0x00, 0x00 },  /* Ό to O */
1417   { 0x038E,  0x59, 0x00, 0x00, 0x00 },  /* Ύ to Y */
1418   { 0x038F,  0x4f, 0x00, 0x00, 0x00 },  /* Ώ to O */
1419   { 0x0390,  0x69, 0x00, 0x00, 0x00 },  /* ΐ to i */
1420   { 0x0391,  0x41, 0x00, 0x00, 0x00 },  /* Α to A */
1421   { 0x0392,  0x42, 0x00, 0x00, 0x00 },  /* Β to B */
1422   { 0x0393,  0x47, 0x00, 0x00, 0x00 },  /* Γ to G */
1423   { 0x0394,  0x44, 0x00, 0x00, 0x00 },  /* Δ to D */
1424   { 0x0395,  0x45, 0x00, 0x00, 0x00 },  /* Ε to E */
1425   { 0x0396,  0x5a, 0x00, 0x00, 0x00 },  /* Ζ to Z */
1426   { 0x0397,  0x49, 0x00, 0x00, 0x00 },  /* Η to I */
1427   { 0x0398,  0x54, 0x68, 0x00, 0x00 },  /* Θ to Th */
1428   { 0x0399,  0x49, 0x00, 0x00, 0x00 },  /* Ι to I */
1429   { 0x039A,  0x4b, 0x00, 0x00, 0x00 },  /* Κ to K */
1430   { 0x039B,  0x4c, 0x00, 0x00, 0x00 },  /* Λ to L */
1431   { 0x039C,  0x4d, 0x00, 0x00, 0x00 },  /* Μ to M */
1432   { 0x039D,  0x4e, 0x00, 0x00, 0x00 },  /* Ν to N */
1433   { 0x039E,  0x58, 0x00, 0x00, 0x00 },  /* Ξ to X */
1434   { 0x039F,  0x4f, 0x00, 0x00, 0x00 },  /* Ο to O */
1435   { 0x03A0,  0x50, 0x00, 0x00, 0x00 },  /* Π to P */
1436   { 0x03A1,  0x52, 0x00, 0x00, 0x00 },  /* Ρ to R */
1437   { 0x03A3,  0x53, 0x00, 0x00, 0x00 },  /* Σ to S */
1438   { 0x03A4,  0x54, 0x00, 0x00, 0x00 },  /* Τ to T */
1439   { 0x03A5,  0x59, 0x00, 0x00, 0x00 },  /* Υ to Y */
1440   { 0x03A6,  0x46, 0x00, 0x00, 0x00 },  /* Φ to F */
1441   { 0x03A7,  0x43, 0x68, 0x00, 0x00 },  /* Χ to Ch */
1442   { 0x03A8,  0x50, 0x73, 0x00, 0x00 },  /* Ψ to Ps */
1443   { 0x03A9,  0x4f, 0x00, 0x00, 0x00 },  /* Ω to O */
1444   { 0x03AA,  0x49, 0x00, 0x00, 0x00 },  /* Ϊ to I */
1445   { 0x03AB,  0x59, 0x00, 0x00, 0x00 },  /* Ϋ to Y */
1446   { 0x03AC,  0x61, 0x00, 0x00, 0x00 },  /* ά to a */
1447   { 0x03AD,  0x65, 0x00, 0x00, 0x00 },  /* έ to e */
1448   { 0x03AE,  0x69, 0x00, 0x00, 0x00 },  /* ή to i */
1449   { 0x03AF,  0x69, 0x00, 0x00, 0x00 },  /* ί to i */
1450   { 0x03B1,  0x61, 0x00, 0x00, 0x00 },  /* α to a */
1451   { 0x03B2,  0x62, 0x00, 0x00, 0x00 },  /* β to b */
1452   { 0x03B3,  0x67, 0x00, 0x00, 0x00 },  /* γ to g */
1453   { 0x03B4,  0x64, 0x00, 0x00, 0x00 },  /* δ to d */
1454   { 0x03B5,  0x65, 0x00, 0x00, 0x00 },  /* ε to e */
1455   { 0x03B6,  0x7a, 0x00, 0x00, 0x00 },  /* ζ to z */
1456   { 0x03B7,  0x69, 0x00, 0x00, 0x00 },  /* η to i */
1457   { 0x03B8,  0x74, 0x68, 0x00, 0x00 },  /* θ to th */
1458   { 0x03B9,  0x69, 0x00, 0x00, 0x00 },  /* ι to i */
1459   { 0x03BA,  0x6b, 0x00, 0x00, 0x00 },  /* κ to k */
1460   { 0x03BB,  0x6c, 0x00, 0x00, 0x00 },  /* λ to l */
1461   { 0x03BC,  0x6d, 0x00, 0x00, 0x00 },  /* μ to m */
1462   { 0x03BD,  0x6e, 0x00, 0x00, 0x00 },  /* ν to n */
1463   { 0x03BE,  0x78, 0x00, 0x00, 0x00 },  /* ξ to x */
1464   { 0x03BF,  0x6f, 0x00, 0x00, 0x00 },  /* ο to o */
1465   { 0x03C0,  0x70, 0x00, 0x00, 0x00 },  /* π to p */
1466   { 0x03C1,  0x72, 0x00, 0x00, 0x00 },  /* ρ to r */
1467   { 0x03C3,  0x73, 0x00, 0x00, 0x00 },  /* σ to s */
1468   { 0x03C4,  0x74, 0x00, 0x00, 0x00 },  /* τ to t */
1469   { 0x03C5,  0x79, 0x00, 0x00, 0x00 },  /* υ to y */
1470   { 0x03C6,  0x66, 0x00, 0x00, 0x00 },  /* φ to f */
1471   { 0x03C7,  0x63, 0x68, 0x00, 0x00 },  /* χ to ch */
1472   { 0x03C8,  0x70, 0x73, 0x00, 0x00 },  /* ψ to ps */
1473   { 0x03C9,  0x6f, 0x00, 0x00, 0x00 },  /* ω to o */
1474   { 0x03CA,  0x69, 0x00, 0x00, 0x00 },  /* ϊ to i */
1475   { 0x03CB,  0x79, 0x00, 0x00, 0x00 },  /* ϋ to y */
1476   { 0x03CC,  0x6f, 0x00, 0x00, 0x00 },  /* ό to o */
1477   { 0x03CD,  0x79, 0x00, 0x00, 0x00 },  /* ύ to y */
1478   { 0x03CE,  0x69, 0x00, 0x00, 0x00 },  /* ώ to i */
1479   { 0x0400,  0x45, 0x00, 0x00, 0x00 },  /* Ѐ to E */
1480   { 0x0401,  0x45, 0x00, 0x00, 0x00 },  /* Ё to E */
1481   { 0x0402,  0x44, 0x00, 0x00, 0x00 },  /* Ђ to D */
1482   { 0x0403,  0x47, 0x00, 0x00, 0x00 },  /* Ѓ to G */
1483   { 0x0404,  0x45, 0x00, 0x00, 0x00 },  /* Є to E */
1484   { 0x0405,  0x5a, 0x00, 0x00, 0x00 },  /* Ѕ to Z */
1485   { 0x0406,  0x49, 0x00, 0x00, 0x00 },  /* І to I */
1486   { 0x0407,  0x49, 0x00, 0x00, 0x00 },  /* Ї to I */
1487   { 0x0408,  0x4a, 0x00, 0x00, 0x00 },  /* Ј to J */
1488   { 0x0409,  0x49, 0x00, 0x00, 0x00 },  /* Љ to I */
1489   { 0x040A,  0x4e, 0x00, 0x00, 0x00 },  /* Њ to N */
1490   { 0x040B,  0x44, 0x00, 0x00, 0x00 },  /* Ћ to D */
1491   { 0x040C,  0x4b, 0x00, 0x00, 0x00 },  /* Ќ to K */
1492   { 0x040D,  0x49, 0x00, 0x00, 0x00 },  /* Ѝ to I */
1493   { 0x040E,  0x55, 0x00, 0x00, 0x00 },  /* Ў to U */
1494   { 0x040F,  0x44, 0x00, 0x00, 0x00 },  /* Џ to D */
1495   { 0x0410,  0x41, 0x00, 0x00, 0x00 },  /* А to A */
1496   { 0x0411,  0x42, 0x00, 0x00, 0x00 },  /* Б to B */
1497   { 0x0412,  0x56, 0x00, 0x00, 0x00 },  /* В to V */
1498   { 0x0413,  0x47, 0x00, 0x00, 0x00 },  /* Г to G */
1499   { 0x0414,  0x44, 0x00, 0x00, 0x00 },  /* Д to D */
1500   { 0x0415,  0x45, 0x00, 0x00, 0x00 },  /* Е to E */
1501   { 0x0416,  0x5a, 0x68, 0x00, 0x00 },  /* Ж to Zh */
1502   { 0x0417,  0x5a, 0x00, 0x00, 0x00 },  /* З to Z */
1503   { 0x0418,  0x49, 0x00, 0x00, 0x00 },  /* И to I */
1504   { 0x0419,  0x49, 0x00, 0x00, 0x00 },  /* Й to I */
1505   { 0x041A,  0x4b, 0x00, 0x00, 0x00 },  /* К to K */
1506   { 0x041B,  0x4c, 0x00, 0x00, 0x00 },  /* Л to L */
1507   { 0x041C,  0x4d, 0x00, 0x00, 0x00 },  /* М to M */
1508   { 0x041D,  0x4e, 0x00, 0x00, 0x00 },  /* Н to N */
1509   { 0x041E,  0x4f, 0x00, 0x00, 0x00 },  /* О to O */
1510   { 0x041F,  0x50, 0x00, 0x00, 0x00 },  /* П to P */
1511   { 0x0420,  0x52, 0x00, 0x00, 0x00 },  /* Р to R */
1512   { 0x0421,  0x53, 0x00, 0x00, 0x00 },  /* С to S */
1513   { 0x0422,  0x54, 0x00, 0x00, 0x00 },  /* Т to T */
1514   { 0x0423,  0x55, 0x00, 0x00, 0x00 },  /* У to U */
1515   { 0x0424,  0x46, 0x00, 0x00, 0x00 },  /* Ф to F */
1516   { 0x0425,  0x4b, 0x68, 0x00, 0x00 },  /* Х to Kh */
1517   { 0x0426,  0x54, 0x63, 0x00, 0x00 },  /* Ц to Tc */
1518   { 0x0427,  0x43, 0x68, 0x00, 0x00 },  /* Ч to Ch */
1519   { 0x0428,  0x53, 0x68, 0x00, 0x00 },  /* Ш to Sh */
1520   { 0x0429,  0x53, 0x68, 0x63, 0x68 },  /* Щ to Shch */
1521   { 0x042A,  0x61, 0x00, 0x00, 0x00 },  /*  to A */
1522   { 0x042B,  0x59, 0x00, 0x00, 0x00 },  /* Ы to Y */
1523   { 0x042C,  0x59, 0x00, 0x00, 0x00 },  /*  to Y */
1524   { 0x042D,  0x45, 0x00, 0x00, 0x00 },  /* Э to E */
1525   { 0x042E,  0x49, 0x75, 0x00, 0x00 },  /* Ю to Iu */
1526   { 0x042F,  0x49, 0x61, 0x00, 0x00 },  /* Я to Ia */
1527   { 0x0430,  0x61, 0x00, 0x00, 0x00 },  /* а to a */
1528   { 0x0431,  0x62, 0x00, 0x00, 0x00 },  /* б to b */
1529   { 0x0432,  0x76, 0x00, 0x00, 0x00 },  /* в to v */
1530   { 0x0433,  0x67, 0x00, 0x00, 0x00 },  /* г to g */
1531   { 0x0434,  0x64, 0x00, 0x00, 0x00 },  /* д to d */
1532   { 0x0435,  0x65, 0x00, 0x00, 0x00 },  /* е to e */
1533   { 0x0436,  0x7a, 0x68, 0x00, 0x00 },  /* ж to zh */
1534   { 0x0437,  0x7a, 0x00, 0x00, 0x00 },  /* з to z */
1535   { 0x0438,  0x69, 0x00, 0x00, 0x00 },  /* и to i */
1536   { 0x0439,  0x69, 0x00, 0x00, 0x00 },  /* й to i */
1537   { 0x043A,  0x6b, 0x00, 0x00, 0x00 },  /* к to k */
1538   { 0x043B,  0x6c, 0x00, 0x00, 0x00 },  /* л to l */
1539   { 0x043C,  0x6d, 0x00, 0x00, 0x00 },  /* м to m */
1540   { 0x043D,  0x6e, 0x00, 0x00, 0x00 },  /* н to n */
1541   { 0x043E,  0x6f, 0x00, 0x00, 0x00 },  /* о to o */
1542   { 0x043F,  0x70, 0x00, 0x00, 0x00 },  /* п to p */
1543   { 0x0440,  0x72, 0x00, 0x00, 0x00 },  /* р to r */
1544   { 0x0441,  0x73, 0x00, 0x00, 0x00 },  /* с to s */
1545   { 0x0442,  0x74, 0x00, 0x00, 0x00 },  /* т to t */
1546   { 0x0443,  0x75, 0x00, 0x00, 0x00 },  /* у to u */
1547   { 0x0444,  0x66, 0x00, 0x00, 0x00 },  /* ф to f */
1548   { 0x0445,  0x6b, 0x68, 0x00, 0x00 },  /* х to kh */
1549   { 0x0446,  0x74, 0x63, 0x00, 0x00 },  /* ц to tc */
1550   { 0x0447,  0x63, 0x68, 0x00, 0x00 },  /* ч to ch */
1551   { 0x0448,  0x73, 0x68, 0x00, 0x00 },  /* ш to sh */
1552   { 0x0449,  0x73, 0x68, 0x63, 0x68 },  /* щ to shch */
1553   { 0x044A,  0x61, 0x00, 0x00, 0x00 },  /*  to a */
1554   { 0x044B,  0x79, 0x00, 0x00, 0x00 },  /* ы to y */
1555   { 0x044C,  0x79, 0x00, 0x00, 0x00 },  /*  to y */
1556   { 0x044D,  0x65, 0x00, 0x00, 0x00 },  /* э to e */
1557   { 0x044E,  0x69, 0x75, 0x00, 0x00 },  /* ю to iu */
1558   { 0x044F,  0x69, 0x61, 0x00, 0x00 },  /* я to ia */
1559   { 0x0450,  0x65, 0x00, 0x00, 0x00 },  /* ѐ to e */
1560   { 0x0451,  0x65, 0x00, 0x00, 0x00 },  /* ё to e */
1561   { 0x0452,  0x64, 0x00, 0x00, 0x00 },  /* ђ to d */
1562   { 0x0453,  0x67, 0x00, 0x00, 0x00 },  /* ѓ to g */
1563   { 0x0454,  0x65, 0x00, 0x00, 0x00 },  /* є to e */
1564   { 0x0455,  0x7a, 0x00, 0x00, 0x00 },  /* ѕ to z */
1565   { 0x0456,  0x69, 0x00, 0x00, 0x00 },  /* і to i */
1566   { 0x0457,  0x69, 0x00, 0x00, 0x00 },  /* ї to i */
1567   { 0x0458,  0x6a, 0x00, 0x00, 0x00 },  /* ј to j */
1568   { 0x0459,  0x69, 0x00, 0x00, 0x00 },  /* љ to i */
1569   { 0x045A,  0x6e, 0x00, 0x00, 0x00 },  /* њ to n */
1570   { 0x045B,  0x64, 0x00, 0x00, 0x00 },  /* ћ to d */
1571   { 0x045C,  0x6b, 0x00, 0x00, 0x00 },  /* ќ to k */
1572   { 0x045D,  0x69, 0x00, 0x00, 0x00 },  /* ѝ to i */
1573   { 0x045E,  0x75, 0x00, 0x00, 0x00 },  /* ў to u */
1574   { 0x045F,  0x64, 0x00, 0x00, 0x00 },  /* џ to d */
1575   { 0x1E02,  0x42, 0x00, 0x00, 0x00 },  /* Ḃ to B */
1576   { 0x1E03,  0x62, 0x00, 0x00, 0x00 },  /* ḃ to b */
1577   { 0x1E0A,  0x44, 0x00, 0x00, 0x00 },  /* Ḋ to D */
1578   { 0x1E0B,  0x64, 0x00, 0x00, 0x00 },  /* ḋ to d */
1579   { 0x1E1E,  0x46, 0x00, 0x00, 0x00 },  /* Ḟ to F */
1580   { 0x1E1F,  0x66, 0x00, 0x00, 0x00 },  /* ḟ to f */
1581   { 0x1E40,  0x4D, 0x00, 0x00, 0x00 },  /* Ṁ to M */
1582   { 0x1E41,  0x6D, 0x00, 0x00, 0x00 },  /* ṁ to m */
1583   { 0x1E56,  0x50, 0x00, 0x00, 0x00 },  /* Ṗ to P */
1584   { 0x1E57,  0x70, 0x00, 0x00, 0x00 },  /* ṗ to p */
1585   { 0x1E60,  0x53, 0x00, 0x00, 0x00 },  /* Ṡ to S */
1586   { 0x1E61,  0x73, 0x00, 0x00, 0x00 },  /* ṡ to s */
1587   { 0x1E6A,  0x54, 0x00, 0x00, 0x00 },  /* Ṫ to T */
1588   { 0x1E6B,  0x74, 0x00, 0x00, 0x00 },  /* ṫ to t */
1589   { 0x1E80,  0x57, 0x00, 0x00, 0x00 },  /* Ẁ to W */
1590   { 0x1E81,  0x77, 0x00, 0x00, 0x00 },  /* ẁ to w */
1591   { 0x1E82,  0x57, 0x00, 0x00, 0x00 },  /* Ẃ to W */
1592   { 0x1E83,  0x77, 0x00, 0x00, 0x00 },  /* ẃ to w */
1593   { 0x1E84,  0x57, 0x00, 0x00, 0x00 },  /* Ẅ to W */
1594   { 0x1E85,  0x77, 0x00, 0x00, 0x00 },  /* ẅ to w */
1595   { 0x1EF2,  0x59, 0x00, 0x00, 0x00 },  /* Ỳ to Y */
1596   { 0x1EF3,  0x79, 0x00, 0x00, 0x00 },  /* ỳ to y */
1597   { 0xFB00,  0x66, 0x66, 0x00, 0x00 },  /* ff to ff */
1598   { 0xFB01,  0x66, 0x69, 0x00, 0x00 },  /* fi to fi */
1599   { 0xFB02,  0x66, 0x6C, 0x00, 0x00 },  /* fl to fl */
1600   { 0xFB05,  0x73, 0x74, 0x00, 0x00 },  /* ſt to st */
1601   { 0xFB06,  0x73, 0x74, 0x00, 0x00 },  /* st to st */
1602 };
1603 
1604 static const Transliteration *spellfixFindTranslit(int c, int *pxTop){
1605   *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1;
1606   return translit;
1607 }
1608 
1609 /*
1610 ** Convert the input string from UTF-8 into pure ASCII by converting
1611 ** all non-ASCII characters to some combination of characters in the
1612 ** ASCII subset.
1613 **
1614 ** The returned string might contain more characters than the input.
1615 **
1616 ** Space to hold the returned string comes from sqlite3_malloc() and
1617 ** should be freed by the caller.
1618 */
1619 static unsigned char *transliterate(const unsigned char *zIn, int nIn){
1620   unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
1621   int c, sz, nOut;
1622   if( zOut==0 ) return 0;
1623   nOut = 0;
1624   while( nIn>0 ){
1625     c = utf8Read(zIn, nIn, &sz);
1626     zIn += sz;
1627     nIn -= sz;
1628     if( c<=127 ){
1629       zOut[nOut++] = (unsigned char)c;
1630     }else{
1631       int xTop, xBtm, x;
1632       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1633       xBtm = 0;
1634       while( xTop>=xBtm ){
1635         x = (xTop + xBtm)/2;
1636         if( tbl[x].cFrom==c ){
1637           zOut[nOut++] = tbl[x].cTo0;
1638           if( tbl[x].cTo1 ){
1639             zOut[nOut++] = tbl[x].cTo1;
1640             if( tbl[x].cTo2 ){
1641               zOut[nOut++] = tbl[x].cTo2;
1642               if( tbl[x].cTo3 ){
1643                 zOut[nOut++] = tbl[x].cTo3;
1644               }
1645             }
1646           }
1647           c = 0;
1648           break;
1649         }else if( tbl[x].cFrom>c ){
1650           xTop = x-1;
1651         }else{
1652           xBtm = x+1;
1653         }
1654       }
1655       if( c ) zOut[nOut++] = '?';
1656     }
1657   }
1658   zOut[nOut] = 0;
1659   return zOut;
1660 }
1661 
1662 /*
1663 ** Return the number of characters in the shortest prefix of the input
1664 ** string that transliterates to an ASCII string nTrans bytes or longer.
1665 ** Or, if the transliteration of the input string is less than nTrans
1666 ** bytes in size, return the number of characters in the input string.
1667 */
1668 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1669   int i, c, sz, nOut;
1670   int nChar;
1671 
1672   i = nOut = 0;
1673   for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1674     c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1675     i += sz;
1676 
1677     nOut++;
1678     if( c>=128 ){
1679       int xTop, xBtm, x;
1680       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1681       xBtm = 0;
1682       while( xTop>=xBtm ){
1683         x = (xTop + xBtm)/2;
1684         if( tbl[x].cFrom==c ){
1685           if( tbl[x].cTo1 ){
1686             nOut++;
1687             if( tbl[x].cTo2 ){
1688               nOut++;
1689               if( tbl[x].cTo3 ){
1690                 nOut++;
1691               }
1692             }
1693           }
1694           break;
1695         }else if( tbl[x].cFrom>c ){
1696           xTop = x-1;
1697         }else{
1698           xBtm = x+1;
1699         }
1700       }
1701     }
1702   }
1703 
1704   return nChar;
1705 }
1706 
1707 
1708 /*
1709 **    spellfix1_translit(X)
1710 **
1711 ** Convert a string that contains non-ASCII Roman characters into
1712 ** pure ASCII.
1713 */
1714 static void transliterateSqlFunc(
1715   sqlite3_context *context,
1716   int argc,
1717   sqlite3_value **argv
1718 ){
1719   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1720   int nIn = sqlite3_value_bytes(argv[0]);
1721   unsigned char *zOut = transliterate(zIn, nIn);
1722   if( zOut==0 ){
1723     sqlite3_result_error_nomem(context);
1724   }else{
1725     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1726   }
1727 }
1728 
1729 /*
1730 **    spellfix1_scriptcode(X)
1731 **
1732 ** Try to determine the dominant script used by the word X and return
1733 ** its ISO 15924 numeric code.
1734 **
1735 ** The current implementation only understands the following scripts:
1736 **
1737 **    215  (Latin)
1738 **    220  (Cyrillic)
1739 **    200  (Greek)
1740 **
1741 ** This routine will return 998 if the input X contains characters from
1742 ** two or more of the above scripts or 999 if X contains no characters
1743 ** from any of the above scripts.
1744 */
1745 static void scriptCodeSqlFunc(
1746   sqlite3_context *context,
1747   int argc,
1748   sqlite3_value **argv
1749 ){
1750   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1751   int nIn = sqlite3_value_bytes(argv[0]);
1752   int c, sz;
1753   int scriptMask = 0;
1754   int res;
1755   int seenDigit = 0;
1756 # define SCRIPT_LATIN       0x0001
1757 # define SCRIPT_CYRILLIC    0x0002
1758 # define SCRIPT_GREEK       0x0004
1759 # define SCRIPT_HEBREW      0x0008
1760 # define SCRIPT_ARABIC      0x0010
1761 
1762   while( nIn>0 ){
1763     c = utf8Read(zIn, nIn, &sz);
1764     zIn += sz;
1765     nIn -= sz;
1766     if( c<0x02af ){
1767       if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1768         scriptMask |= SCRIPT_LATIN;
1769       }else if( c>='0' && c<='9' ){
1770         seenDigit = 1;
1771       }
1772     }else if( c>=0x0400 && c<=0x04ff ){
1773       scriptMask |= SCRIPT_CYRILLIC;
1774     }else if( c>=0x0386 && c<=0x03ce ){
1775       scriptMask |= SCRIPT_GREEK;
1776     }else if( c>=0x0590 && c<=0x05ff ){
1777       scriptMask |= SCRIPT_HEBREW;
1778     }else if( c>=0x0600 && c<=0x06ff ){
1779       scriptMask |= SCRIPT_ARABIC;
1780     }
1781   }
1782   if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1783   switch( scriptMask ){
1784     case 0:                res = 999; break;
1785     case SCRIPT_LATIN:     res = 215; break;
1786     case SCRIPT_CYRILLIC:  res = 220; break;
1787     case SCRIPT_GREEK:     res = 200; break;
1788     case SCRIPT_HEBREW:    res = 125; break;
1789     case SCRIPT_ARABIC:    res = 160; break;
1790     default:               res = 998; break;
1791   }
1792   sqlite3_result_int(context, res);
1793 }
1794 
1795 /* End transliterate
1796 ******************************************************************************
1797 ******************************************************************************
1798 ** Begin spellfix1 virtual table.
1799 */
1800 
1801 /* Maximum length of a phonehash used for querying the shadow table */
1802 #define SPELLFIX_MX_HASH  32
1803 
1804 /* Maximum number of hash strings to examine per query */
1805 #define SPELLFIX_MX_RUN   1
1806 
1807 typedef struct spellfix1_vtab spellfix1_vtab;
1808 typedef struct spellfix1_cursor spellfix1_cursor;
1809 
1810 /* Fuzzy-search virtual table object */
1811 struct spellfix1_vtab {
1812   sqlite3_vtab base;         /* Base class - must be first */
1813   sqlite3 *db;               /* Database connection */
1814   char *zDbName;             /* Name of database holding this table */
1815   char *zTableName;          /* Name of the virtual table */
1816   char *zCostTable;          /* Table holding edit-distance cost numbers */
1817   EditDist3Config *pConfig3; /* Parsed edit distance costs */
1818 };
1819 
1820 /* Fuzzy-search cursor object */
1821 struct spellfix1_cursor {
1822   sqlite3_vtab_cursor base;    /* Base class - must be first */
1823   spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
1824   char *zPattern;              /* rhs of MATCH clause */
1825   int idxNum;                  /* idxNum value passed to xFilter() */
1826   int nRow;                    /* Number of rows of content */
1827   int nAlloc;                  /* Number of allocated rows */
1828   int iRow;                    /* Current row of content */
1829   int iLang;                   /* Value of the langid= constraint */
1830   int iTop;                    /* Value of the top= constraint */
1831   int iScope;                  /* Value of the scope= constraint */
1832   int nSearch;                 /* Number of vocabulary items checked */
1833   sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
1834   struct spellfix1_row {       /* For each row of content */
1835     sqlite3_int64 iRowid;         /* Rowid for this row */
1836     char *zWord;                  /* Text for this row */
1837     int iRank;                    /* Rank for this row */
1838     int iDistance;                /* Distance from pattern for this row */
1839     int iScore;                   /* Score for sorting */
1840     int iMatchlen;                /* Value of matchlen column (or -1) */
1841     char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1842   } *a;
1843 };
1844 
1845 /*
1846 ** Construct one or more SQL statements from the format string given
1847 ** and then evaluate those statements. The success code is written
1848 ** into *pRc.
1849 **
1850 ** If *pRc is initially non-zero then this routine is a no-op.
1851 */
1852 static void spellfix1DbExec(
1853   int *pRc,              /* Success code */
1854   sqlite3 *db,           /* Database in which to run SQL */
1855   const char *zFormat,   /* Format string for SQL */
1856   ...                    /* Arguments to the format string */
1857 ){
1858   va_list ap;
1859   char *zSql;
1860   if( *pRc ) return;
1861   va_start(ap, zFormat);
1862   zSql = sqlite3_vmprintf(zFormat, ap);
1863   va_end(ap);
1864   if( zSql==0 ){
1865     *pRc = SQLITE_NOMEM;
1866   }else{
1867     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1868     sqlite3_free(zSql);
1869   }
1870 }
1871 
1872 /*
1873 ** xDisconnect/xDestroy method for the fuzzy-search module.
1874 */
1875 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1876   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1877   int rc = SQLITE_OK;
1878   if( isDestroy ){
1879     sqlite3 *db = p->db;
1880     spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1881                   p->zDbName, p->zTableName);
1882   }
1883   if( rc==SQLITE_OK ){
1884     sqlite3_free(p->zTableName);
1885     editDist3ConfigDelete(p->pConfig3);
1886     sqlite3_free(p->zCostTable);
1887     sqlite3_free(p);
1888   }
1889   return rc;
1890 }
1891 static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1892   return spellfix1Uninit(0, pVTab);
1893 }
1894 static int spellfix1Destroy(sqlite3_vtab *pVTab){
1895   return spellfix1Uninit(1, pVTab);
1896 }
1897 
1898 /*
1899 ** Make a copy of a string.  Remove leading and trailing whitespace
1900 ** and dequote it.
1901 */
1902 static char *spellfix1Dequote(const char *zIn){
1903   char *zOut;
1904   int i, j;
1905   char c;
1906   while( isspace((unsigned char)zIn[0]) ) zIn++;
1907   zOut = sqlite3_mprintf("%s", zIn);
1908   if( zOut==0 ) return 0;
1909   i = (int)strlen(zOut);
1910 #if 0  /* The parser will never leave spaces at the end */
1911   while( i>0 && isspace(zOut[i-1]) ){ i--; }
1912 #endif
1913   zOut[i] = 0;
1914   c = zOut[0];
1915   if( c=='\'' || c=='"' ){
1916     for(i=1, j=0; ALWAYS(zOut[i]); i++){
1917       zOut[j++] = zOut[i];
1918       if( zOut[i]==c ){
1919         if( zOut[i+1]==c ){
1920           i++;
1921         }else{
1922           zOut[j-1] = 0;
1923           break;
1924         }
1925       }
1926     }
1927   }
1928   return zOut;
1929 }
1930 
1931 
1932 /*
1933 ** xConnect/xCreate method for the spellfix1 module. Arguments are:
1934 **
1935 **   argv[0]   -> module name  ("spellfix1")
1936 **   argv[1]   -> database name
1937 **   argv[2]   -> table name
1938 **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
1939 */
1940 static int spellfix1Init(
1941   int isCreate,
1942   sqlite3 *db,
1943   void *pAux,
1944   int argc, const char *const*argv,
1945   sqlite3_vtab **ppVTab,
1946   char **pzErr
1947 ){
1948   spellfix1_vtab *pNew = 0;
1949   /* const char *zModule = argv[0]; // not used */
1950   const char *zDbName = argv[1];
1951   const char *zTableName = argv[2];
1952   int nDbName;
1953   int rc = SQLITE_OK;
1954   int i;
1955 
1956   nDbName = (int)strlen(zDbName);
1957   pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
1958   if( pNew==0 ){
1959     rc = SQLITE_NOMEM;
1960   }else{
1961     memset(pNew, 0, sizeof(*pNew));
1962     pNew->zDbName = (char*)&pNew[1];
1963     memcpy(pNew->zDbName, zDbName, nDbName+1);
1964     pNew->zTableName = sqlite3_mprintf("%s", zTableName);
1965     pNew->db = db;
1966     if( pNew->zTableName==0 ){
1967       rc = SQLITE_NOMEM;
1968     }else{
1969       rc = sqlite3_declare_vtab(db,
1970            "CREATE TABLE x(word,rank,distance,langid, "
1971            "score, matchlen, phonehash HIDDEN, "
1972            "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
1973            "soundslike HIDDEN, command HIDDEN)"
1974       );
1975 #define SPELLFIX_COL_WORD            0
1976 #define SPELLFIX_COL_RANK            1
1977 #define SPELLFIX_COL_DISTANCE        2
1978 #define SPELLFIX_COL_LANGID          3
1979 #define SPELLFIX_COL_SCORE           4
1980 #define SPELLFIX_COL_MATCHLEN        5
1981 #define SPELLFIX_COL_PHONEHASH       6
1982 #define SPELLFIX_COL_TOP             7
1983 #define SPELLFIX_COL_SCOPE           8
1984 #define SPELLFIX_COL_SRCHCNT         9
1985 #define SPELLFIX_COL_SOUNDSLIKE     10
1986 #define SPELLFIX_COL_COMMAND        11
1987     }
1988     if( rc==SQLITE_OK && isCreate ){
1989       spellfix1DbExec(&rc, db,
1990          "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
1991          "  id INTEGER PRIMARY KEY,\n"
1992          "  rank INT,\n"
1993          "  langid INT,\n"
1994          "  word TEXT,\n"
1995          "  k1 TEXT,\n"
1996          "  k2 TEXT\n"
1997          ");\n",
1998          zDbName, zTableName
1999       );
2000       spellfix1DbExec(&rc, db,
2001          "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
2002             "ON \"%w_vocab\"(langid,k2);",
2003          zDbName, zTableName, zTableName
2004       );
2005     }
2006     for(i=3; rc==SQLITE_OK && i<argc; i++){
2007       if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
2008         pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
2009         if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
2010         continue;
2011       }
2012       *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
2013       rc = SQLITE_ERROR;
2014     }
2015   }
2016 
2017   if( rc && pNew ){
2018     *ppVTab = 0;
2019     spellfix1Uninit(0, &pNew->base);
2020   }else{
2021     *ppVTab = (sqlite3_vtab *)pNew;
2022   }
2023   return rc;
2024 }
2025 
2026 /*
2027 ** The xConnect and xCreate methods
2028 */
2029 static int spellfix1Connect(
2030   sqlite3 *db,
2031   void *pAux,
2032   int argc, const char *const*argv,
2033   sqlite3_vtab **ppVTab,
2034   char **pzErr
2035 ){
2036   return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2037 }
2038 static int spellfix1Create(
2039   sqlite3 *db,
2040   void *pAux,
2041   int argc, const char *const*argv,
2042   sqlite3_vtab **ppVTab,
2043   char **pzErr
2044 ){
2045   return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2046 }
2047 
2048 /*
2049 ** Clear all of the content from a cursor.
2050 */
2051 static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2052   int i;
2053   for(i=0; i<pCur->nRow; i++){
2054     sqlite3_free(pCur->a[i].zWord);
2055   }
2056   pCur->nRow = 0;
2057   pCur->iRow = 0;
2058   pCur->nSearch = 0;
2059   if( pCur->pFullScan ){
2060     sqlite3_finalize(pCur->pFullScan);
2061     pCur->pFullScan = 0;
2062   }
2063 }
2064 
2065 /*
2066 ** Resize the cursor to hold up to N rows of content
2067 */
2068 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2069   struct spellfix1_row *aNew;
2070   assert( N>=pCur->nRow );
2071   aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2072   if( aNew==0 && N>0 ){
2073     spellfix1ResetCursor(pCur);
2074     sqlite3_free(pCur->a);
2075     pCur->nAlloc = 0;
2076     pCur->a = 0;
2077   }else{
2078     pCur->nAlloc = N;
2079     pCur->a = aNew;
2080   }
2081 }
2082 
2083 
2084 /*
2085 ** Close a fuzzy-search cursor.
2086 */
2087 static int spellfix1Close(sqlite3_vtab_cursor *cur){
2088   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2089   spellfix1ResetCursor(pCur);
2090   spellfix1ResizeCursor(pCur, 0);
2091   sqlite3_free(pCur->zPattern);
2092   sqlite3_free(pCur);
2093   return SQLITE_OK;
2094 }
2095 
2096 #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
2097 #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
2098 #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
2099 #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
2100 #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
2101 #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
2102 #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
2103 #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2104 
2105 /*
2106 **
2107 ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2108 ** above.
2109 **
2110 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2111 ** if specified and in that order.
2112 */
2113 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2114   int iPlan = 0;
2115   int iLangTerm = -1;
2116   int iTopTerm = -1;
2117   int iScopeTerm = -1;
2118   int iDistTerm = -1;
2119   int iRowidTerm = -1;
2120   int i;
2121   const struct sqlite3_index_constraint *pConstraint;
2122   pConstraint = pIdxInfo->aConstraint;
2123   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2124     if( pConstraint->usable==0 ) continue;
2125 
2126     /* Terms of the form:  word MATCH $str */
2127     if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2128      && pConstraint->iColumn==SPELLFIX_COL_WORD
2129      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2130     ){
2131       iPlan |= SPELLFIX_IDXNUM_MATCH;
2132       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2133       pIdxInfo->aConstraintUsage[i].omit = 1;
2134     }
2135 
2136     /* Terms of the form:  langid = $langid  */
2137     if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2138      && pConstraint->iColumn==SPELLFIX_COL_LANGID
2139      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2140     ){
2141       iPlan |= SPELLFIX_IDXNUM_LANGID;
2142       iLangTerm = i;
2143     }
2144 
2145     /* Terms of the form:  top = $top */
2146     if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2147      && pConstraint->iColumn==SPELLFIX_COL_TOP
2148      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2149     ){
2150       iPlan |= SPELLFIX_IDXNUM_TOP;
2151       iTopTerm = i;
2152     }
2153 
2154     /* Terms of the form:  scope = $scope */
2155     if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2156      && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2157      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2158     ){
2159       iPlan |= SPELLFIX_IDXNUM_SCOPE;
2160       iScopeTerm = i;
2161     }
2162 
2163     /* Terms of the form:  distance < $dist or distance <= $dist */
2164     if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2165      && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2166      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2167           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2168     ){
2169       if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2170         iPlan |= SPELLFIX_IDXNUM_DISTLT;
2171       }else{
2172         iPlan |= SPELLFIX_IDXNUM_DISTLE;
2173       }
2174       iDistTerm = i;
2175     }
2176 
2177     /* Terms of the form:  distance < $dist or distance <= $dist */
2178     if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2179      && pConstraint->iColumn<0
2180      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2181     ){
2182       iPlan |= SPELLFIX_IDXNUM_ROWID;
2183       iRowidTerm = i;
2184     }
2185   }
2186   if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2187     int idx = 2;
2188     pIdxInfo->idxNum = iPlan;
2189     if( pIdxInfo->nOrderBy==1
2190      && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2191      && pIdxInfo->aOrderBy[0].desc==0
2192     ){
2193       pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2194     }
2195     if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2196       pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2197       pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2198     }
2199     if( iPlan&SPELLFIX_IDXNUM_TOP ){
2200       pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2201       pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2202     }
2203     if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2204       pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2205       pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2206     }
2207     if( iPlan&SPELLFIX_IDXNUM_DIST ){
2208       pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2209       pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2210     }
2211     pIdxInfo->estimatedCost = 1e5;
2212   }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2213     pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2214     pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2215     pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2216     pIdxInfo->estimatedCost = 5;
2217   }else{
2218     pIdxInfo->idxNum = 0;
2219     pIdxInfo->estimatedCost = 1e50;
2220   }
2221   return SQLITE_OK;
2222 }
2223 
2224 /*
2225 ** Open a new fuzzy-search cursor.
2226 */
2227 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2228   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2229   spellfix1_cursor *pCur;
2230   pCur = sqlite3_malloc64( sizeof(*pCur) );
2231   if( pCur==0 ) return SQLITE_NOMEM;
2232   memset(pCur, 0, sizeof(*pCur));
2233   pCur->pVTab = p;
2234   *ppCursor = &pCur->base;
2235   return SQLITE_OK;
2236 }
2237 
2238 /*
2239 ** Adjust a distance measurement by the words rank in order to show
2240 ** preference to common words.
2241 */
2242 static int spellfix1Score(int iDistance, int iRank){
2243   int iLog2;
2244   for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2245   return iDistance + 32 - iLog2;
2246 }
2247 
2248 /*
2249 ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2250 ** that they sort in order of increasing distance.
2251 */
2252 static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2253   const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2254   const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2255   return a->iScore - b->iScore;
2256 }
2257 
2258 /*
2259 ** A structure used to pass information from spellfix1FilterForMatch()
2260 ** into spellfix1RunQuery().
2261 */
2262 typedef struct MatchQuery {
2263   spellfix1_cursor *pCur;          /* The cursor being queried */
2264   sqlite3_stmt *pStmt;             /* shadow table query statment */
2265   char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2266   const char *zPattern;            /* Transliterated input string */
2267   int nPattern;                    /* Length of zPattern */
2268   EditDist3FromString *pMatchStr3; /* Original unicode string */
2269   EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2270   const EditDist3Lang *pLang;      /* The selected language coefficients */
2271   int iLang;                       /* The language id */
2272   int iScope;                      /* Default scope */
2273   int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2274   int rc;                          /* Error code */
2275   int nRun;                  /* Number of prior runs for the same zPattern */
2276   char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2277 } MatchQuery;
2278 
2279 /*
2280 ** Run a query looking for the best matches against zPattern using
2281 ** zHash as the character class seed hash.
2282 */
2283 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2284   const char *zK1;
2285   const char *zWord;
2286   int iDist;
2287   int iRank;
2288   int iScore;
2289   int iWorst = 0;
2290   int idx;
2291   int idxWorst = -1;
2292   int i;
2293   int iScope = p->iScope;
2294   spellfix1_cursor *pCur = p->pCur;
2295   sqlite3_stmt *pStmt = p->pStmt;
2296   char zHash1[SPELLFIX_MX_HASH];
2297   char zHash2[SPELLFIX_MX_HASH];
2298   char *zClass;
2299   int nClass;
2300   int rc;
2301 
2302   if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2303   zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2304   if( zClass==0 ){
2305     p->rc = SQLITE_NOMEM;
2306     return;
2307   }
2308   nClass = (int)strlen(zClass);
2309   if( nClass>SPELLFIX_MX_HASH-2 ){
2310     nClass = SPELLFIX_MX_HASH-2;
2311     zClass[nClass] = 0;
2312   }
2313   if( nClass<=iScope ){
2314     if( nClass>2 ){
2315       iScope = nClass-1;
2316     }else{
2317       iScope = nClass;
2318     }
2319   }
2320   memcpy(zHash1, zClass, iScope);
2321   sqlite3_free(zClass);
2322   zHash1[iScope] = 0;
2323   memcpy(zHash2, zHash1, iScope);
2324   zHash2[iScope] = 'Z';
2325   zHash2[iScope+1] = 0;
2326 #if SPELLFIX_MX_RUN>1
2327   for(i=0; i<p->nRun; i++){
2328     if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2329   }
2330 #endif
2331   assert( p->nRun<SPELLFIX_MX_RUN );
2332   memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2333   if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2334    || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2335   ){
2336     p->rc = SQLITE_NOMEM;
2337     return;
2338   }
2339 #if SPELLFIX_MX_RUN>1
2340   for(i=0; i<pCur->nRow; i++){
2341     if( pCur->a[i].iScore>iWorst ){
2342       iWorst = pCur->a[i].iScore;
2343       idxWorst = i;
2344     }
2345   }
2346 #endif
2347   while( sqlite3_step(pStmt)==SQLITE_ROW ){
2348     int iMatchlen = -1;
2349     iRank = sqlite3_column_int(pStmt, 2);
2350     if( p->pMatchStr3 ){
2351       int nWord = sqlite3_column_bytes(pStmt, 1);
2352       zWord = (const char*)sqlite3_column_text(pStmt, 1);
2353       iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2354     }else{
2355       zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2356       if( zK1==0 ) continue;
2357       iDist = editdist1(p->zPattern, zK1, 0);
2358     }
2359     if( iDist<0 ){
2360       p->rc = SQLITE_NOMEM;
2361       break;
2362     }
2363     pCur->nSearch++;
2364 
2365     /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2366     ** check if this row meets it. If not, jump back up to the top of the
2367     ** loop to process the next row. Otherwise, if the row does match the
2368     ** distance constraint, check if the pCur->a[] array is already full.
2369     ** If it is and no explicit "top = ?" constraint was present in the
2370     ** query, grow the array to ensure there is room for the new entry. */
2371     assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2372     if( p->iMaxDist>=0 ){
2373       if( iDist>p->iMaxDist ) continue;
2374       if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2375         spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2376         if( pCur->a==0 ) break;
2377       }
2378     }
2379 
2380     iScore = spellfix1Score(iDist,iRank);
2381     if( pCur->nRow<pCur->nAlloc ){
2382       idx = pCur->nRow;
2383     }else if( iScore<iWorst ){
2384       idx = idxWorst;
2385       sqlite3_free(pCur->a[idx].zWord);
2386     }else{
2387       continue;
2388     }
2389 
2390     pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2391     if( pCur->a[idx].zWord==0 ){
2392       p->rc = SQLITE_NOMEM;
2393       break;
2394     }
2395     pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2396     pCur->a[idx].iRank = iRank;
2397     pCur->a[idx].iDistance = iDist;
2398     pCur->a[idx].iScore = iScore;
2399     pCur->a[idx].iMatchlen = iMatchlen;
2400     memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2401     if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2402     if( pCur->nRow==pCur->nAlloc ){
2403       iWorst = pCur->a[0].iScore;
2404       idxWorst = 0;
2405       for(i=1; i<pCur->nRow; i++){
2406         iScore = pCur->a[i].iScore;
2407         if( iWorst<iScore ){
2408           iWorst = iScore;
2409           idxWorst = i;
2410         }
2411       }
2412     }
2413   }
2414   rc = sqlite3_reset(pStmt);
2415   if( rc ) p->rc = rc;
2416 }
2417 
2418 /*
2419 ** This version of the xFilter method work if the MATCH term is present
2420 ** and we are doing a scan.
2421 */
2422 static int spellfix1FilterForMatch(
2423   spellfix1_cursor *pCur,
2424   int argc,
2425   sqlite3_value **argv
2426 ){
2427   int idxNum = pCur->idxNum;
2428   const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2429   EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2430   char *zPattern;                    /* Transliteration of zMatchThis */
2431   int nPattern;                      /* Length of zPattern */
2432   int iLimit = 20;                   /* Max number of rows of output */
2433   int iScope = 3;                    /* Use this many characters of zClass */
2434   int iLang = 0;                     /* Language code */
2435   char *zSql;                        /* SQL of shadow table query */
2436   sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2437   int rc;                            /* Result code */
2438   int idx = 1;                       /* Next available filter parameter */
2439   spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2440   MatchQuery x;                      /* For passing info to RunQuery() */
2441 
2442   /* Load the cost table if we have not already done so */
2443   if( p->zCostTable!=0 && p->pConfig3==0 ){
2444     p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2445     if( p->pConfig3==0 ) return SQLITE_NOMEM;
2446     memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2447     rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2448     if( rc ) return rc;
2449   }
2450   memset(&x, 0, sizeof(x));
2451   x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2452   x.iMaxDist = -1;   /* Maximum allowed edit distance */
2453 
2454   if( idxNum&2 ){
2455     iLang = sqlite3_value_int(argv[idx++]);
2456   }
2457   if( idxNum&4 ){
2458     iLimit = sqlite3_value_int(argv[idx++]);
2459     if( iLimit<1 ) iLimit = 1;
2460   }
2461   if( idxNum&8 ){
2462     x.iScope = sqlite3_value_int(argv[idx++]);
2463     if( x.iScope<1 ) x.iScope = 1;
2464     if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2465   }
2466   if( idxNum&(16|32) ){
2467     x.iMaxDist = sqlite3_value_int(argv[idx++]);
2468     if( idxNum&16 ) x.iMaxDist--;
2469     if( x.iMaxDist<0 ) x.iMaxDist = 0;
2470   }
2471   spellfix1ResetCursor(pCur);
2472   spellfix1ResizeCursor(pCur, iLimit);
2473   zMatchThis = sqlite3_value_text(argv[0]);
2474   if( zMatchThis==0 ) return SQLITE_OK;
2475   if( p->pConfig3 ){
2476     x.pLang = editDist3FindLang(p->pConfig3, iLang);
2477     pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2478     if( pMatchStr3==0 ){
2479       x.rc = SQLITE_NOMEM;
2480       goto filter_exit;
2481     }
2482   }else{
2483     x.pLang = 0;
2484   }
2485   zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2486   sqlite3_free(pCur->zPattern);
2487   pCur->zPattern = zPattern;
2488   if( zPattern==0 ){
2489     x.rc = SQLITE_NOMEM;
2490     goto filter_exit;
2491   }
2492   nPattern = (int)strlen(zPattern);
2493   if( zPattern[nPattern-1]=='*' ) nPattern--;
2494   zSql = sqlite3_mprintf(
2495      "SELECT id, word, rank, coalesce(k1,word)"
2496      "  FROM \"%w\".\"%w_vocab\""
2497      " WHERE langid=%d AND k2>=?1 AND k2<?2",
2498      p->zDbName, p->zTableName, iLang
2499   );
2500   if( zSql==0 ){
2501     x.rc = SQLITE_NOMEM;
2502     pStmt = 0;
2503     goto filter_exit;
2504   }
2505   rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2506   sqlite3_free(zSql);
2507   pCur->iLang = iLang;
2508   x.pCur = pCur;
2509   x.pStmt = pStmt;
2510   x.zPattern = zPattern;
2511   x.nPattern = nPattern;
2512   x.pMatchStr3 = pMatchStr3;
2513   x.iLang = iLang;
2514   x.rc = rc;
2515   x.pConfig3 = p->pConfig3;
2516   if( x.rc==SQLITE_OK ){
2517     spellfix1RunQuery(&x, zPattern, nPattern);
2518   }
2519 
2520   if( pCur->a ){
2521     qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2522     pCur->iTop = iLimit;
2523     pCur->iScope = iScope;
2524   }else{
2525     x.rc = SQLITE_NOMEM;
2526   }
2527 
2528 filter_exit:
2529   sqlite3_finalize(pStmt);
2530   editDist3FromStringDelete(pMatchStr3);
2531   return x.rc;
2532 }
2533 
2534 /*
2535 ** This version of xFilter handles a full-table scan case
2536 */
2537 static int spellfix1FilterForFullScan(
2538   spellfix1_cursor *pCur,
2539   int argc,
2540   sqlite3_value **argv
2541 ){
2542   int rc = SQLITE_OK;
2543   int idxNum = pCur->idxNum;
2544   char *zSql;
2545   spellfix1_vtab *pVTab = pCur->pVTab;
2546   spellfix1ResetCursor(pCur);
2547   assert( idxNum==0 || idxNum==64 );
2548   zSql = sqlite3_mprintf(
2549      "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2550      pVTab->zDbName, pVTab->zTableName,
2551      ((idxNum & 64) ? " WHERE rowid=?" : "")
2552   );
2553   if( zSql==0 ) return SQLITE_NOMEM;
2554   rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2555   sqlite3_free(zSql);
2556   if( rc==SQLITE_OK && (idxNum & 64) ){
2557     assert( argc==1 );
2558     rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2559   }
2560   pCur->nRow = pCur->iRow = 0;
2561   if( rc==SQLITE_OK ){
2562     rc = sqlite3_step(pCur->pFullScan);
2563     if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2564     if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2565   }else{
2566     pCur->iRow = 0;
2567   }
2568   return rc;
2569 }
2570 
2571 
2572 /*
2573 ** Called to "rewind" a cursor back to the beginning so that
2574 ** it starts its output over again.  Always called at least once
2575 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2576 */
2577 static int spellfix1Filter(
2578   sqlite3_vtab_cursor *cur,
2579   int idxNum, const char *idxStr,
2580   int argc, sqlite3_value **argv
2581 ){
2582   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2583   int rc;
2584   pCur->idxNum = idxNum;
2585   if( idxNum & 1 ){
2586     rc = spellfix1FilterForMatch(pCur, argc, argv);
2587   }else{
2588     rc = spellfix1FilterForFullScan(pCur, argc, argv);
2589   }
2590   return rc;
2591 }
2592 
2593 
2594 /*
2595 ** Advance a cursor to its next row of output
2596 */
2597 static int spellfix1Next(sqlite3_vtab_cursor *cur){
2598   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2599   int rc = SQLITE_OK;
2600   if( pCur->iRow < pCur->nRow ){
2601     if( pCur->pFullScan ){
2602       rc = sqlite3_step(pCur->pFullScan);
2603       if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2604       if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2605     }else{
2606       pCur->iRow++;
2607     }
2608   }
2609   return rc;
2610 }
2611 
2612 /*
2613 ** Return TRUE if we are at the end-of-file
2614 */
2615 static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2616   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2617   return pCur->iRow>=pCur->nRow;
2618 }
2619 
2620 /*
2621 ** Return columns from the current row.
2622 */
2623 static int spellfix1Column(
2624   sqlite3_vtab_cursor *cur,
2625   sqlite3_context *ctx,
2626   int i
2627 ){
2628   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2629   if( pCur->pFullScan ){
2630     if( i<=SPELLFIX_COL_LANGID ){
2631       sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2632     }else{
2633       sqlite3_result_null(ctx);
2634     }
2635     return SQLITE_OK;
2636   }
2637   switch( i ){
2638     case SPELLFIX_COL_WORD: {
2639       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2640       break;
2641     }
2642     case SPELLFIX_COL_RANK: {
2643       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2644       break;
2645     }
2646     case SPELLFIX_COL_DISTANCE: {
2647       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2648       break;
2649     }
2650     case SPELLFIX_COL_LANGID: {
2651       sqlite3_result_int(ctx, pCur->iLang);
2652       break;
2653     }
2654     case SPELLFIX_COL_SCORE: {
2655       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2656       break;
2657     }
2658     case SPELLFIX_COL_MATCHLEN: {
2659       int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2660       if( iMatchlen<0 ){
2661         int nPattern = (int)strlen(pCur->zPattern);
2662         char *zWord = pCur->a[pCur->iRow].zWord;
2663         int nWord = (int)strlen(zWord);
2664 
2665         if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2666           char *zTranslit;
2667           int res;
2668           zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2669           if( !zTranslit ) return SQLITE_NOMEM;
2670           res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2671           sqlite3_free(zTranslit);
2672           if( res<0 ) return SQLITE_NOMEM;
2673           iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2674         }else{
2675           iMatchlen = utf8Charlen(zWord, nWord);
2676         }
2677       }
2678 
2679       sqlite3_result_int(ctx, iMatchlen);
2680       break;
2681     }
2682     case SPELLFIX_COL_PHONEHASH: {
2683       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2684       break;
2685     }
2686     case SPELLFIX_COL_TOP: {
2687       sqlite3_result_int(ctx, pCur->iTop);
2688       break;
2689     }
2690     case SPELLFIX_COL_SCOPE: {
2691       sqlite3_result_int(ctx, pCur->iScope);
2692       break;
2693     }
2694     case SPELLFIX_COL_SRCHCNT: {
2695       sqlite3_result_int(ctx, pCur->nSearch);
2696       break;
2697     }
2698     default: {
2699       sqlite3_result_null(ctx);
2700       break;
2701     }
2702   }
2703   return SQLITE_OK;
2704 }
2705 
2706 /*
2707 ** The rowid.
2708 */
2709 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2710   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2711   if( pCur->pFullScan ){
2712     *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2713   }else{
2714     *pRowid = pCur->a[pCur->iRow].iRowid;
2715   }
2716   return SQLITE_OK;
2717 }
2718 
2719 /*
2720 ** This function is called by the xUpdate() method. It returns a string
2721 ** containing the conflict mode that xUpdate() should use for the current
2722 ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
2723 */
2724 static const char *spellfix1GetConflict(sqlite3 *db){
2725   static const char *azConflict[] = {
2726     /* Note: Instead of "FAIL" - "ABORT". */
2727     "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
2728   };
2729   int eConflict = sqlite3_vtab_on_conflict(db);
2730 
2731   assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
2732        || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
2733        || eConflict==SQLITE_REPLACE
2734   );
2735   assert( SQLITE_ROLLBACK==1 );
2736   assert( SQLITE_IGNORE==2 );
2737   assert( SQLITE_FAIL==3 );
2738   assert( SQLITE_ABORT==4 );
2739   assert( SQLITE_REPLACE==5 );
2740 
2741   return azConflict[eConflict-1];
2742 }
2743 
2744 /*
2745 ** The xUpdate() method.
2746 */
2747 static int spellfix1Update(
2748   sqlite3_vtab *pVTab,
2749   int argc,
2750   sqlite3_value **argv,
2751   sqlite_int64 *pRowid
2752 ){
2753   int rc = SQLITE_OK;
2754   sqlite3_int64 rowid, newRowid;
2755   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2756   sqlite3 *db = p->db;
2757 
2758   if( argc==1 ){
2759     /* A delete operation on the rowid given by argv[0] */
2760     rowid = *pRowid = sqlite3_value_int64(argv[0]);
2761     spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2762                            " WHERE id=%lld",
2763                   p->zDbName, p->zTableName, rowid);
2764   }else{
2765     const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2766     int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2767     int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2768     int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2769     const unsigned char *zSoundslike =
2770            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2771     int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2772     char *zK1, *zK2;
2773     int i;
2774     char c;
2775     const char *zConflict = spellfix1GetConflict(db);
2776 
2777     if( zWord==0 ){
2778       /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2779       ** cause zWord to be NULL, so we look at the "command" column to see
2780       ** what special actions to take */
2781       const char *zCmd =
2782          (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2783       if( zCmd==0 ){
2784         pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2785                                          p->zTableName);
2786         return SQLITE_CONSTRAINT_NOTNULL;
2787       }
2788       if( strcmp(zCmd,"reset")==0 ){
2789         /* Reset the  edit cost table (if there is one). */
2790         editDist3ConfigDelete(p->pConfig3);
2791         p->pConfig3 = 0;
2792         return SQLITE_OK;
2793       }
2794       if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2795         editDist3ConfigDelete(p->pConfig3);
2796         p->pConfig3 = 0;
2797         sqlite3_free(p->zCostTable);
2798         p->zCostTable = spellfix1Dequote(zCmd+16);
2799         if( p->zCostTable==0 ) return SQLITE_NOMEM;
2800         if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2801           sqlite3_free(p->zCostTable);
2802           p->zCostTable = 0;
2803         }
2804         return SQLITE_OK;
2805       }
2806       pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2807                                        p->zTableName, zCmd);
2808       return SQLITE_ERROR;
2809     }
2810     if( iRank<1 ) iRank = 1;
2811     if( zSoundslike ){
2812       zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2813     }else{
2814       zK1 = (char*)transliterate(zWord, nWord);
2815     }
2816     if( zK1==0 ) return SQLITE_NOMEM;
2817     for(i=0; (c = zK1[i])!=0; i++){
2818        if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2819     }
2820     zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2821     if( zK2==0 ){
2822       sqlite3_free(zK1);
2823       return SQLITE_NOMEM;
2824     }
2825     if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
2826       if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2827         spellfix1DbExec(&rc, db,
2828                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2829                "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)",
2830                p->zDbName, p->zTableName,
2831                iRank, iLang, zWord, zK1, zWord, zK2
2832         );
2833       }else{
2834         newRowid = sqlite3_value_int64(argv[1]);
2835         spellfix1DbExec(&rc, db,
2836             "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2837             "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)",
2838             zConflict, p->zDbName, p->zTableName,
2839             newRowid, iRank, iLang, zWord, zK1, zWord, zK2
2840         );
2841       }
2842       *pRowid = sqlite3_last_insert_rowid(db);
2843     }else{
2844       rowid = sqlite3_value_int64(argv[0]);
2845       newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2846       spellfix1DbExec(&rc, db,
2847              "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2848              " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld",
2849              zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2850              zWord, zK1, zWord, zK2, rowid
2851       );
2852     }
2853     sqlite3_free(zK1);
2854     sqlite3_free(zK2);
2855   }
2856   return rc;
2857 }
2858 
2859 /*
2860 ** Rename the spellfix1 table.
2861 */
2862 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2863   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2864   sqlite3 *db = p->db;
2865   int rc = SQLITE_OK;
2866   char *zNewName = sqlite3_mprintf("%s", zNew);
2867   if( zNewName==0 ){
2868     return SQLITE_NOMEM;
2869   }
2870   spellfix1DbExec(&rc, db,
2871      "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2872      p->zDbName, p->zTableName, zNewName
2873   );
2874   if( rc==SQLITE_OK ){
2875     sqlite3_free(p->zTableName);
2876     p->zTableName = zNewName;
2877   }else{
2878     sqlite3_free(zNewName);
2879   }
2880   return rc;
2881 }
2882 
2883 
2884 /*
2885 ** A virtual table module that provides fuzzy search.
2886 */
2887 static sqlite3_module spellfix1Module = {
2888   0,                       /* iVersion */
2889   spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
2890   spellfix1Connect,        /* xConnect - reconnected to an existing table */
2891   spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
2892   spellfix1Disconnect,     /* xDisconnect - close a connection */
2893   spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
2894   spellfix1Open,           /* xOpen - open a cursor */
2895   spellfix1Close,          /* xClose - close a cursor */
2896   spellfix1Filter,         /* xFilter - configure scan constraints */
2897   spellfix1Next,           /* xNext - advance a cursor */
2898   spellfix1Eof,            /* xEof - check for end of scan */
2899   spellfix1Column,         /* xColumn - read data */
2900   spellfix1Rowid,          /* xRowid - read data */
2901   spellfix1Update,         /* xUpdate */
2902   0,                       /* xBegin */
2903   0,                       /* xSync */
2904   0,                       /* xCommit */
2905   0,                       /* xRollback */
2906   0,                       /* xFindMethod */
2907   spellfix1Rename,         /* xRename */
2908 };
2909 
2910 /*
2911 ** Register the various functions and the virtual table.
2912 */
2913 static int spellfix1Register(sqlite3 *db){
2914   int rc = SQLITE_OK;
2915   int i;
2916   rc = sqlite3_create_function(db, "spellfix1_translit", 1,
2917                                SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2918                                 transliterateSqlFunc, 0, 0);
2919   if( rc==SQLITE_OK ){
2920     rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
2921                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2922                                   editdistSqlFunc, 0, 0);
2923   }
2924   if( rc==SQLITE_OK ){
2925     rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
2926                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2927                                   phoneticHashSqlFunc, 0, 0);
2928   }
2929   if( rc==SQLITE_OK ){
2930     rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
2931                                   SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
2932                                   scriptCodeSqlFunc, 0, 0);
2933   }
2934   if( rc==SQLITE_OK ){
2935     rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
2936   }
2937   if( rc==SQLITE_OK ){
2938     rc = editDist3Install(db);
2939   }
2940 
2941   /* Verify sanity of the translit[] table */
2942   for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
2943     assert( translit[i].cFrom<translit[i+1].cFrom );
2944   }
2945 
2946   return rc;
2947 }
2948 
2949 #endif /* SQLITE_OMIT_VIRTUALTABLE */
2950 
2951 /*
2952 ** Extension load function.
2953 */
2954 #ifdef _WIN32
2955 __declspec(dllexport)
2956 #endif
2957 int sqlite3_spellfix_init(
2958   sqlite3 *db,
2959   char **pzErrMsg,
2960   const sqlite3_api_routines *pApi
2961 ){
2962   SQLITE_EXTENSION_INIT2(pApi);
2963 #ifndef SQLITE_OMIT_VIRTUALTABLE
2964   return spellfix1Register(db);
2965 #endif
2966   return SQLITE_OK;
2967 }
2968