xref: /sqlite-3.40.0/ext/misc/spellfix.c (revision 988af251)
1b7045ab2Sdrh /*
2b7045ab2Sdrh ** 2012 April 10
3b7045ab2Sdrh **
4b7045ab2Sdrh ** The author disclaims copyright to this source code.  In place of
5b7045ab2Sdrh ** a legal notice, here is a blessing:
6b7045ab2Sdrh **
7b7045ab2Sdrh **    May you do good and not evil.
8b7045ab2Sdrh **    May you find forgiveness for yourself and forgive others.
9b7045ab2Sdrh **    May you share freely, never taking more than you give.
10b7045ab2Sdrh **
11b7045ab2Sdrh *************************************************************************
12b7045ab2Sdrh **
13b7045ab2Sdrh ** This module implements the spellfix1 VIRTUAL TABLE that can be used
14b7045ab2Sdrh ** to search a large vocabulary for close matches.  See separate
15015db9c8Sdrh ** documentation (http://www.sqlite.org/spellfix1.html) for details.
16b7045ab2Sdrh */
17b7045ab2Sdrh #include "sqlite3ext.h"
18b7045ab2Sdrh SQLITE_EXTENSION_INIT1
19b7045ab2Sdrh 
20ea41dc44Sdrh #ifndef SQLITE_AMALGAMATION
210fae06fcSdrh # if !defined(NDEBUG) && !defined(SQLITE_DEBUG)
220fae06fcSdrh #  define NDEBUG 1
230fae06fcSdrh # endif
240fae06fcSdrh # if defined(NDEBUG) && defined(SQLITE_DEBUG)
250fae06fcSdrh #  undef NDEBUG
260fae06fcSdrh # endif
27b7045ab2Sdrh # include <string.h>
28b7045ab2Sdrh # include <stdio.h>
29b7045ab2Sdrh # include <stdlib.h>
30b7045ab2Sdrh # include <assert.h>
31b7045ab2Sdrh # define ALWAYS(X)  1
32b7045ab2Sdrh # define NEVER(X)   0
33b7045ab2Sdrh   typedef unsigned char u8;
34b7045ab2Sdrh   typedef unsigned short u16;
35ea41dc44Sdrh #endif
36dc90d3d8Sdrh #include <ctype.h>
37b7045ab2Sdrh 
3811f71d6aSdan #ifndef SQLITE_OMIT_VIRTUALTABLE
3911f71d6aSdan 
40b7045ab2Sdrh /*
41b7045ab2Sdrh ** Character classes for ASCII characters:
42b7045ab2Sdrh **
43b7045ab2Sdrh **   0   ''        Silent letters:   H W
44b7045ab2Sdrh **   1   'A'       Any vowel:   A E I O U (Y)
45b7045ab2Sdrh **   2   'B'       A bilabeal stop or fricative:  B F P V W
46b7045ab2Sdrh **   3   'C'       Other fricatives or back stops:  C G J K Q S X Z
47b7045ab2Sdrh **   4   'D'       Alveolar stops:  D T
48b7045ab2Sdrh **   5   'H'       Letter H at the beginning of a word
49b7045ab2Sdrh **   6   'L'       Glide:  L
50b7045ab2Sdrh **   7   'R'       Semivowel:  R
51b7045ab2Sdrh **   8   'M'       Nasals:  M N
52b7045ab2Sdrh **   9   'Y'       Letter Y at the beginning of a word.
53b7045ab2Sdrh **   10  '9'       Digits: 0 1 2 3 4 5 6 7 8 9
54b7045ab2Sdrh **   11  ' '       White space
55b7045ab2Sdrh **   12  '?'       Other.
56b7045ab2Sdrh */
57b7045ab2Sdrh #define CCLASS_SILENT         0
58b7045ab2Sdrh #define CCLASS_VOWEL          1
59b7045ab2Sdrh #define CCLASS_B              2
60b7045ab2Sdrh #define CCLASS_C              3
61b7045ab2Sdrh #define CCLASS_D              4
62b7045ab2Sdrh #define CCLASS_H              5
63b7045ab2Sdrh #define CCLASS_L              6
64b7045ab2Sdrh #define CCLASS_R              7
65b7045ab2Sdrh #define CCLASS_M              8
66b7045ab2Sdrh #define CCLASS_Y              9
67b7045ab2Sdrh #define CCLASS_DIGIT         10
68b7045ab2Sdrh #define CCLASS_SPACE         11
69b7045ab2Sdrh #define CCLASS_OTHER         12
70b7045ab2Sdrh 
71b7045ab2Sdrh /*
72b7045ab2Sdrh ** The following table gives the character class for non-initial ASCII
73b7045ab2Sdrh ** characters.
74b7045ab2Sdrh */
75b7045ab2Sdrh static const unsigned char midClass[] = {
76b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
77b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
78b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
79b7045ab2Sdrh  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
80b7045ab2Sdrh  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
81b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
82b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
83b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
84b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
85b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
86b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
87b7045ab2Sdrh  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
88b7045ab2Sdrh  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
89b7045ab2Sdrh  /* ' */ CCLASS_SILENT,   /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
90b7045ab2Sdrh  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
91b7045ab2Sdrh  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
92b7045ab2Sdrh  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
93b7045ab2Sdrh  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
94b7045ab2Sdrh  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
95b7045ab2Sdrh  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
96b7045ab2Sdrh  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
97b7045ab2Sdrh  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
98b7045ab2Sdrh  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
99b7045ab2Sdrh  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
100b7045ab2Sdrh  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
101b7045ab2Sdrh  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
102b7045ab2Sdrh  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
103b7045ab2Sdrh  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
104b7045ab2Sdrh  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
105b7045ab2Sdrh  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_VOWEL,
106b7045ab2Sdrh  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
107b7045ab2Sdrh  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
108b7045ab2Sdrh  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
109b7045ab2Sdrh  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
110b7045ab2Sdrh  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
111b7045ab2Sdrh  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
112b7045ab2Sdrh  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
113b7045ab2Sdrh  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
114b7045ab2Sdrh  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
115b7045ab2Sdrh  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
116b7045ab2Sdrh  /* x */ CCLASS_C,        /* y */ CCLASS_VOWEL,   /* z */ CCLASS_C,
117b7045ab2Sdrh  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
118b7045ab2Sdrh  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
119b7045ab2Sdrh };
120b7045ab2Sdrh /*
121b7045ab2Sdrh ** This tables gives the character class for ASCII characters that form the
122b7045ab2Sdrh ** initial character of a word.  The only difference from midClass is with
123b7045ab2Sdrh ** the letters H, W, and Y.
124b7045ab2Sdrh */
125b7045ab2Sdrh static const unsigned char initClass[] = {
126b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
127b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
128b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
129b7045ab2Sdrh  /*   */ CCLASS_SPACE,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
130b7045ab2Sdrh  /*   */ CCLASS_SPACE,    /*   */ CCLASS_SPACE,   /*   */ CCLASS_OTHER,
131b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
132b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
133b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
134b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
135b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_OTHER,
136b7045ab2Sdrh  /*   */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,   /*   */ CCLASS_SPACE,
137b7045ab2Sdrh  /* ! */ CCLASS_OTHER,    /* " */ CCLASS_OTHER,   /* # */ CCLASS_OTHER,
138b7045ab2Sdrh  /* $ */ CCLASS_OTHER,    /* % */ CCLASS_OTHER,   /* & */ CCLASS_OTHER,
139b7045ab2Sdrh  /* ' */ CCLASS_OTHER,    /* ( */ CCLASS_OTHER,   /* ) */ CCLASS_OTHER,
140b7045ab2Sdrh  /* * */ CCLASS_OTHER,    /* + */ CCLASS_OTHER,   /* , */ CCLASS_OTHER,
141b7045ab2Sdrh  /* - */ CCLASS_OTHER,    /* . */ CCLASS_OTHER,   /* / */ CCLASS_OTHER,
142b7045ab2Sdrh  /* 0 */ CCLASS_DIGIT,    /* 1 */ CCLASS_DIGIT,   /* 2 */ CCLASS_DIGIT,
143b7045ab2Sdrh  /* 3 */ CCLASS_DIGIT,    /* 4 */ CCLASS_DIGIT,   /* 5 */ CCLASS_DIGIT,
144b7045ab2Sdrh  /* 6 */ CCLASS_DIGIT,    /* 7 */ CCLASS_DIGIT,   /* 8 */ CCLASS_DIGIT,
145b7045ab2Sdrh  /* 9 */ CCLASS_DIGIT,    /* : */ CCLASS_OTHER,   /* ; */ CCLASS_OTHER,
146b7045ab2Sdrh  /* < */ CCLASS_OTHER,    /* = */ CCLASS_OTHER,   /* > */ CCLASS_OTHER,
147b7045ab2Sdrh  /* ? */ CCLASS_OTHER,    /* @ */ CCLASS_OTHER,   /* A */ CCLASS_VOWEL,
148b7045ab2Sdrh  /* B */ CCLASS_B,        /* C */ CCLASS_C,       /* D */ CCLASS_D,
149b7045ab2Sdrh  /* E */ CCLASS_VOWEL,    /* F */ CCLASS_B,       /* G */ CCLASS_C,
150b7045ab2Sdrh  /* H */ CCLASS_SILENT,   /* I */ CCLASS_VOWEL,   /* J */ CCLASS_C,
151b7045ab2Sdrh  /* K */ CCLASS_C,        /* L */ CCLASS_L,       /* M */ CCLASS_M,
152b7045ab2Sdrh  /* N */ CCLASS_M,        /* O */ CCLASS_VOWEL,   /* P */ CCLASS_B,
153b7045ab2Sdrh  /* Q */ CCLASS_C,        /* R */ CCLASS_R,       /* S */ CCLASS_C,
154b7045ab2Sdrh  /* T */ CCLASS_D,        /* U */ CCLASS_VOWEL,   /* V */ CCLASS_B,
155b7045ab2Sdrh  /* W */ CCLASS_B,        /* X */ CCLASS_C,       /* Y */ CCLASS_Y,
156b7045ab2Sdrh  /* Z */ CCLASS_C,        /* [ */ CCLASS_OTHER,   /* \ */ CCLASS_OTHER,
157b7045ab2Sdrh  /* ] */ CCLASS_OTHER,    /* ^ */ CCLASS_OTHER,   /* _ */ CCLASS_OTHER,
158b7045ab2Sdrh  /* ` */ CCLASS_OTHER,    /* a */ CCLASS_VOWEL,   /* b */ CCLASS_B,
159b7045ab2Sdrh  /* c */ CCLASS_C,        /* d */ CCLASS_D,       /* e */ CCLASS_VOWEL,
160b7045ab2Sdrh  /* f */ CCLASS_B,        /* g */ CCLASS_C,       /* h */ CCLASS_SILENT,
161b7045ab2Sdrh  /* i */ CCLASS_VOWEL,    /* j */ CCLASS_C,       /* k */ CCLASS_C,
162b7045ab2Sdrh  /* l */ CCLASS_L,        /* m */ CCLASS_M,       /* n */ CCLASS_M,
163b7045ab2Sdrh  /* o */ CCLASS_VOWEL,    /* p */ CCLASS_B,       /* q */ CCLASS_C,
164b7045ab2Sdrh  /* r */ CCLASS_R,        /* s */ CCLASS_C,       /* t */ CCLASS_D,
165b7045ab2Sdrh  /* u */ CCLASS_VOWEL,    /* v */ CCLASS_B,       /* w */ CCLASS_B,
166b7045ab2Sdrh  /* x */ CCLASS_C,        /* y */ CCLASS_Y,       /* z */ CCLASS_C,
167b7045ab2Sdrh  /* { */ CCLASS_OTHER,    /* | */ CCLASS_OTHER,   /* } */ CCLASS_OTHER,
168b7045ab2Sdrh  /* ~ */ CCLASS_OTHER,    /*   */ CCLASS_OTHER,
169b7045ab2Sdrh };
170b7045ab2Sdrh 
171b7045ab2Sdrh /*
172b7045ab2Sdrh ** Mapping from the character class number (0-13) to a symbol for each
173b7045ab2Sdrh ** character class.  Note that initClass[] can be used to map the class
174b7045ab2Sdrh ** symbol back into the class number.
175b7045ab2Sdrh */
176b7045ab2Sdrh static const unsigned char className[] = ".ABCDHLRMY9 ?";
177b7045ab2Sdrh 
178b7045ab2Sdrh /*
179b7045ab2Sdrh ** Generate a "phonetic hash" from a string of ASCII characters
180b7045ab2Sdrh ** in zIn[0..nIn-1].
181b7045ab2Sdrh **
182b7045ab2Sdrh **   * Map characters by character class as defined above.
183b7045ab2Sdrh **   * Omit double-letters
184b7045ab2Sdrh **   * Omit vowels beside R and L
185b7045ab2Sdrh **   * Omit T when followed by CH
186b7045ab2Sdrh **   * Omit W when followed by R
187b7045ab2Sdrh **   * Omit D when followed by J or G
188b7045ab2Sdrh **   * Omit K in KN or G in GN at the beginning of a word
189b7045ab2Sdrh **
190b7045ab2Sdrh ** Space to hold the result is obtained from sqlite3_malloc()
191b7045ab2Sdrh **
192b7045ab2Sdrh ** Return NULL if memory allocation fails.
193b7045ab2Sdrh */
phoneticHash(const unsigned char * zIn,int nIn)194b7045ab2Sdrh static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){
195c4703eedSdrh   unsigned char *zOut = sqlite3_malloc64( nIn + 1 );
196b7045ab2Sdrh   int i;
197b7045ab2Sdrh   int nOut = 0;
198b7045ab2Sdrh   char cPrev = 0x77;
199b7045ab2Sdrh   char cPrevX = 0x77;
200b7045ab2Sdrh   const unsigned char *aClass = initClass;
201b7045ab2Sdrh 
202b7045ab2Sdrh   if( zOut==0 ) return 0;
203b7045ab2Sdrh   if( nIn>2 ){
204b7045ab2Sdrh     switch( zIn[0] ){
205b7045ab2Sdrh       case 'g':
206b7045ab2Sdrh       case 'k': {
207b7045ab2Sdrh         if( zIn[1]=='n' ){ zIn++; nIn--; }
208b7045ab2Sdrh         break;
209b7045ab2Sdrh       }
210b7045ab2Sdrh     }
211b7045ab2Sdrh   }
212b7045ab2Sdrh   for(i=0; i<nIn; i++){
213b7045ab2Sdrh     unsigned char c = zIn[i];
214b7045ab2Sdrh     if( i+1<nIn ){
215b7045ab2Sdrh       if( c=='w' && zIn[i+1]=='r' ) continue;
216b7045ab2Sdrh       if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue;
217b7045ab2Sdrh       if( i+2<nIn ){
218b7045ab2Sdrh         if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue;
219b7045ab2Sdrh       }
220b7045ab2Sdrh     }
221b7045ab2Sdrh     c = aClass[c&0x7f];
222b7045ab2Sdrh     if( c==CCLASS_SPACE ) continue;
223b7045ab2Sdrh     if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue;
224b7045ab2Sdrh     aClass = midClass;
225b7045ab2Sdrh     if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){
226b7045ab2Sdrh        continue; /* No vowels beside L or R */
227b7045ab2Sdrh     }
228b7045ab2Sdrh     if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){
229b7045ab2Sdrh        nOut--;   /* No vowels beside L or R */
230b7045ab2Sdrh     }
231b7045ab2Sdrh     cPrev = c;
232b7045ab2Sdrh     if( c==CCLASS_SILENT ) continue;
233b7045ab2Sdrh     cPrevX = c;
234b7045ab2Sdrh     c = className[c];
235b7045ab2Sdrh     assert( nOut>=0 );
236b7045ab2Sdrh     if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c;
237b7045ab2Sdrh   }
238b7045ab2Sdrh   zOut[nOut] = 0;
239b7045ab2Sdrh   return zOut;
240b7045ab2Sdrh }
241b7045ab2Sdrh 
242b7045ab2Sdrh /*
243b7045ab2Sdrh ** This is an SQL function wrapper around phoneticHash().  See
244b7045ab2Sdrh ** the description of phoneticHash() for additional information.
245b7045ab2Sdrh */
phoneticHashSqlFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)246b7045ab2Sdrh static void phoneticHashSqlFunc(
247b7045ab2Sdrh   sqlite3_context *context,
248b7045ab2Sdrh   int argc,
249b7045ab2Sdrh   sqlite3_value **argv
250b7045ab2Sdrh ){
251b7045ab2Sdrh   const unsigned char *zIn;
252b7045ab2Sdrh   unsigned char *zOut;
253b7045ab2Sdrh 
254b7045ab2Sdrh   zIn = sqlite3_value_text(argv[0]);
255b7045ab2Sdrh   if( zIn==0 ) return;
256b7045ab2Sdrh   zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0]));
257b7045ab2Sdrh   if( zOut==0 ){
258b7045ab2Sdrh     sqlite3_result_error_nomem(context);
259b7045ab2Sdrh   }else{
260b7045ab2Sdrh     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
261b7045ab2Sdrh   }
262b7045ab2Sdrh }
263b7045ab2Sdrh 
264b7045ab2Sdrh /*
265b7045ab2Sdrh ** Return the character class number for a character given its
266b7045ab2Sdrh ** context.
267b7045ab2Sdrh */
characterClass(char cPrev,char c)268b7045ab2Sdrh static char characterClass(char cPrev, char c){
269b7045ab2Sdrh   return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f];
270b7045ab2Sdrh }
271b7045ab2Sdrh 
272b7045ab2Sdrh /*
273b7045ab2Sdrh ** Return the cost of inserting or deleting character c immediately
274b7045ab2Sdrh ** following character cPrev.  If cPrev==0, that means c is the first
275b7045ab2Sdrh ** character of the word.
276b7045ab2Sdrh */
insertOrDeleteCost(char cPrev,char c,char cNext)277b7045ab2Sdrh static int insertOrDeleteCost(char cPrev, char c, char cNext){
278b7045ab2Sdrh   char classC = characterClass(cPrev, c);
279b7045ab2Sdrh   char classCprev;
280b7045ab2Sdrh 
281b7045ab2Sdrh   if( classC==CCLASS_SILENT ){
282b7045ab2Sdrh     /* Insert or delete "silent" characters such as H or W */
283b7045ab2Sdrh     return 1;
284b7045ab2Sdrh   }
285b7045ab2Sdrh   if( cPrev==c ){
286b7045ab2Sdrh     /* Repeated characters, or miss a repeat */
287b7045ab2Sdrh     return 10;
288b7045ab2Sdrh   }
289b7045ab2Sdrh   if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){
290b7045ab2Sdrh     return 20;  /* Insert a vowel before or after 'r' */
291b7045ab2Sdrh   }
292b7045ab2Sdrh   classCprev = characterClass(cPrev, cPrev);
293b7045ab2Sdrh   if( classC==classCprev ){
294b7045ab2Sdrh     if( classC==CCLASS_VOWEL ){
295b7045ab2Sdrh       /* Remove or add a new vowel to a vowel cluster */
296b7045ab2Sdrh       return 15;
297b7045ab2Sdrh     }else{
298b7045ab2Sdrh       /* Remove or add a consonant not in the same class */
299b7045ab2Sdrh       return 50;
300b7045ab2Sdrh     }
301b7045ab2Sdrh   }
302b7045ab2Sdrh 
303b7045ab2Sdrh   /* any other character insertion or deletion */
304b7045ab2Sdrh   return 100;
305b7045ab2Sdrh }
306b7045ab2Sdrh 
307b7045ab2Sdrh /*
308b7045ab2Sdrh ** Divide the insertion cost by this factor when appending to the
309b7045ab2Sdrh ** end of the word.
310b7045ab2Sdrh */
311b7045ab2Sdrh #define FINAL_INS_COST_DIV  4
312b7045ab2Sdrh 
313b7045ab2Sdrh /*
314b7045ab2Sdrh ** Return the cost of substituting cTo in place of cFrom assuming
315b7045ab2Sdrh ** the previous character is cPrev.  If cPrev==0 then cTo is the first
316b7045ab2Sdrh ** character of the word.
317b7045ab2Sdrh */
substituteCost(char cPrev,char cFrom,char cTo)318b7045ab2Sdrh static int substituteCost(char cPrev, char cFrom, char cTo){
319b7045ab2Sdrh   char classFrom, classTo;
320b7045ab2Sdrh   if( cFrom==cTo ){
321b7045ab2Sdrh     /* Exact match */
322b7045ab2Sdrh     return 0;
323b7045ab2Sdrh   }
324b7045ab2Sdrh   if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){
325b7045ab2Sdrh     /* differ only in case */
326b7045ab2Sdrh     return 0;
327b7045ab2Sdrh   }
328b7045ab2Sdrh   classFrom = characterClass(cPrev, cFrom);
329b7045ab2Sdrh   classTo = characterClass(cPrev, cTo);
330b7045ab2Sdrh   if( classFrom==classTo ){
331b7045ab2Sdrh     /* Same character class */
332b7045ab2Sdrh     return 40;
333b7045ab2Sdrh   }
334b7045ab2Sdrh   if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y
335b7045ab2Sdrh       && classTo>=CCLASS_B && classTo<=CCLASS_Y ){
336b7045ab2Sdrh     /* Convert from one consonant to another, but in a different class */
337b7045ab2Sdrh     return 75;
338b7045ab2Sdrh   }
339b7045ab2Sdrh   /* Any other subsitution */
340b7045ab2Sdrh   return 100;
341b7045ab2Sdrh }
342b7045ab2Sdrh 
343b7045ab2Sdrh /*
344b7045ab2Sdrh ** Given two strings zA and zB which are pure ASCII, return the cost
345b7045ab2Sdrh ** of transforming zA into zB.  If zA ends with '*' assume that it is
346b7045ab2Sdrh ** a prefix of zB and give only minimal penalty for extra characters
347b7045ab2Sdrh ** on the end of zB.
348b7045ab2Sdrh **
349b7045ab2Sdrh ** Smaller numbers mean a closer match.
350b7045ab2Sdrh **
351b7045ab2Sdrh ** Negative values indicate an error:
352b7045ab2Sdrh **    -1  One of the inputs is NULL
353b7045ab2Sdrh **    -2  Non-ASCII characters on input
354b7045ab2Sdrh **    -3  Unable to allocate memory
355b7045ab2Sdrh **
356b7045ab2Sdrh ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes
357b7045ab2Sdrh ** of zB that matched the pattern in zA. If zA does not end with a '*',
358b7045ab2Sdrh ** then this value is always the number of bytes in zB (i.e. strlen(zB)).
359b7045ab2Sdrh ** If zA does end in a '*', then it is the number of bytes in the prefix
360b7045ab2Sdrh ** of zB that was deemed to match zA.
361b7045ab2Sdrh */
editdist1(const char * zA,const char * zB,int * pnMatch)362b7045ab2Sdrh static int editdist1(const char *zA, const char *zB, int *pnMatch){
363b7045ab2Sdrh   int nA, nB;            /* Number of characters in zA[] and zB[] */
364b7045ab2Sdrh   int xA, xB;            /* Loop counters for zA[] and zB[] */
3657bb22ac7Smistachkin   char cA = 0, cB;       /* Current character of zA and zB */
366b7045ab2Sdrh   char cAprev, cBprev;   /* Previous character of zA and zB */
367b7045ab2Sdrh   char cAnext, cBnext;   /* Next character in zA and zB */
368b7045ab2Sdrh   int d;                 /* North-west cost value */
369b7045ab2Sdrh   int dc = 0;            /* North-west character value */
370b7045ab2Sdrh   int res;               /* Final result */
371b7045ab2Sdrh   int *m;                /* The cost matrix */
372b7045ab2Sdrh   char *cx;              /* Corresponding character values */
373b7045ab2Sdrh   int *toFree = 0;       /* Malloced space */
374b7045ab2Sdrh   int nMatch = 0;
375c6aab321Sdrh   int mStack[60+15];     /* Stack space to use if not too much is needed */
376b7045ab2Sdrh 
377b7045ab2Sdrh   /* Early out if either input is NULL */
378b7045ab2Sdrh   if( zA==0 || zB==0 ) return -1;
379b7045ab2Sdrh 
380b7045ab2Sdrh   /* Skip any common prefix */
381b7045ab2Sdrh   while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; }
382b7045ab2Sdrh   if( pnMatch ) *pnMatch = nMatch;
383b7045ab2Sdrh   if( zA[0]==0 && zB[0]==0 ) return 0;
384b7045ab2Sdrh 
385b7045ab2Sdrh #if 0
386b7045ab2Sdrh   printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' ');
387b7045ab2Sdrh #endif
388b7045ab2Sdrh 
389b7045ab2Sdrh   /* Verify input strings and measure their lengths */
390b7045ab2Sdrh   for(nA=0; zA[nA]; nA++){
391b7045ab2Sdrh     if( zA[nA]&0x80 ) return -2;
392b7045ab2Sdrh   }
393b7045ab2Sdrh   for(nB=0; zB[nB]; nB++){
394b7045ab2Sdrh     if( zB[nB]&0x80 ) return -2;
395b7045ab2Sdrh   }
396b7045ab2Sdrh 
397b7045ab2Sdrh   /* Special processing if either string is empty */
398b7045ab2Sdrh   if( nA==0 ){
39977fac879Smistachkin     cBprev = (char)dc;
400b7045ab2Sdrh     for(xB=res=0; (cB = zB[xB])!=0; xB++){
401b7045ab2Sdrh       res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV;
402b7045ab2Sdrh       cBprev = cB;
403b7045ab2Sdrh     }
404b7045ab2Sdrh     return res;
405b7045ab2Sdrh   }
406b7045ab2Sdrh   if( nB==0 ){
40777fac879Smistachkin     cAprev = (char)dc;
408b7045ab2Sdrh     for(xA=res=0; (cA = zA[xA])!=0; xA++){
409b7045ab2Sdrh       res += insertOrDeleteCost(cAprev, cA, zA[xA+1]);
410b7045ab2Sdrh       cAprev = cA;
411b7045ab2Sdrh     }
412b7045ab2Sdrh     return res;
413b7045ab2Sdrh   }
414b7045ab2Sdrh 
415b7045ab2Sdrh   /* A is a prefix of B */
416b7045ab2Sdrh   if( zA[0]=='*' && zA[1]==0 ) return 0;
417b7045ab2Sdrh 
418b7045ab2Sdrh   /* Allocate and initialize the Wagner matrix */
419b7045ab2Sdrh   if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){
420b7045ab2Sdrh     m = mStack;
421b7045ab2Sdrh   }else{
422c4703eedSdrh     m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 );
423b7045ab2Sdrh     if( m==0 ) return -3;
424b7045ab2Sdrh   }
425b7045ab2Sdrh   cx = (char*)&m[nB+1];
426b7045ab2Sdrh 
427b7045ab2Sdrh   /* Compute the Wagner edit distance */
428b7045ab2Sdrh   m[0] = 0;
42977fac879Smistachkin   cx[0] = (char)dc;
43077fac879Smistachkin   cBprev = (char)dc;
431b7045ab2Sdrh   for(xB=1; xB<=nB; xB++){
432b7045ab2Sdrh     cBnext = zB[xB];
433b7045ab2Sdrh     cB = zB[xB-1];
434b7045ab2Sdrh     cx[xB] = cB;
435b7045ab2Sdrh     m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext);
436b7045ab2Sdrh     cBprev = cB;
437b7045ab2Sdrh   }
43877fac879Smistachkin   cAprev = (char)dc;
439b7045ab2Sdrh   for(xA=1; xA<=nA; xA++){
440b7045ab2Sdrh     int lastA = (xA==nA);
441b7045ab2Sdrh     cA = zA[xA-1];
442b7045ab2Sdrh     cAnext = zA[xA];
443b7045ab2Sdrh     if( cA=='*' && lastA ) break;
444b7045ab2Sdrh     d = m[0];
445b7045ab2Sdrh     dc = cx[0];
446b7045ab2Sdrh     m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext);
447b7045ab2Sdrh     cBprev = 0;
448b7045ab2Sdrh     for(xB=1; xB<=nB; xB++){
449b7045ab2Sdrh       int totalCost, insCost, delCost, subCost, ncx;
450b7045ab2Sdrh       cB = zB[xB-1];
451b7045ab2Sdrh       cBnext = zB[xB];
452b7045ab2Sdrh 
453b7045ab2Sdrh       /* Cost to insert cB */
454b7045ab2Sdrh       insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext);
455b7045ab2Sdrh       if( lastA ) insCost /= FINAL_INS_COST_DIV;
456b7045ab2Sdrh 
457b7045ab2Sdrh       /* Cost to delete cA */
458b7045ab2Sdrh       delCost = insertOrDeleteCost(cx[xB], cA, cBnext);
459b7045ab2Sdrh 
460b7045ab2Sdrh       /* Cost to substitute cA->cB */
461b7045ab2Sdrh       subCost = substituteCost(cx[xB-1], cA, cB);
462b7045ab2Sdrh 
463b7045ab2Sdrh       /* Best cost */
464b7045ab2Sdrh       totalCost = insCost + m[xB-1];
465b7045ab2Sdrh       ncx = cB;
466b7045ab2Sdrh       if( (delCost + m[xB])<totalCost ){
467b7045ab2Sdrh         totalCost = delCost + m[xB];
468b7045ab2Sdrh         ncx = cA;
469b7045ab2Sdrh       }
470b7045ab2Sdrh       if( (subCost + d)<totalCost ){
471b7045ab2Sdrh         totalCost = subCost + d;
472b7045ab2Sdrh       }
473b7045ab2Sdrh 
474b7045ab2Sdrh #if 0
475b7045ab2Sdrh       printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c"
476b7045ab2Sdrh              " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n",
477b7045ab2Sdrh              xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB,
478b7045ab2Sdrh              insCost, delCost, subCost, totalCost, ncx?ncx:' ');
479b7045ab2Sdrh #endif
480b7045ab2Sdrh 
481b7045ab2Sdrh       /* Update the matrix */
482b7045ab2Sdrh       d = m[xB];
483b7045ab2Sdrh       dc = cx[xB];
484b7045ab2Sdrh       m[xB] = totalCost;
48577fac879Smistachkin       cx[xB] = (char)ncx;
486b7045ab2Sdrh       cBprev = cB;
487b7045ab2Sdrh     }
488b7045ab2Sdrh     cAprev = cA;
489b7045ab2Sdrh   }
490b7045ab2Sdrh 
491b7045ab2Sdrh   /* Free the wagner matrix and return the result */
492b7045ab2Sdrh   if( cA=='*' ){
493b7045ab2Sdrh     res = m[1];
494b7045ab2Sdrh     for(xB=1; xB<=nB; xB++){
495b7045ab2Sdrh       if( m[xB]<res ){
496b7045ab2Sdrh         res = m[xB];
497b7045ab2Sdrh         if( pnMatch ) *pnMatch = xB+nMatch;
498b7045ab2Sdrh       }
499b7045ab2Sdrh     }
500b7045ab2Sdrh   }else{
501b7045ab2Sdrh     res = m[nB];
502b7045ab2Sdrh     /* In the current implementation, pnMatch is always NULL if zA does
503b7045ab2Sdrh     ** not end in "*" */
504b7045ab2Sdrh     assert( pnMatch==0 );
505b7045ab2Sdrh   }
506b7045ab2Sdrh   sqlite3_free(toFree);
507b7045ab2Sdrh   return res;
508b7045ab2Sdrh }
509b7045ab2Sdrh 
510b7045ab2Sdrh /*
511b7045ab2Sdrh ** Function:    editdist(A,B)
512b7045ab2Sdrh **
513b7045ab2Sdrh ** Return the cost of transforming string A into string B.  Both strings
514b7045ab2Sdrh ** must be pure ASCII text.  If A ends with '*' then it is assumed to be
515b7045ab2Sdrh ** a prefix of B and extra characters on the end of B have minimal additional
516b7045ab2Sdrh ** cost.
517b7045ab2Sdrh */
editdistSqlFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)518b7045ab2Sdrh static void editdistSqlFunc(
519b7045ab2Sdrh   sqlite3_context *context,
520b7045ab2Sdrh   int argc,
521b7045ab2Sdrh   sqlite3_value **argv
522b7045ab2Sdrh ){
523b7045ab2Sdrh   int res = editdist1(
524b7045ab2Sdrh                     (const char*)sqlite3_value_text(argv[0]),
525b7045ab2Sdrh                     (const char*)sqlite3_value_text(argv[1]),
526b7045ab2Sdrh                     0);
527b7045ab2Sdrh   if( res<0 ){
528b7045ab2Sdrh     if( res==(-3) ){
529b7045ab2Sdrh       sqlite3_result_error_nomem(context);
530b7045ab2Sdrh     }else if( res==(-2) ){
531b7045ab2Sdrh       sqlite3_result_error(context, "non-ASCII input to editdist()", -1);
532b7045ab2Sdrh     }else{
533b7045ab2Sdrh       sqlite3_result_error(context, "NULL input to editdist()", -1);
534b7045ab2Sdrh     }
535b7045ab2Sdrh   }else{
536b7045ab2Sdrh     sqlite3_result_int(context, res);
537b7045ab2Sdrh   }
538b7045ab2Sdrh }
539b7045ab2Sdrh 
540b7045ab2Sdrh /* End of the fixed-cost edit distance implementation
541b7045ab2Sdrh ******************************************************************************
542b7045ab2Sdrh *****************************************************************************
543b7045ab2Sdrh ** Begin: Configurable cost unicode edit distance routines
544b7045ab2Sdrh */
545b7045ab2Sdrh /* Forward declaration of structures */
546b7045ab2Sdrh typedef struct EditDist3Cost EditDist3Cost;
547b7045ab2Sdrh typedef struct EditDist3Config EditDist3Config;
548b7045ab2Sdrh typedef struct EditDist3Point EditDist3Point;
549b7045ab2Sdrh typedef struct EditDist3From EditDist3From;
550b7045ab2Sdrh typedef struct EditDist3FromString EditDist3FromString;
551b7045ab2Sdrh typedef struct EditDist3To EditDist3To;
552b7045ab2Sdrh typedef struct EditDist3ToString EditDist3ToString;
553b7045ab2Sdrh typedef struct EditDist3Lang EditDist3Lang;
554b7045ab2Sdrh 
555b7045ab2Sdrh 
556b7045ab2Sdrh /*
557b7045ab2Sdrh ** An entry in the edit cost table
558b7045ab2Sdrh */
559b7045ab2Sdrh struct EditDist3Cost {
560b7045ab2Sdrh   EditDist3Cost *pNext;     /* Next cost element */
561b7045ab2Sdrh   u8 nFrom;                 /* Number of bytes in aFrom */
562b7045ab2Sdrh   u8 nTo;                   /* Number of bytes in aTo */
563b7045ab2Sdrh   u16 iCost;                /* Cost of this transformation */
564b7045ab2Sdrh   char a[4]    ;            /* FROM string followed by TO string */
565b7045ab2Sdrh   /* Additional TO and FROM string bytes appended as necessary */
566b7045ab2Sdrh };
567b7045ab2Sdrh 
568b7045ab2Sdrh /*
569b7045ab2Sdrh ** Edit costs for a particular language ID
570b7045ab2Sdrh */
571b7045ab2Sdrh struct EditDist3Lang {
572b7045ab2Sdrh   int iLang;             /* Language ID */
573b7045ab2Sdrh   int iInsCost;          /* Default insertion cost */
574b7045ab2Sdrh   int iDelCost;          /* Default deletion cost */
575b7045ab2Sdrh   int iSubCost;          /* Default substitution cost */
576b7045ab2Sdrh   EditDist3Cost *pCost;  /* Costs */
577b7045ab2Sdrh };
578b7045ab2Sdrh 
579b7045ab2Sdrh 
580b7045ab2Sdrh /*
581b7045ab2Sdrh ** The default EditDist3Lang object, with default costs.
582b7045ab2Sdrh */
583b7045ab2Sdrh static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 };
584b7045ab2Sdrh 
585b7045ab2Sdrh /*
586b7045ab2Sdrh ** Complete configuration
587b7045ab2Sdrh */
588b7045ab2Sdrh struct EditDist3Config {
589b7045ab2Sdrh   int nLang;             /* Number of language IDs.  Size of a[] */
590b7045ab2Sdrh   EditDist3Lang *a;      /* One for each distinct language ID */
591b7045ab2Sdrh };
592b7045ab2Sdrh 
593b7045ab2Sdrh /*
594b7045ab2Sdrh ** Extra information about each character in the FROM string.
595b7045ab2Sdrh */
596b7045ab2Sdrh struct EditDist3From {
597b7045ab2Sdrh   int nSubst;              /* Number of substitution cost entries */
598b7045ab2Sdrh   int nDel;                /* Number of deletion cost entries */
599b7045ab2Sdrh   int nByte;               /* Number of bytes in this character */
600b7045ab2Sdrh   EditDist3Cost **apSubst; /* Array of substitution costs for this element */
601b7045ab2Sdrh   EditDist3Cost **apDel;   /* Array of deletion cost entries */
602b7045ab2Sdrh };
603b7045ab2Sdrh 
604b7045ab2Sdrh /*
605b7045ab2Sdrh ** A precompiled FROM string.
606b7045ab2Sdrh *
607b7045ab2Sdrh ** In the common case we expect the FROM string to be reused multiple times.
608b7045ab2Sdrh ** In other words, the common case will be to measure the edit distance
609b7045ab2Sdrh ** from a single origin string to multiple target strings.
610b7045ab2Sdrh */
611b7045ab2Sdrh struct EditDist3FromString {
612b7045ab2Sdrh   char *z;                 /* The complete text of the FROM string */
613b7045ab2Sdrh   int n;                   /* Number of characters in the FROM string */
614b7045ab2Sdrh   int isPrefix;            /* True if ends with '*' character */
615b7045ab2Sdrh   EditDist3From *a;        /* Extra info about each char of the FROM string */
616b7045ab2Sdrh };
617b7045ab2Sdrh 
618b7045ab2Sdrh /*
619b7045ab2Sdrh ** Extra information about each character in the TO string.
620b7045ab2Sdrh */
621b7045ab2Sdrh struct EditDist3To {
622b7045ab2Sdrh   int nIns;                /* Number of insertion cost entries */
623b7045ab2Sdrh   int nByte;               /* Number of bytes in this character */
624b7045ab2Sdrh   EditDist3Cost **apIns;   /* Array of deletion cost entries */
625b7045ab2Sdrh };
626b7045ab2Sdrh 
627b7045ab2Sdrh /*
628b7045ab2Sdrh ** A precompiled FROM string
629b7045ab2Sdrh */
630b7045ab2Sdrh struct EditDist3ToString {
631b7045ab2Sdrh   char *z;                 /* The complete text of the TO string */
632b7045ab2Sdrh   int n;                   /* Number of characters in the TO string */
633b7045ab2Sdrh   EditDist3To *a;          /* Extra info about each char of the TO string */
634b7045ab2Sdrh };
635b7045ab2Sdrh 
636b7045ab2Sdrh /*
637b7045ab2Sdrh ** Clear or delete an instance of the object that records all edit-distance
638b7045ab2Sdrh ** weights.
639b7045ab2Sdrh */
editDist3ConfigClear(EditDist3Config * p)640b7045ab2Sdrh static void editDist3ConfigClear(EditDist3Config *p){
641b7045ab2Sdrh   int i;
642b7045ab2Sdrh   if( p==0 ) return;
643b7045ab2Sdrh   for(i=0; i<p->nLang; i++){
644b7045ab2Sdrh     EditDist3Cost *pCost, *pNext;
645b7045ab2Sdrh     pCost = p->a[i].pCost;
646b7045ab2Sdrh     while( pCost ){
647b7045ab2Sdrh       pNext = pCost->pNext;
648b7045ab2Sdrh       sqlite3_free(pCost);
649b7045ab2Sdrh       pCost = pNext;
650b7045ab2Sdrh     }
651b7045ab2Sdrh   }
652b7045ab2Sdrh   sqlite3_free(p->a);
653b7045ab2Sdrh   memset(p, 0, sizeof(*p));
654b7045ab2Sdrh }
editDist3ConfigDelete(void * pIn)655b7045ab2Sdrh static void editDist3ConfigDelete(void *pIn){
656b7045ab2Sdrh   EditDist3Config *p = (EditDist3Config*)pIn;
657b7045ab2Sdrh   editDist3ConfigClear(p);
658b7045ab2Sdrh   sqlite3_free(p);
659b7045ab2Sdrh }
660b7045ab2Sdrh 
661f4bc6c43Sdrh /* Compare the FROM values of two EditDist3Cost objects, for sorting.
662f4bc6c43Sdrh ** Return negative, zero, or positive if the A is less than, equal to,
663f4bc6c43Sdrh ** or greater than B.
664f4bc6c43Sdrh */
editDist3CostCompare(EditDist3Cost * pA,EditDist3Cost * pB)665f4bc6c43Sdrh static int editDist3CostCompare(EditDist3Cost *pA, EditDist3Cost *pB){
666f4bc6c43Sdrh   int n = pA->nFrom;
667f4bc6c43Sdrh   int rc;
668f4bc6c43Sdrh   if( n>pB->nFrom ) n = pB->nFrom;
669f4bc6c43Sdrh   rc = strncmp(pA->a, pB->a, n);
670f4bc6c43Sdrh   if( rc==0 ) rc = pA->nFrom - pB->nFrom;
671f4bc6c43Sdrh   return rc;
672f4bc6c43Sdrh }
673f4bc6c43Sdrh 
674f4bc6c43Sdrh /*
675f4bc6c43Sdrh ** Merge together two sorted lists of EditDist3Cost objects, in order
676f4bc6c43Sdrh ** of increasing FROM.
677f4bc6c43Sdrh */
editDist3CostMerge(EditDist3Cost * pA,EditDist3Cost * pB)678f4bc6c43Sdrh static EditDist3Cost *editDist3CostMerge(
679f4bc6c43Sdrh   EditDist3Cost *pA,
680f4bc6c43Sdrh   EditDist3Cost *pB
681f4bc6c43Sdrh ){
682f4bc6c43Sdrh   EditDist3Cost *pHead = 0;
683f4bc6c43Sdrh   EditDist3Cost **ppTail = &pHead;
684f4bc6c43Sdrh   EditDist3Cost *p;
685f4bc6c43Sdrh   while( pA && pB ){
686f4bc6c43Sdrh     if( editDist3CostCompare(pA,pB)<=0 ){
687f4bc6c43Sdrh       p = pA;
688f4bc6c43Sdrh       pA = pA->pNext;
689f4bc6c43Sdrh     }else{
690f4bc6c43Sdrh       p = pB;
691f4bc6c43Sdrh       pB = pB->pNext;
692f4bc6c43Sdrh     }
693f4bc6c43Sdrh     *ppTail = p;
694f4bc6c43Sdrh     ppTail =  &p->pNext;
695f4bc6c43Sdrh   }
696f4bc6c43Sdrh   if( pA ){
697f4bc6c43Sdrh     *ppTail = pA;
698f4bc6c43Sdrh   }else{
699f4bc6c43Sdrh     *ppTail = pB;
700f4bc6c43Sdrh   }
701f4bc6c43Sdrh   return pHead;
702f4bc6c43Sdrh }
703f4bc6c43Sdrh 
704f4bc6c43Sdrh /*
705f4bc6c43Sdrh ** Sort a list of EditDist3Cost objects into order of increasing FROM
706f4bc6c43Sdrh */
editDist3CostSort(EditDist3Cost * pList)707f4bc6c43Sdrh static EditDist3Cost *editDist3CostSort(EditDist3Cost *pList){
708f4bc6c43Sdrh   EditDist3Cost *ap[60], *p;
709f4bc6c43Sdrh   int i;
710f4bc6c43Sdrh   int mx = 0;
711f4bc6c43Sdrh   ap[0] = 0;
712f4bc6c43Sdrh   ap[1] = 0;
713f4bc6c43Sdrh   while( pList ){
714f4bc6c43Sdrh     p = pList;
715f4bc6c43Sdrh     pList = p->pNext;
716f4bc6c43Sdrh     p->pNext = 0;
717f4bc6c43Sdrh     for(i=0; ap[i]; i++){
718f4bc6c43Sdrh       p = editDist3CostMerge(ap[i],p);
719f4bc6c43Sdrh       ap[i] = 0;
720f4bc6c43Sdrh     }
721f4bc6c43Sdrh     ap[i] = p;
722f4bc6c43Sdrh     if( i>mx ){
723f4bc6c43Sdrh       mx = i;
724f4bc6c43Sdrh       ap[i+1] = 0;
725f4bc6c43Sdrh     }
726f4bc6c43Sdrh   }
727f4bc6c43Sdrh   p = 0;
728f4bc6c43Sdrh   for(i=0; i<=mx; i++){
729f4bc6c43Sdrh     if( ap[i] ) p = editDist3CostMerge(p,ap[i]);
730f4bc6c43Sdrh   }
731f4bc6c43Sdrh   return p;
732f4bc6c43Sdrh }
733f4bc6c43Sdrh 
734b7045ab2Sdrh /*
735b7045ab2Sdrh ** Load all edit-distance weights from a table.
736b7045ab2Sdrh */
editDist3ConfigLoad(EditDist3Config * p,sqlite3 * db,const char * zTable)737b7045ab2Sdrh static int editDist3ConfigLoad(
738b7045ab2Sdrh   EditDist3Config *p,      /* The edit distance configuration to load */
739b7045ab2Sdrh   sqlite3 *db,            /* Load from this database */
740b7045ab2Sdrh   const char *zTable      /* Name of the table from which to load */
741b7045ab2Sdrh ){
742b7045ab2Sdrh   sqlite3_stmt *pStmt;
743b7045ab2Sdrh   int rc, rc2;
744b7045ab2Sdrh   char *zSql;
745b7045ab2Sdrh   int iLangPrev = -9999;
746b7045ab2Sdrh   EditDist3Lang *pLang = 0;
747b7045ab2Sdrh 
748b7045ab2Sdrh   zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost"
749b7045ab2Sdrh                          " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable);
750b7045ab2Sdrh   if( zSql==0 ) return SQLITE_NOMEM;
751b7045ab2Sdrh   rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
752b7045ab2Sdrh   sqlite3_free(zSql);
753b7045ab2Sdrh   if( rc ) return rc;
754b7045ab2Sdrh   editDist3ConfigClear(p);
755b7045ab2Sdrh   while( sqlite3_step(pStmt)==SQLITE_ROW ){
756b7045ab2Sdrh     int iLang = sqlite3_column_int(pStmt, 0);
757b7045ab2Sdrh     const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1);
758b7045ab2Sdrh     int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0;
759b7045ab2Sdrh     const char *zTo = (const char*)sqlite3_column_text(pStmt, 2);
760b7045ab2Sdrh     int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0;
761b7045ab2Sdrh     int iCost = sqlite3_column_int(pStmt, 3);
762b7045ab2Sdrh 
763b7045ab2Sdrh     assert( zFrom!=0 || nFrom==0 );
764b7045ab2Sdrh     assert( zTo!=0 || nTo==0 );
765b7045ab2Sdrh     if( nFrom>100 || nTo>100 ) continue;
766b7045ab2Sdrh     if( iCost<0 ) continue;
7679f95e48dSdrh     if( iCost>=10000 ) continue;  /* Costs above 10K are considered infinite */
768b7045ab2Sdrh     if( pLang==0 || iLang!=iLangPrev ){
769b7045ab2Sdrh       EditDist3Lang *pNew;
770c4703eedSdrh       pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0]));
771b7045ab2Sdrh       if( pNew==0 ){ rc = SQLITE_NOMEM; break; }
772b7045ab2Sdrh       p->a = pNew;
773b7045ab2Sdrh       pLang = &p->a[p->nLang];
774b7045ab2Sdrh       p->nLang++;
775b7045ab2Sdrh       pLang->iLang = iLang;
776b7045ab2Sdrh       pLang->iInsCost = 100;
777b7045ab2Sdrh       pLang->iDelCost = 100;
778b7045ab2Sdrh       pLang->iSubCost = 150;
779b7045ab2Sdrh       pLang->pCost = 0;
780b7045ab2Sdrh       iLangPrev = iLang;
781b7045ab2Sdrh     }
782b7045ab2Sdrh     if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){
783b7045ab2Sdrh       pLang->iDelCost = iCost;
784b7045ab2Sdrh     }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){
785b7045ab2Sdrh       pLang->iInsCost = iCost;
786b7045ab2Sdrh     }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){
787b7045ab2Sdrh       pLang->iSubCost = iCost;
788b7045ab2Sdrh     }else{
789b7045ab2Sdrh       EditDist3Cost *pCost;
790b7045ab2Sdrh       int nExtra = nFrom + nTo - 4;
791b7045ab2Sdrh       if( nExtra<0 ) nExtra = 0;
792c4703eedSdrh       pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra );
793b7045ab2Sdrh       if( pCost==0 ){ rc = SQLITE_NOMEM; break; }
79477fac879Smistachkin       pCost->nFrom = (u8)nFrom;
79577fac879Smistachkin       pCost->nTo = (u8)nTo;
79677fac879Smistachkin       pCost->iCost = (u16)iCost;
797b7045ab2Sdrh       memcpy(pCost->a, zFrom, nFrom);
798b7045ab2Sdrh       memcpy(pCost->a + nFrom, zTo, nTo);
799b7045ab2Sdrh       pCost->pNext = pLang->pCost;
800b7045ab2Sdrh       pLang->pCost = pCost;
801b7045ab2Sdrh     }
802b7045ab2Sdrh   }
803b7045ab2Sdrh   rc2 = sqlite3_finalize(pStmt);
804b7045ab2Sdrh   if( rc==SQLITE_OK ) rc = rc2;
805f4bc6c43Sdrh   if( rc==SQLITE_OK ){
806f4bc6c43Sdrh     int iLang;
807f4bc6c43Sdrh     for(iLang=0; iLang<p->nLang; iLang++){
808f4bc6c43Sdrh       p->a[iLang].pCost = editDist3CostSort(p->a[iLang].pCost);
809f4bc6c43Sdrh     }
810f4bc6c43Sdrh   }
811b7045ab2Sdrh   return rc;
812b7045ab2Sdrh }
813b7045ab2Sdrh 
814b7045ab2Sdrh /*
815b7045ab2Sdrh ** Return the length (in bytes) of a utf-8 character.  Or return a maximum
816b7045ab2Sdrh ** of N.
817b7045ab2Sdrh */
utf8Len(unsigned char c,int N)818b7045ab2Sdrh static int utf8Len(unsigned char c, int N){
819b7045ab2Sdrh   int len = 1;
820b7045ab2Sdrh   if( c>0x7f ){
821b7045ab2Sdrh     if( (c&0xe0)==0xc0 ){
822b7045ab2Sdrh       len = 2;
823b7045ab2Sdrh     }else if( (c&0xf0)==0xe0 ){
824b7045ab2Sdrh       len = 3;
825b7045ab2Sdrh     }else{
826b7045ab2Sdrh       len = 4;
827b7045ab2Sdrh     }
828b7045ab2Sdrh   }
829b7045ab2Sdrh   if( len>N ) len = N;
830b7045ab2Sdrh   return len;
831b7045ab2Sdrh }
832b7045ab2Sdrh 
833b7045ab2Sdrh /*
834b7045ab2Sdrh ** Return TRUE (non-zero) if the To side of the given cost matches
835b7045ab2Sdrh ** the given string.
836b7045ab2Sdrh */
matchTo(EditDist3Cost * p,const char * z,int n)837b7045ab2Sdrh static int matchTo(EditDist3Cost *p, const char *z, int n){
83858bd0332Sdrh   assert( n>0 );
83946e835a2Sdrh   if( p->a[p->nFrom]!=z[0] ) return 0;
840b7045ab2Sdrh   if( p->nTo>n ) return 0;
841b7045ab2Sdrh   if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0;
842b7045ab2Sdrh   return 1;
843b7045ab2Sdrh }
844b7045ab2Sdrh 
845b7045ab2Sdrh /*
846b7045ab2Sdrh ** Return TRUE (non-zero) if the From side of the given cost matches
847b7045ab2Sdrh ** the given string.
848b7045ab2Sdrh */
matchFrom(EditDist3Cost * p,const char * z,int n)849b7045ab2Sdrh static int matchFrom(EditDist3Cost *p, const char *z, int n){
850b7045ab2Sdrh   assert( p->nFrom<=n );
85158bd0332Sdrh   if( p->nFrom ){
85246e835a2Sdrh     if( p->a[0]!=z[0] ) return 0;
853b7045ab2Sdrh     if( strncmp(p->a, z, p->nFrom)!=0 ) return 0;
85458bd0332Sdrh   }
855b7045ab2Sdrh   return 1;
856b7045ab2Sdrh }
857b7045ab2Sdrh 
858b7045ab2Sdrh /*
859b7045ab2Sdrh ** Return TRUE (non-zero) of the next FROM character and the next TO
860b7045ab2Sdrh ** character are the same.
861b7045ab2Sdrh */
matchFromTo(EditDist3FromString * pStr,int n1,const char * z2,int n2)862b7045ab2Sdrh static int matchFromTo(
863b7045ab2Sdrh   EditDist3FromString *pStr,  /* Left hand string */
864b7045ab2Sdrh   int n1,                     /* Index of comparison character on the left */
865b7045ab2Sdrh   const char *z2,             /* Right-handl comparison character */
866b7045ab2Sdrh   int n2                      /* Bytes remaining in z2[] */
867b7045ab2Sdrh ){
868b7045ab2Sdrh   int b1 = pStr->a[n1].nByte;
869b7045ab2Sdrh   if( b1>n2 ) return 0;
87058bd0332Sdrh   assert( b1>0 );
87146e835a2Sdrh   if( pStr->z[n1]!=z2[0] ) return 0;
872d9274a8aSdrh   if( strncmp(pStr->z+n1, z2, b1)!=0 ) return 0;
873b7045ab2Sdrh   return 1;
874b7045ab2Sdrh }
875b7045ab2Sdrh 
876b7045ab2Sdrh /*
877b7045ab2Sdrh ** Delete an EditDist3FromString objecct
878b7045ab2Sdrh */
editDist3FromStringDelete(EditDist3FromString * p)879b7045ab2Sdrh static void editDist3FromStringDelete(EditDist3FromString *p){
880b7045ab2Sdrh   int i;
881b7045ab2Sdrh   if( p ){
882b7045ab2Sdrh     for(i=0; i<p->n; i++){
883b7045ab2Sdrh       sqlite3_free(p->a[i].apDel);
884b7045ab2Sdrh       sqlite3_free(p->a[i].apSubst);
885b7045ab2Sdrh     }
886b7045ab2Sdrh     sqlite3_free(p);
887b7045ab2Sdrh   }
888b7045ab2Sdrh }
889b7045ab2Sdrh 
890b7045ab2Sdrh /*
891b7045ab2Sdrh ** Create a EditDist3FromString object.
892b7045ab2Sdrh */
editDist3FromStringNew(const EditDist3Lang * pLang,const char * z,int n)893b7045ab2Sdrh static EditDist3FromString *editDist3FromStringNew(
894b7045ab2Sdrh   const EditDist3Lang *pLang,
895b7045ab2Sdrh   const char *z,
896b7045ab2Sdrh   int n
897b7045ab2Sdrh ){
898b7045ab2Sdrh   EditDist3FromString *pStr;
899b7045ab2Sdrh   EditDist3Cost *p;
900b7045ab2Sdrh   int i;
901b7045ab2Sdrh 
902b7045ab2Sdrh   if( z==0 ) return 0;
903b7045ab2Sdrh   if( n<0 ) n = (int)strlen(z);
904c4703eedSdrh   pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 );
905b7045ab2Sdrh   if( pStr==0 ) return 0;
906b7045ab2Sdrh   pStr->a = (EditDist3From*)&pStr[1];
907b7045ab2Sdrh   memset(pStr->a, 0, sizeof(pStr->a[0])*n);
908b7045ab2Sdrh   pStr->n = n;
909b7045ab2Sdrh   pStr->z = (char*)&pStr->a[n];
910b7045ab2Sdrh   memcpy(pStr->z, z, n+1);
911b7045ab2Sdrh   if( n && z[n-1]=='*' ){
912b7045ab2Sdrh     pStr->isPrefix = 1;
913b7045ab2Sdrh     n--;
914b7045ab2Sdrh     pStr->n--;
915b7045ab2Sdrh     pStr->z[n] = 0;
916b7045ab2Sdrh   }else{
917b7045ab2Sdrh     pStr->isPrefix = 0;
918b7045ab2Sdrh   }
919b7045ab2Sdrh 
920b7045ab2Sdrh   for(i=0; i<n; i++){
921b7045ab2Sdrh     EditDist3From *pFrom = &pStr->a[i];
922b7045ab2Sdrh     memset(pFrom, 0, sizeof(*pFrom));
923b7045ab2Sdrh     pFrom->nByte = utf8Len((unsigned char)z[i], n-i);
924b7045ab2Sdrh     for(p=pLang->pCost; p; p=p->pNext){
925b7045ab2Sdrh       EditDist3Cost **apNew;
926b7045ab2Sdrh       if( i+p->nFrom>n ) continue;
927b7045ab2Sdrh       if( matchFrom(p, z+i, n-i)==0 ) continue;
928b7045ab2Sdrh       if( p->nTo==0 ){
929c4703eedSdrh         apNew = sqlite3_realloc64(pFrom->apDel,
930b7045ab2Sdrh                                 sizeof(*apNew)*(pFrom->nDel+1));
931b7045ab2Sdrh         if( apNew==0 ) break;
932b7045ab2Sdrh         pFrom->apDel = apNew;
933b7045ab2Sdrh         apNew[pFrom->nDel++] = p;
934b7045ab2Sdrh       }else{
935c4703eedSdrh         apNew = sqlite3_realloc64(pFrom->apSubst,
936b7045ab2Sdrh                                 sizeof(*apNew)*(pFrom->nSubst+1));
937b7045ab2Sdrh         if( apNew==0 ) break;
938b7045ab2Sdrh         pFrom->apSubst = apNew;
939b7045ab2Sdrh         apNew[pFrom->nSubst++] = p;
940b7045ab2Sdrh       }
941b7045ab2Sdrh     }
942b7045ab2Sdrh     if( p ){
943b7045ab2Sdrh       editDist3FromStringDelete(pStr);
944b7045ab2Sdrh       pStr = 0;
945b7045ab2Sdrh       break;
946b7045ab2Sdrh     }
947b7045ab2Sdrh   }
948b7045ab2Sdrh   return pStr;
949b7045ab2Sdrh }
950b7045ab2Sdrh 
951b7045ab2Sdrh /*
952b7045ab2Sdrh ** Update entry m[i] such that it is the minimum of its current value
953b7045ab2Sdrh ** and m[j]+iCost.
954b7045ab2Sdrh */
updateCost(unsigned int * m,int i,int j,int iCost)955b7045ab2Sdrh static void updateCost(
956b7045ab2Sdrh   unsigned int *m,
957b7045ab2Sdrh   int i,
958b7045ab2Sdrh   int j,
959b7045ab2Sdrh   int iCost
960b7045ab2Sdrh ){
961d9274a8aSdrh   unsigned int b;
962b7045ab2Sdrh   assert( iCost>=0 );
963d9274a8aSdrh   assert( iCost<10000 );
964d9274a8aSdrh   b = m[j] + iCost;
965b7045ab2Sdrh   if( b<m[i] ) m[i] = b;
966b7045ab2Sdrh }
967b7045ab2Sdrh 
9689084ec1dSdrh /*
9699084ec1dSdrh ** How much stack space (int bytes) to use for Wagner matrix in
9709084ec1dSdrh ** editDist3Core().  If more space than this is required, the entire
9719084ec1dSdrh ** matrix is taken from the heap.  To reduce the load on the memory
9729084ec1dSdrh ** allocator, make this value as large as practical for the
9739084ec1dSdrh ** architecture in use.
9749084ec1dSdrh */
9759084ec1dSdrh #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ
9769084ec1dSdrh # define SQLITE_SPELLFIX_STACKALLOC_SZ  (1024)
9779084ec1dSdrh #endif
9789084ec1dSdrh 
979b7045ab2Sdrh /* Compute the edit distance between two strings.
980b7045ab2Sdrh **
981b7045ab2Sdrh ** If an error occurs, return a negative number which is the error code.
982b7045ab2Sdrh **
983b7045ab2Sdrh ** If pnMatch is not NULL, then *pnMatch is set to the number of characters
984b7045ab2Sdrh ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does
985b7045ab2Sdrh ** not contain the pattern for a prefix-search, then this is always the number
986b7045ab2Sdrh ** of characters in z2. If pFrom does contain a prefix search pattern, then
987b7045ab2Sdrh ** it is the number of characters in the prefix of z2 that was deemed to
988b7045ab2Sdrh ** match pFrom.
989b7045ab2Sdrh */
editDist3Core(EditDist3FromString * pFrom,const char * z2,int n2,const EditDist3Lang * pLang,int * pnMatch)990b7045ab2Sdrh static int editDist3Core(
991b7045ab2Sdrh   EditDist3FromString *pFrom,  /* The FROM string */
992b7045ab2Sdrh   const char *z2,              /* The TO string */
993b7045ab2Sdrh   int n2,                      /* Length of the TO string */
994b7045ab2Sdrh   const EditDist3Lang *pLang,  /* Edit weights for a particular language ID */
995b7045ab2Sdrh   int *pnMatch                 /* OUT: Characters in matched prefix */
996b7045ab2Sdrh ){
997b7045ab2Sdrh   int k, n;
998b7045ab2Sdrh   int i1, b1;
999b7045ab2Sdrh   int i2, b2;
1000b7045ab2Sdrh   EditDist3FromString f = *pFrom;
1001b7045ab2Sdrh   EditDist3To *a2;
1002b7045ab2Sdrh   unsigned int *m;
1003c6aab321Sdrh   unsigned int *pToFree;
1004b7045ab2Sdrh   int szRow;
1005b7045ab2Sdrh   EditDist3Cost *p;
1006b7045ab2Sdrh   int res;
1007c6aab321Sdrh   sqlite3_uint64 nByte;
10089084ec1dSdrh   unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)];
1009b7045ab2Sdrh 
1010b7045ab2Sdrh   /* allocate the Wagner matrix and the aTo[] array for the TO string */
1011b7045ab2Sdrh   n = (f.n+1)*(n2+1);
1012b7045ab2Sdrh   n = (n+1)&~1;
1013c6aab321Sdrh   nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2;
1014c6aab321Sdrh   if( nByte<=sizeof(stackSpace) ){
1015c6aab321Sdrh     m = stackSpace;
1016c6aab321Sdrh     pToFree = 0;
1017c6aab321Sdrh   }else{
1018c4703eedSdrh     m = pToFree = sqlite3_malloc64( nByte );
1019b7045ab2Sdrh     if( m==0 ) return -1;            /* Out of memory */
1020c6aab321Sdrh   }
1021b7045ab2Sdrh   a2 = (EditDist3To*)&m[n];
1022b7045ab2Sdrh   memset(a2, 0, sizeof(a2[0])*n2);
1023b7045ab2Sdrh 
1024b7045ab2Sdrh   /* Fill in the a1[] matrix for all characters of the TO string */
1025b7045ab2Sdrh   for(i2=0; i2<n2; i2++){
1026b7045ab2Sdrh     a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2);
1027b7045ab2Sdrh     for(p=pLang->pCost; p; p=p->pNext){
1028b7045ab2Sdrh       EditDist3Cost **apNew;
1029f4bc6c43Sdrh       if( p->nFrom>0 ) break;
1030b7045ab2Sdrh       if( i2+p->nTo>n2 ) continue;
1031f4bc6c43Sdrh       if( p->a[0]>z2[i2] ) break;
1032b7045ab2Sdrh       if( matchTo(p, z2+i2, n2-i2)==0 ) continue;
1033b7045ab2Sdrh       a2[i2].nIns++;
1034c4703eedSdrh       apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns);
1035b7045ab2Sdrh       if( apNew==0 ){
1036b7045ab2Sdrh         res = -1;  /* Out of memory */
1037b7045ab2Sdrh         goto editDist3Abort;
1038b7045ab2Sdrh       }
1039b7045ab2Sdrh       a2[i2].apIns = apNew;
1040b7045ab2Sdrh       a2[i2].apIns[a2[i2].nIns-1] = p;
1041b7045ab2Sdrh     }
1042b7045ab2Sdrh   }
1043b7045ab2Sdrh 
1044b7045ab2Sdrh   /* Prepare to compute the minimum edit distance */
1045b7045ab2Sdrh   szRow = f.n+1;
1046b7045ab2Sdrh   memset(m, 0x01, (n2+1)*szRow*sizeof(m[0]));
1047b7045ab2Sdrh   m[0] = 0;
1048b7045ab2Sdrh 
1049b7045ab2Sdrh   /* First fill in the top-row of the matrix with FROM deletion costs */
1050b7045ab2Sdrh   for(i1=0; i1<f.n; i1 += b1){
1051b7045ab2Sdrh     b1 = f.a[i1].nByte;
1052b7045ab2Sdrh     updateCost(m, i1+b1, i1, pLang->iDelCost);
1053b7045ab2Sdrh     for(k=0; k<f.a[i1].nDel; k++){
1054b7045ab2Sdrh       p = f.a[i1].apDel[k];
1055b7045ab2Sdrh       updateCost(m, i1+p->nFrom, i1, p->iCost);
1056b7045ab2Sdrh     }
1057b7045ab2Sdrh   }
1058b7045ab2Sdrh 
1059b7045ab2Sdrh   /* Fill in all subsequent rows, top-to-bottom, left-to-right */
1060b7045ab2Sdrh   for(i2=0; i2<n2; i2 += b2){
1061b7045ab2Sdrh     int rx;      /* Starting index for current row */
1062b7045ab2Sdrh     int rxp;     /* Starting index for previous row */
1063b7045ab2Sdrh     b2 = a2[i2].nByte;
1064b7045ab2Sdrh     rx = szRow*(i2+b2);
1065b7045ab2Sdrh     rxp = szRow*i2;
1066b7045ab2Sdrh     updateCost(m, rx, rxp, pLang->iInsCost);
1067b7045ab2Sdrh     for(k=0; k<a2[i2].nIns; k++){
1068b7045ab2Sdrh       p = a2[i2].apIns[k];
1069b7045ab2Sdrh       updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost);
1070b7045ab2Sdrh     }
1071b7045ab2Sdrh     for(i1=0; i1<f.n; i1+=b1){
1072b7045ab2Sdrh       int cx;    /* Index of current cell */
1073b7045ab2Sdrh       int cxp;   /* Index of cell immediately to the left */
1074b7045ab2Sdrh       int cxd;   /* Index of cell to the left and one row above */
1075b7045ab2Sdrh       int cxu;   /* Index of cell immediately above */
1076b7045ab2Sdrh       b1 = f.a[i1].nByte;
1077b7045ab2Sdrh       cxp = rx + i1;
1078b7045ab2Sdrh       cx = cxp + b1;
1079b7045ab2Sdrh       cxd = rxp + i1;
1080b7045ab2Sdrh       cxu = cxd + b1;
1081b7045ab2Sdrh       updateCost(m, cx, cxp, pLang->iDelCost);
1082b7045ab2Sdrh       for(k=0; k<f.a[i1].nDel; k++){
1083b7045ab2Sdrh         p = f.a[i1].apDel[k];
1084b7045ab2Sdrh         updateCost(m, cxp+p->nFrom, cxp, p->iCost);
1085b7045ab2Sdrh       }
1086b7045ab2Sdrh       updateCost(m, cx, cxu, pLang->iInsCost);
1087b7045ab2Sdrh       if( matchFromTo(&f, i1, z2+i2, n2-i2) ){
1088b7045ab2Sdrh         updateCost(m, cx, cxd, 0);
1089b7045ab2Sdrh       }
1090b7045ab2Sdrh       updateCost(m, cx, cxd, pLang->iSubCost);
1091b7045ab2Sdrh       for(k=0; k<f.a[i1].nSubst; k++){
1092b7045ab2Sdrh         p = f.a[i1].apSubst[k];
1093b7045ab2Sdrh         if( matchTo(p, z2+i2, n2-i2) ){
1094b7045ab2Sdrh           updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost);
1095b7045ab2Sdrh         }
1096b7045ab2Sdrh       }
1097b7045ab2Sdrh     }
1098b7045ab2Sdrh   }
1099b7045ab2Sdrh 
1100b7045ab2Sdrh #if 0  /* Enable for debugging */
1101b7045ab2Sdrh   printf("         ^");
1102b7045ab2Sdrh   for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff);
1103b7045ab2Sdrh   printf("\n   ^:");
1104b7045ab2Sdrh   for(i1=0; i1<szRow; i1++){
1105b7045ab2Sdrh     int v = m[i1];
1106b7045ab2Sdrh     if( v>9999 ) printf(" ****");
1107b7045ab2Sdrh     else         printf(" %4d", v);
1108b7045ab2Sdrh   }
1109b7045ab2Sdrh   printf("\n");
1110b7045ab2Sdrh   for(i2=0; i2<n2; i2++){
1111b7045ab2Sdrh     printf("%c-%02x:", z2[i2], z2[i2]&0xff);
1112b7045ab2Sdrh     for(i1=0; i1<szRow; i1++){
1113b7045ab2Sdrh       int v = m[(i2+1)*szRow+i1];
1114b7045ab2Sdrh       if( v>9999 ) printf(" ****");
1115b7045ab2Sdrh       else         printf(" %4d", v);
1116b7045ab2Sdrh     }
1117b7045ab2Sdrh     printf("\n");
1118b7045ab2Sdrh   }
1119b7045ab2Sdrh #endif
1120b7045ab2Sdrh 
1121b7045ab2Sdrh   /* Free memory allocations and return the result */
1122b7045ab2Sdrh   res = (int)m[szRow*(n2+1)-1];
1123b7045ab2Sdrh   n = n2;
1124b7045ab2Sdrh   if( f.isPrefix ){
1125b7045ab2Sdrh     for(i2=1; i2<=n2; i2++){
1126b7045ab2Sdrh       int b = m[szRow*i2-1];
1127b7045ab2Sdrh       if( b<=res ){
1128b7045ab2Sdrh         res = b;
1129b7045ab2Sdrh         n = i2 - 1;
1130b7045ab2Sdrh       }
1131b7045ab2Sdrh     }
1132b7045ab2Sdrh   }
1133b7045ab2Sdrh   if( pnMatch ){
1134b7045ab2Sdrh     int nExtra = 0;
1135b7045ab2Sdrh     for(k=0; k<n; k++){
1136b7045ab2Sdrh       if( (z2[k] & 0xc0)==0x80 ) nExtra++;
1137b7045ab2Sdrh     }
1138b7045ab2Sdrh     *pnMatch = n - nExtra;
1139b7045ab2Sdrh   }
1140b7045ab2Sdrh 
1141b7045ab2Sdrh editDist3Abort:
1142b7045ab2Sdrh   for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns);
1143c6aab321Sdrh   sqlite3_free(pToFree);
1144b7045ab2Sdrh   return res;
1145b7045ab2Sdrh }
1146b7045ab2Sdrh 
1147b7045ab2Sdrh /*
1148b7045ab2Sdrh ** Get an appropriate EditDist3Lang object.
1149b7045ab2Sdrh */
editDist3FindLang(EditDist3Config * pConfig,int iLang)1150b7045ab2Sdrh static const EditDist3Lang *editDist3FindLang(
1151b7045ab2Sdrh   EditDist3Config *pConfig,
1152b7045ab2Sdrh   int iLang
1153b7045ab2Sdrh ){
1154b7045ab2Sdrh   int i;
1155b7045ab2Sdrh   for(i=0; i<pConfig->nLang; i++){
1156b7045ab2Sdrh     if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i];
1157b7045ab2Sdrh   }
1158b7045ab2Sdrh   return &editDist3Lang;
1159b7045ab2Sdrh }
1160b7045ab2Sdrh 
1161b7045ab2Sdrh /*
1162b7045ab2Sdrh ** Function:    editdist3(A,B,iLang)
1163b7045ab2Sdrh **              editdist3(tablename)
1164b7045ab2Sdrh **
1165b7045ab2Sdrh ** Return the cost of transforming string A into string B using edit
1166b7045ab2Sdrh ** weights for iLang.
1167b7045ab2Sdrh **
1168b7045ab2Sdrh ** The second form loads edit weights into memory from a table.
1169b7045ab2Sdrh */
editDist3SqlFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)1170b7045ab2Sdrh static void editDist3SqlFunc(
1171b7045ab2Sdrh   sqlite3_context *context,
1172b7045ab2Sdrh   int argc,
1173b7045ab2Sdrh   sqlite3_value **argv
1174b7045ab2Sdrh ){
1175b7045ab2Sdrh   EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context);
1176b7045ab2Sdrh   sqlite3 *db = sqlite3_context_db_handle(context);
1177b7045ab2Sdrh   int rc;
1178b7045ab2Sdrh   if( argc==1 ){
1179b7045ab2Sdrh     const char *zTable = (const char*)sqlite3_value_text(argv[0]);
1180b7045ab2Sdrh     rc = editDist3ConfigLoad(pConfig, db, zTable);
1181b7045ab2Sdrh     if( rc ) sqlite3_result_error_code(context, rc);
1182b7045ab2Sdrh   }else{
1183b7045ab2Sdrh     const char *zA = (const char*)sqlite3_value_text(argv[0]);
1184b7045ab2Sdrh     const char *zB = (const char*)sqlite3_value_text(argv[1]);
1185b7045ab2Sdrh     int nA = sqlite3_value_bytes(argv[0]);
1186b7045ab2Sdrh     int nB = sqlite3_value_bytes(argv[1]);
1187b7045ab2Sdrh     int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0;
1188b7045ab2Sdrh     const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang);
1189b7045ab2Sdrh     EditDist3FromString *pFrom;
1190b7045ab2Sdrh     int dist;
1191b7045ab2Sdrh 
1192b7045ab2Sdrh     pFrom = editDist3FromStringNew(pLang, zA, nA);
1193b7045ab2Sdrh     if( pFrom==0 ){
1194b7045ab2Sdrh       sqlite3_result_error_nomem(context);
1195b7045ab2Sdrh       return;
1196b7045ab2Sdrh     }
1197b7045ab2Sdrh     dist = editDist3Core(pFrom, zB, nB, pLang, 0);
1198b7045ab2Sdrh     editDist3FromStringDelete(pFrom);
1199b7045ab2Sdrh     if( dist==(-1) ){
1200b7045ab2Sdrh       sqlite3_result_error_nomem(context);
1201b7045ab2Sdrh     }else{
1202b7045ab2Sdrh       sqlite3_result_int(context, dist);
1203b7045ab2Sdrh     }
1204b7045ab2Sdrh   }
1205b7045ab2Sdrh }
1206b7045ab2Sdrh 
1207b7045ab2Sdrh /*
1208b7045ab2Sdrh ** Register the editDist3 function with SQLite
1209b7045ab2Sdrh */
editDist3Install(sqlite3 * db)1210b7045ab2Sdrh static int editDist3Install(sqlite3 *db){
1211b7045ab2Sdrh   int rc;
1212c4703eedSdrh   EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) );
1213b7045ab2Sdrh   if( pConfig==0 ) return SQLITE_NOMEM;
1214b7045ab2Sdrh   memset(pConfig, 0, sizeof(*pConfig));
1215b7045ab2Sdrh   rc = sqlite3_create_function_v2(db, "editdist3",
1216537e7028Sdrh               2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1217537e7028Sdrh               editDist3SqlFunc, 0, 0, 0);
1218b7045ab2Sdrh   if( rc==SQLITE_OK ){
1219b7045ab2Sdrh     rc = sqlite3_create_function_v2(db, "editdist3",
1220537e7028Sdrh                 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1221537e7028Sdrh                 editDist3SqlFunc, 0, 0, 0);
1222b7045ab2Sdrh   }
1223b7045ab2Sdrh   if( rc==SQLITE_OK ){
1224b7045ab2Sdrh     rc = sqlite3_create_function_v2(db, "editdist3",
1225537e7028Sdrh                 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig,
1226537e7028Sdrh                 editDist3SqlFunc, 0, 0, editDist3ConfigDelete);
1227b7045ab2Sdrh   }else{
1228b7045ab2Sdrh     sqlite3_free(pConfig);
1229b7045ab2Sdrh   }
1230b7045ab2Sdrh   return rc;
1231b7045ab2Sdrh }
1232b7045ab2Sdrh /* End configurable cost unicode edit distance routines
1233b7045ab2Sdrh ******************************************************************************
1234b7045ab2Sdrh ******************************************************************************
1235b7045ab2Sdrh ** Begin transliterate unicode-to-ascii implementation
1236b7045ab2Sdrh */
1237b7045ab2Sdrh 
1238b7045ab2Sdrh #if !SQLITE_AMALGAMATION
1239b7045ab2Sdrh /*
1240b7045ab2Sdrh ** This lookup table is used to help decode the first byte of
1241b7045ab2Sdrh ** a multi-byte UTF8 character.
1242b7045ab2Sdrh */
1243b7045ab2Sdrh static const unsigned char sqlite3Utf8Trans1[] = {
1244b7045ab2Sdrh   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1245b7045ab2Sdrh   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1246b7045ab2Sdrh   0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
1247b7045ab2Sdrh   0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
1248b7045ab2Sdrh   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1249b7045ab2Sdrh   0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
1250b7045ab2Sdrh   0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
1251b7045ab2Sdrh   0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
1252b7045ab2Sdrh };
1253b7045ab2Sdrh #endif
1254b7045ab2Sdrh 
1255b7045ab2Sdrh /*
1256b7045ab2Sdrh ** Return the value of the first UTF-8 character in the string.
1257b7045ab2Sdrh */
utf8Read(const unsigned char * z,int n,int * pSize)1258b7045ab2Sdrh static int utf8Read(const unsigned char *z, int n, int *pSize){
1259b7045ab2Sdrh   int c, i;
1260b7045ab2Sdrh 
1261b7045ab2Sdrh   /* All callers to this routine (in the current implementation)
1262b7045ab2Sdrh   ** always have n>0. */
1263b7045ab2Sdrh   if( NEVER(n==0) ){
1264b7045ab2Sdrh     c = i = 0;
1265b7045ab2Sdrh   }else{
1266b7045ab2Sdrh     c = z[0];
1267b7045ab2Sdrh     i = 1;
1268b7045ab2Sdrh     if( c>=0xc0 ){
1269b7045ab2Sdrh       c = sqlite3Utf8Trans1[c-0xc0];
1270b7045ab2Sdrh       while( i<n && (z[i] & 0xc0)==0x80 ){
1271b7045ab2Sdrh         c = (c<<6) + (0x3f & z[i++]);
1272b7045ab2Sdrh       }
1273b7045ab2Sdrh     }
1274b7045ab2Sdrh   }
1275b7045ab2Sdrh   *pSize = i;
1276b7045ab2Sdrh   return c;
1277b7045ab2Sdrh }
1278b7045ab2Sdrh 
1279b7045ab2Sdrh /*
1280b7045ab2Sdrh ** Return the number of characters in the utf-8 string in the nIn byte
1281b7045ab2Sdrh ** buffer pointed to by zIn.
1282b7045ab2Sdrh */
utf8Charlen(const char * zIn,int nIn)1283b7045ab2Sdrh static int utf8Charlen(const char *zIn, int nIn){
1284b7045ab2Sdrh   int i;
1285b7045ab2Sdrh   int nChar = 0;
1286b7045ab2Sdrh   for(i=0; i<nIn; nChar++){
1287b7045ab2Sdrh     int sz;
1288b7045ab2Sdrh     utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1289b7045ab2Sdrh     i += sz;
1290b7045ab2Sdrh   }
1291b7045ab2Sdrh   return nChar;
1292b7045ab2Sdrh }
1293b7045ab2Sdrh 
12941a0e5b37Sdan typedef struct Transliteration Transliteration;
12951a0e5b37Sdan struct Transliteration {
12961a0e5b37Sdan  unsigned short int cFrom;
12971a0e5b37Sdan  unsigned char cTo0, cTo1, cTo2, cTo3;
12982e5e0e10Sdan #ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
12992e5e0e10Sdan  unsigned char cTo4;
13002e5e0e10Sdan #endif
13011a0e5b37Sdan };
13021a0e5b37Sdan 
1303b7045ab2Sdrh /*
1304b7045ab2Sdrh ** Table of translations from unicode characters into ASCII.
1305b7045ab2Sdrh */
13061a0e5b37Sdan static const Transliteration translit[] = {
13071a0e5b37Sdan   { 0x00A0,  0x20, 0x00, 0x00, 0x00 },  /*   to   */
13081a0e5b37Sdan   { 0x00B5,  0x75, 0x00, 0x00, 0x00 },  /* µ to u */
13091a0e5b37Sdan   { 0x00C0,  0x41, 0x00, 0x00, 0x00 },  /* À to A */
13101a0e5b37Sdan   { 0x00C1,  0x41, 0x00, 0x00, 0x00 },  /* Á to A */
13111a0e5b37Sdan   { 0x00C2,  0x41, 0x00, 0x00, 0x00 },  /* Â to A */
13121a0e5b37Sdan   { 0x00C3,  0x41, 0x00, 0x00, 0x00 },  /* Ã to A */
13131a0e5b37Sdan   { 0x00C4,  0x41, 0x65, 0x00, 0x00 },  /* Ä to Ae */
13141a0e5b37Sdan   { 0x00C5,  0x41, 0x61, 0x00, 0x00 },  /* Å to Aa */
13151a0e5b37Sdan   { 0x00C6,  0x41, 0x45, 0x00, 0x00 },  /* Æ to AE */
13161a0e5b37Sdan   { 0x00C7,  0x43, 0x00, 0x00, 0x00 },  /* Ç to C */
13171a0e5b37Sdan   { 0x00C8,  0x45, 0x00, 0x00, 0x00 },  /* È to E */
13181a0e5b37Sdan   { 0x00C9,  0x45, 0x00, 0x00, 0x00 },  /* É to E */
13191a0e5b37Sdan   { 0x00CA,  0x45, 0x00, 0x00, 0x00 },  /* Ê to E */
13201a0e5b37Sdan   { 0x00CB,  0x45, 0x00, 0x00, 0x00 },  /* Ë to E */
13211a0e5b37Sdan   { 0x00CC,  0x49, 0x00, 0x00, 0x00 },  /* Ì to I */
13221a0e5b37Sdan   { 0x00CD,  0x49, 0x00, 0x00, 0x00 },  /* Í to I */
13231a0e5b37Sdan   { 0x00CE,  0x49, 0x00, 0x00, 0x00 },  /* Î to I */
13241a0e5b37Sdan   { 0x00CF,  0x49, 0x00, 0x00, 0x00 },  /* Ï to I */
13251a0e5b37Sdan   { 0x00D0,  0x44, 0x00, 0x00, 0x00 },  /* Ð to D */
13261a0e5b37Sdan   { 0x00D1,  0x4E, 0x00, 0x00, 0x00 },  /* Ñ to N */
13271a0e5b37Sdan   { 0x00D2,  0x4F, 0x00, 0x00, 0x00 },  /* Ò to O */
13281a0e5b37Sdan   { 0x00D3,  0x4F, 0x00, 0x00, 0x00 },  /* Ó to O */
13291a0e5b37Sdan   { 0x00D4,  0x4F, 0x00, 0x00, 0x00 },  /* Ô to O */
13301a0e5b37Sdan   { 0x00D5,  0x4F, 0x00, 0x00, 0x00 },  /* Õ to O */
13311a0e5b37Sdan   { 0x00D6,  0x4F, 0x65, 0x00, 0x00 },  /* Ö to Oe */
13321a0e5b37Sdan   { 0x00D7,  0x78, 0x00, 0x00, 0x00 },  /* × to x */
13331a0e5b37Sdan   { 0x00D8,  0x4F, 0x00, 0x00, 0x00 },  /* Ø to O */
13341a0e5b37Sdan   { 0x00D9,  0x55, 0x00, 0x00, 0x00 },  /* Ù to U */
13351a0e5b37Sdan   { 0x00DA,  0x55, 0x00, 0x00, 0x00 },  /* Ú to U */
13361a0e5b37Sdan   { 0x00DB,  0x55, 0x00, 0x00, 0x00 },  /* Û to U */
13371a0e5b37Sdan   { 0x00DC,  0x55, 0x65, 0x00, 0x00 },  /* Ü to Ue */
13381a0e5b37Sdan   { 0x00DD,  0x59, 0x00, 0x00, 0x00 },  /* Ý to Y */
13391a0e5b37Sdan   { 0x00DE,  0x54, 0x68, 0x00, 0x00 },  /* Þ to Th */
13401a0e5b37Sdan   { 0x00DF,  0x73, 0x73, 0x00, 0x00 },  /* ß to ss */
13411a0e5b37Sdan   { 0x00E0,  0x61, 0x00, 0x00, 0x00 },  /* à to a */
13421a0e5b37Sdan   { 0x00E1,  0x61, 0x00, 0x00, 0x00 },  /* á to a */
13431a0e5b37Sdan   { 0x00E2,  0x61, 0x00, 0x00, 0x00 },  /* â to a */
13441a0e5b37Sdan   { 0x00E3,  0x61, 0x00, 0x00, 0x00 },  /* ã to a */
13451a0e5b37Sdan   { 0x00E4,  0x61, 0x65, 0x00, 0x00 },  /* ä to ae */
13461a0e5b37Sdan   { 0x00E5,  0x61, 0x61, 0x00, 0x00 },  /* å to aa */
13471a0e5b37Sdan   { 0x00E6,  0x61, 0x65, 0x00, 0x00 },  /* æ to ae */
13481a0e5b37Sdan   { 0x00E7,  0x63, 0x00, 0x00, 0x00 },  /* ç to c */
13491a0e5b37Sdan   { 0x00E8,  0x65, 0x00, 0x00, 0x00 },  /* è to e */
13501a0e5b37Sdan   { 0x00E9,  0x65, 0x00, 0x00, 0x00 },  /* é to e */
13511a0e5b37Sdan   { 0x00EA,  0x65, 0x00, 0x00, 0x00 },  /* ê to e */
13521a0e5b37Sdan   { 0x00EB,  0x65, 0x00, 0x00, 0x00 },  /* ë to e */
13531a0e5b37Sdan   { 0x00EC,  0x69, 0x00, 0x00, 0x00 },  /* ì to i */
13541a0e5b37Sdan   { 0x00ED,  0x69, 0x00, 0x00, 0x00 },  /* í to i */
13551a0e5b37Sdan   { 0x00EE,  0x69, 0x00, 0x00, 0x00 },  /* î to i */
13561a0e5b37Sdan   { 0x00EF,  0x69, 0x00, 0x00, 0x00 },  /* ï to i */
13571a0e5b37Sdan   { 0x00F0,  0x64, 0x00, 0x00, 0x00 },  /* ð to d */
13581a0e5b37Sdan   { 0x00F1,  0x6E, 0x00, 0x00, 0x00 },  /* ñ to n */
13591a0e5b37Sdan   { 0x00F2,  0x6F, 0x00, 0x00, 0x00 },  /* ò to o */
13601a0e5b37Sdan   { 0x00F3,  0x6F, 0x00, 0x00, 0x00 },  /* ó to o */
13611a0e5b37Sdan   { 0x00F4,  0x6F, 0x00, 0x00, 0x00 },  /* ô to o */
13621a0e5b37Sdan   { 0x00F5,  0x6F, 0x00, 0x00, 0x00 },  /* õ to o */
13631a0e5b37Sdan   { 0x00F6,  0x6F, 0x65, 0x00, 0x00 },  /* ö to oe */
13641a0e5b37Sdan   { 0x00F7,  0x3A, 0x00, 0x00, 0x00 },  /* ÷ to : */
13651a0e5b37Sdan   { 0x00F8,  0x6F, 0x00, 0x00, 0x00 },  /* ø to o */
13661a0e5b37Sdan   { 0x00F9,  0x75, 0x00, 0x00, 0x00 },  /* ù to u */
13671a0e5b37Sdan   { 0x00FA,  0x75, 0x00, 0x00, 0x00 },  /* ú to u */
13681a0e5b37Sdan   { 0x00FB,  0x75, 0x00, 0x00, 0x00 },  /* û to u */
13691a0e5b37Sdan   { 0x00FC,  0x75, 0x65, 0x00, 0x00 },  /* ü to ue */
13701a0e5b37Sdan   { 0x00FD,  0x79, 0x00, 0x00, 0x00 },  /* ý to y */
13711a0e5b37Sdan   { 0x00FE,  0x74, 0x68, 0x00, 0x00 },  /* þ to th */
13721a0e5b37Sdan   { 0x00FF,  0x79, 0x00, 0x00, 0x00 },  /* ÿ to y */
13731a0e5b37Sdan   { 0x0100,  0x41, 0x00, 0x00, 0x00 },  /* Ā to A */
13741a0e5b37Sdan   { 0x0101,  0x61, 0x00, 0x00, 0x00 },  /* ā to a */
13751a0e5b37Sdan   { 0x0102,  0x41, 0x00, 0x00, 0x00 },  /* Ă to A */
13761a0e5b37Sdan   { 0x0103,  0x61, 0x00, 0x00, 0x00 },  /* ă to a */
13771a0e5b37Sdan   { 0x0104,  0x41, 0x00, 0x00, 0x00 },  /* Ą to A */
13781a0e5b37Sdan   { 0x0105,  0x61, 0x00, 0x00, 0x00 },  /* ą to a */
13791a0e5b37Sdan   { 0x0106,  0x43, 0x00, 0x00, 0x00 },  /* Ć to C */
13801a0e5b37Sdan   { 0x0107,  0x63, 0x00, 0x00, 0x00 },  /* ć to c */
13811a0e5b37Sdan   { 0x0108,  0x43, 0x68, 0x00, 0x00 },  /* Ĉ to Ch */
13821a0e5b37Sdan   { 0x0109,  0x63, 0x68, 0x00, 0x00 },  /* ĉ to ch */
13831a0e5b37Sdan   { 0x010A,  0x43, 0x00, 0x00, 0x00 },  /* Ċ to C */
13841a0e5b37Sdan   { 0x010B,  0x63, 0x00, 0x00, 0x00 },  /* ċ to c */
13851a0e5b37Sdan   { 0x010C,  0x43, 0x00, 0x00, 0x00 },  /* Č to C */
13861a0e5b37Sdan   { 0x010D,  0x63, 0x00, 0x00, 0x00 },  /* č to c */
13871a0e5b37Sdan   { 0x010E,  0x44, 0x00, 0x00, 0x00 },  /* Ď to D */
13881a0e5b37Sdan   { 0x010F,  0x64, 0x00, 0x00, 0x00 },  /* ď to d */
13891a0e5b37Sdan   { 0x0110,  0x44, 0x00, 0x00, 0x00 },  /* Đ to D */
13901a0e5b37Sdan   { 0x0111,  0x64, 0x00, 0x00, 0x00 },  /* đ to d */
13911a0e5b37Sdan   { 0x0112,  0x45, 0x00, 0x00, 0x00 },  /* Ē to E */
13921a0e5b37Sdan   { 0x0113,  0x65, 0x00, 0x00, 0x00 },  /* ē to e */
13931a0e5b37Sdan   { 0x0114,  0x45, 0x00, 0x00, 0x00 },  /* Ĕ to E */
13941a0e5b37Sdan   { 0x0115,  0x65, 0x00, 0x00, 0x00 },  /* ĕ to e */
13951a0e5b37Sdan   { 0x0116,  0x45, 0x00, 0x00, 0x00 },  /* Ė to E */
13961a0e5b37Sdan   { 0x0117,  0x65, 0x00, 0x00, 0x00 },  /* ė to e */
13971a0e5b37Sdan   { 0x0118,  0x45, 0x00, 0x00, 0x00 },  /* Ę to E */
13981a0e5b37Sdan   { 0x0119,  0x65, 0x00, 0x00, 0x00 },  /* ę to e */
13991a0e5b37Sdan   { 0x011A,  0x45, 0x00, 0x00, 0x00 },  /* Ě to E */
14001a0e5b37Sdan   { 0x011B,  0x65, 0x00, 0x00, 0x00 },  /* ě to e */
14011a0e5b37Sdan   { 0x011C,  0x47, 0x68, 0x00, 0x00 },  /* Ĝ to Gh */
14021a0e5b37Sdan   { 0x011D,  0x67, 0x68, 0x00, 0x00 },  /* ĝ to gh */
14031a0e5b37Sdan   { 0x011E,  0x47, 0x00, 0x00, 0x00 },  /* Ğ to G */
14041a0e5b37Sdan   { 0x011F,  0x67, 0x00, 0x00, 0x00 },  /* ğ to g */
14051a0e5b37Sdan   { 0x0120,  0x47, 0x00, 0x00, 0x00 },  /* Ġ to G */
14061a0e5b37Sdan   { 0x0121,  0x67, 0x00, 0x00, 0x00 },  /* ġ to g */
14071a0e5b37Sdan   { 0x0122,  0x47, 0x00, 0x00, 0x00 },  /* Ģ to G */
14081a0e5b37Sdan   { 0x0123,  0x67, 0x00, 0x00, 0x00 },  /* ģ to g */
14091a0e5b37Sdan   { 0x0124,  0x48, 0x68, 0x00, 0x00 },  /* Ĥ to Hh */
14101a0e5b37Sdan   { 0x0125,  0x68, 0x68, 0x00, 0x00 },  /* ĥ to hh */
14111a0e5b37Sdan   { 0x0126,  0x48, 0x00, 0x00, 0x00 },  /* Ħ to H */
14121a0e5b37Sdan   { 0x0127,  0x68, 0x00, 0x00, 0x00 },  /* ħ to h */
14131a0e5b37Sdan   { 0x0128,  0x49, 0x00, 0x00, 0x00 },  /* Ĩ to I */
14141a0e5b37Sdan   { 0x0129,  0x69, 0x00, 0x00, 0x00 },  /* ĩ to i */
14151a0e5b37Sdan   { 0x012A,  0x49, 0x00, 0x00, 0x00 },  /* Ī to I */
14161a0e5b37Sdan   { 0x012B,  0x69, 0x00, 0x00, 0x00 },  /* ī to i */
14171a0e5b37Sdan   { 0x012C,  0x49, 0x00, 0x00, 0x00 },  /* Ĭ to I */
14181a0e5b37Sdan   { 0x012D,  0x69, 0x00, 0x00, 0x00 },  /* ĭ to i */
14191a0e5b37Sdan   { 0x012E,  0x49, 0x00, 0x00, 0x00 },  /* Į to I */
14201a0e5b37Sdan   { 0x012F,  0x69, 0x00, 0x00, 0x00 },  /* į to i */
14211a0e5b37Sdan   { 0x0130,  0x49, 0x00, 0x00, 0x00 },  /* İ to I */
14221a0e5b37Sdan   { 0x0131,  0x69, 0x00, 0x00, 0x00 },  /* ı to i */
14231a0e5b37Sdan   { 0x0132,  0x49, 0x4A, 0x00, 0x00 },  /* IJ to IJ */
14241a0e5b37Sdan   { 0x0133,  0x69, 0x6A, 0x00, 0x00 },  /* ij to ij */
14251a0e5b37Sdan   { 0x0134,  0x4A, 0x68, 0x00, 0x00 },  /* Ĵ to Jh */
14261a0e5b37Sdan   { 0x0135,  0x6A, 0x68, 0x00, 0x00 },  /* ĵ to jh */
14271a0e5b37Sdan   { 0x0136,  0x4B, 0x00, 0x00, 0x00 },  /* Ķ to K */
14281a0e5b37Sdan   { 0x0137,  0x6B, 0x00, 0x00, 0x00 },  /* ķ to k */
14291a0e5b37Sdan   { 0x0138,  0x6B, 0x00, 0x00, 0x00 },  /* ĸ to k */
14301a0e5b37Sdan   { 0x0139,  0x4C, 0x00, 0x00, 0x00 },  /* Ĺ to L */
14311a0e5b37Sdan   { 0x013A,  0x6C, 0x00, 0x00, 0x00 },  /* ĺ to l */
14321a0e5b37Sdan   { 0x013B,  0x4C, 0x00, 0x00, 0x00 },  /* Ļ to L */
14331a0e5b37Sdan   { 0x013C,  0x6C, 0x00, 0x00, 0x00 },  /* ļ to l */
14341a0e5b37Sdan   { 0x013D,  0x4C, 0x00, 0x00, 0x00 },  /* Ľ to L */
14351a0e5b37Sdan   { 0x013E,  0x6C, 0x00, 0x00, 0x00 },  /* ľ to l */
14361a0e5b37Sdan   { 0x013F,  0x4C, 0x2E, 0x00, 0x00 },  /* Ŀ to L. */
14371a0e5b37Sdan   { 0x0140,  0x6C, 0x2E, 0x00, 0x00 },  /* ŀ to l. */
14381a0e5b37Sdan   { 0x0141,  0x4C, 0x00, 0x00, 0x00 },  /* Ł to L */
14391a0e5b37Sdan   { 0x0142,  0x6C, 0x00, 0x00, 0x00 },  /* ł to l */
14401a0e5b37Sdan   { 0x0143,  0x4E, 0x00, 0x00, 0x00 },  /* Ń to N */
14411a0e5b37Sdan   { 0x0144,  0x6E, 0x00, 0x00, 0x00 },  /* ń to n */
14421a0e5b37Sdan   { 0x0145,  0x4E, 0x00, 0x00, 0x00 },  /* Ņ to N */
14431a0e5b37Sdan   { 0x0146,  0x6E, 0x00, 0x00, 0x00 },  /* ņ to n */
14441a0e5b37Sdan   { 0x0147,  0x4E, 0x00, 0x00, 0x00 },  /* Ň to N */
14451a0e5b37Sdan   { 0x0148,  0x6E, 0x00, 0x00, 0x00 },  /* ň to n */
14461a0e5b37Sdan   { 0x0149,  0x27, 0x6E, 0x00, 0x00 },  /* ʼn to 'n */
14471a0e5b37Sdan   { 0x014A,  0x4E, 0x47, 0x00, 0x00 },  /* Ŋ to NG */
14481a0e5b37Sdan   { 0x014B,  0x6E, 0x67, 0x00, 0x00 },  /* ŋ to ng */
14491a0e5b37Sdan   { 0x014C,  0x4F, 0x00, 0x00, 0x00 },  /* Ō to O */
14501a0e5b37Sdan   { 0x014D,  0x6F, 0x00, 0x00, 0x00 },  /* ō to o */
14511a0e5b37Sdan   { 0x014E,  0x4F, 0x00, 0x00, 0x00 },  /* Ŏ to O */
14521a0e5b37Sdan   { 0x014F,  0x6F, 0x00, 0x00, 0x00 },  /* ŏ to o */
14531a0e5b37Sdan   { 0x0150,  0x4F, 0x00, 0x00, 0x00 },  /* Ő to O */
14541a0e5b37Sdan   { 0x0151,  0x6F, 0x00, 0x00, 0x00 },  /* ő to o */
14551a0e5b37Sdan   { 0x0152,  0x4F, 0x45, 0x00, 0x00 },  /* Œ to OE */
14561a0e5b37Sdan   { 0x0153,  0x6F, 0x65, 0x00, 0x00 },  /* œ to oe */
14571a0e5b37Sdan   { 0x0154,  0x52, 0x00, 0x00, 0x00 },  /* Ŕ to R */
14581a0e5b37Sdan   { 0x0155,  0x72, 0x00, 0x00, 0x00 },  /* ŕ to r */
14591a0e5b37Sdan   { 0x0156,  0x52, 0x00, 0x00, 0x00 },  /* Ŗ to R */
14601a0e5b37Sdan   { 0x0157,  0x72, 0x00, 0x00, 0x00 },  /* ŗ to r */
14611a0e5b37Sdan   { 0x0158,  0x52, 0x00, 0x00, 0x00 },  /* Ř to R */
14621a0e5b37Sdan   { 0x0159,  0x72, 0x00, 0x00, 0x00 },  /* ř to r */
14631a0e5b37Sdan   { 0x015A,  0x53, 0x00, 0x00, 0x00 },  /* Ś to S */
14641a0e5b37Sdan   { 0x015B,  0x73, 0x00, 0x00, 0x00 },  /* ś to s */
14651a0e5b37Sdan   { 0x015C,  0x53, 0x68, 0x00, 0x00 },  /* Ŝ to Sh */
14661a0e5b37Sdan   { 0x015D,  0x73, 0x68, 0x00, 0x00 },  /* ŝ to sh */
14671a0e5b37Sdan   { 0x015E,  0x53, 0x00, 0x00, 0x00 },  /* Ş to S */
14681a0e5b37Sdan   { 0x015F,  0x73, 0x00, 0x00, 0x00 },  /* ş to s */
14691a0e5b37Sdan   { 0x0160,  0x53, 0x00, 0x00, 0x00 },  /* Š to S */
14701a0e5b37Sdan   { 0x0161,  0x73, 0x00, 0x00, 0x00 },  /* š to s */
14711a0e5b37Sdan   { 0x0162,  0x54, 0x00, 0x00, 0x00 },  /* Ţ to T */
14721a0e5b37Sdan   { 0x0163,  0x74, 0x00, 0x00, 0x00 },  /* ţ to t */
14731a0e5b37Sdan   { 0x0164,  0x54, 0x00, 0x00, 0x00 },  /* Ť to T */
14741a0e5b37Sdan   { 0x0165,  0x74, 0x00, 0x00, 0x00 },  /* ť to t */
14751a0e5b37Sdan   { 0x0166,  0x54, 0x00, 0x00, 0x00 },  /* Ŧ to T */
14761a0e5b37Sdan   { 0x0167,  0x74, 0x00, 0x00, 0x00 },  /* ŧ to t */
14771a0e5b37Sdan   { 0x0168,  0x55, 0x00, 0x00, 0x00 },  /* Ũ to U */
14781a0e5b37Sdan   { 0x0169,  0x75, 0x00, 0x00, 0x00 },  /* ũ to u */
14791a0e5b37Sdan   { 0x016A,  0x55, 0x00, 0x00, 0x00 },  /* Ū to U */
14801a0e5b37Sdan   { 0x016B,  0x75, 0x00, 0x00, 0x00 },  /* ū to u */
14811a0e5b37Sdan   { 0x016C,  0x55, 0x00, 0x00, 0x00 },  /* Ŭ to U */
14821a0e5b37Sdan   { 0x016D,  0x75, 0x00, 0x00, 0x00 },  /* ŭ to u */
14831a0e5b37Sdan   { 0x016E,  0x55, 0x00, 0x00, 0x00 },  /* Ů to U */
14841a0e5b37Sdan   { 0x016F,  0x75, 0x00, 0x00, 0x00 },  /* ů to u */
14851a0e5b37Sdan   { 0x0170,  0x55, 0x00, 0x00, 0x00 },  /* Ű to U */
14861a0e5b37Sdan   { 0x0171,  0x75, 0x00, 0x00, 0x00 },  /* ű to u */
14871a0e5b37Sdan   { 0x0172,  0x55, 0x00, 0x00, 0x00 },  /* Ų to U */
14881a0e5b37Sdan   { 0x0173,  0x75, 0x00, 0x00, 0x00 },  /* ų to u */
14891a0e5b37Sdan   { 0x0174,  0x57, 0x00, 0x00, 0x00 },  /* Ŵ to W */
14901a0e5b37Sdan   { 0x0175,  0x77, 0x00, 0x00, 0x00 },  /* ŵ to w */
14911a0e5b37Sdan   { 0x0176,  0x59, 0x00, 0x00, 0x00 },  /* Ŷ to Y */
14921a0e5b37Sdan   { 0x0177,  0x79, 0x00, 0x00, 0x00 },  /* ŷ to y */
14931a0e5b37Sdan   { 0x0178,  0x59, 0x00, 0x00, 0x00 },  /* Ÿ to Y */
14941a0e5b37Sdan   { 0x0179,  0x5A, 0x00, 0x00, 0x00 },  /* Ź to Z */
14951a0e5b37Sdan   { 0x017A,  0x7A, 0x00, 0x00, 0x00 },  /* ź to z */
14961a0e5b37Sdan   { 0x017B,  0x5A, 0x00, 0x00, 0x00 },  /* Ż to Z */
14971a0e5b37Sdan   { 0x017C,  0x7A, 0x00, 0x00, 0x00 },  /* ż to z */
14981a0e5b37Sdan   { 0x017D,  0x5A, 0x00, 0x00, 0x00 },  /* Ž to Z */
14991a0e5b37Sdan   { 0x017E,  0x7A, 0x00, 0x00, 0x00 },  /* ž to z */
15001a0e5b37Sdan   { 0x017F,  0x73, 0x00, 0x00, 0x00 },  /* ſ to s */
15011a0e5b37Sdan   { 0x0192,  0x66, 0x00, 0x00, 0x00 },  /* ƒ to f */
15021a0e5b37Sdan   { 0x0218,  0x53, 0x00, 0x00, 0x00 },  /* Ș to S */
15031a0e5b37Sdan   { 0x0219,  0x73, 0x00, 0x00, 0x00 },  /* ș to s */
15041a0e5b37Sdan   { 0x021A,  0x54, 0x00, 0x00, 0x00 },  /* Ț to T */
15051a0e5b37Sdan   { 0x021B,  0x74, 0x00, 0x00, 0x00 },  /* ț to t */
15061a0e5b37Sdan   { 0x0386,  0x41, 0x00, 0x00, 0x00 },  /* Ά to A */
15071a0e5b37Sdan   { 0x0388,  0x45, 0x00, 0x00, 0x00 },  /* Έ to E */
15081a0e5b37Sdan   { 0x0389,  0x49, 0x00, 0x00, 0x00 },  /* Ή to I */
15091a0e5b37Sdan   { 0x038A,  0x49, 0x00, 0x00, 0x00 },  /* Ί to I */
15101a0e5b37Sdan   { 0x038C,  0x4f, 0x00, 0x00, 0x00 },  /* Ό to O */
15111a0e5b37Sdan   { 0x038E,  0x59, 0x00, 0x00, 0x00 },  /* Ύ to Y */
15121a0e5b37Sdan   { 0x038F,  0x4f, 0x00, 0x00, 0x00 },  /* Ώ to O */
15131a0e5b37Sdan   { 0x0390,  0x69, 0x00, 0x00, 0x00 },  /* ΐ to i */
15141a0e5b37Sdan   { 0x0391,  0x41, 0x00, 0x00, 0x00 },  /* Α to A */
15151a0e5b37Sdan   { 0x0392,  0x42, 0x00, 0x00, 0x00 },  /* Β to B */
15161a0e5b37Sdan   { 0x0393,  0x47, 0x00, 0x00, 0x00 },  /* Γ to G */
15171a0e5b37Sdan   { 0x0394,  0x44, 0x00, 0x00, 0x00 },  /* Δ to D */
15181a0e5b37Sdan   { 0x0395,  0x45, 0x00, 0x00, 0x00 },  /* Ε to E */
15191a0e5b37Sdan   { 0x0396,  0x5a, 0x00, 0x00, 0x00 },  /* Ζ to Z */
15201a0e5b37Sdan   { 0x0397,  0x49, 0x00, 0x00, 0x00 },  /* Η to I */
15211a0e5b37Sdan   { 0x0398,  0x54, 0x68, 0x00, 0x00 },  /* Θ to Th */
15221a0e5b37Sdan   { 0x0399,  0x49, 0x00, 0x00, 0x00 },  /* Ι to I */
15231a0e5b37Sdan   { 0x039A,  0x4b, 0x00, 0x00, 0x00 },  /* Κ to K */
15241a0e5b37Sdan   { 0x039B,  0x4c, 0x00, 0x00, 0x00 },  /* Λ to L */
15251a0e5b37Sdan   { 0x039C,  0x4d, 0x00, 0x00, 0x00 },  /* Μ to M */
15261a0e5b37Sdan   { 0x039D,  0x4e, 0x00, 0x00, 0x00 },  /* Ν to N */
15271a0e5b37Sdan   { 0x039E,  0x58, 0x00, 0x00, 0x00 },  /* Ξ to X */
15281a0e5b37Sdan   { 0x039F,  0x4f, 0x00, 0x00, 0x00 },  /* Ο to O */
15291a0e5b37Sdan   { 0x03A0,  0x50, 0x00, 0x00, 0x00 },  /* Π to P */
15301a0e5b37Sdan   { 0x03A1,  0x52, 0x00, 0x00, 0x00 },  /* Ρ to R */
15311a0e5b37Sdan   { 0x03A3,  0x53, 0x00, 0x00, 0x00 },  /* Σ to S */
15321a0e5b37Sdan   { 0x03A4,  0x54, 0x00, 0x00, 0x00 },  /* Τ to T */
15331a0e5b37Sdan   { 0x03A5,  0x59, 0x00, 0x00, 0x00 },  /* Υ to Y */
15341a0e5b37Sdan   { 0x03A6,  0x46, 0x00, 0x00, 0x00 },  /* Φ to F */
15351a0e5b37Sdan   { 0x03A7,  0x43, 0x68, 0x00, 0x00 },  /* Χ to Ch */
15361a0e5b37Sdan   { 0x03A8,  0x50, 0x73, 0x00, 0x00 },  /* Ψ to Ps */
15371a0e5b37Sdan   { 0x03A9,  0x4f, 0x00, 0x00, 0x00 },  /* Ω to O */
15381a0e5b37Sdan   { 0x03AA,  0x49, 0x00, 0x00, 0x00 },  /* Ϊ to I */
15391a0e5b37Sdan   { 0x03AB,  0x59, 0x00, 0x00, 0x00 },  /* Ϋ to Y */
15401a0e5b37Sdan   { 0x03AC,  0x61, 0x00, 0x00, 0x00 },  /* ά to a */
15411a0e5b37Sdan   { 0x03AD,  0x65, 0x00, 0x00, 0x00 },  /* έ to e */
15421a0e5b37Sdan   { 0x03AE,  0x69, 0x00, 0x00, 0x00 },  /* ή to i */
15431a0e5b37Sdan   { 0x03AF,  0x69, 0x00, 0x00, 0x00 },  /* ί to i */
15441a0e5b37Sdan   { 0x03B1,  0x61, 0x00, 0x00, 0x00 },  /* α to a */
15451a0e5b37Sdan   { 0x03B2,  0x62, 0x00, 0x00, 0x00 },  /* β to b */
15461a0e5b37Sdan   { 0x03B3,  0x67, 0x00, 0x00, 0x00 },  /* γ to g */
15471a0e5b37Sdan   { 0x03B4,  0x64, 0x00, 0x00, 0x00 },  /* δ to d */
15481a0e5b37Sdan   { 0x03B5,  0x65, 0x00, 0x00, 0x00 },  /* ε to e */
15491a0e5b37Sdan   { 0x03B6,  0x7a, 0x00, 0x00, 0x00 },  /* ζ to z */
15501a0e5b37Sdan   { 0x03B7,  0x69, 0x00, 0x00, 0x00 },  /* η to i */
15511a0e5b37Sdan   { 0x03B8,  0x74, 0x68, 0x00, 0x00 },  /* θ to th */
15521a0e5b37Sdan   { 0x03B9,  0x69, 0x00, 0x00, 0x00 },  /* ι to i */
15531a0e5b37Sdan   { 0x03BA,  0x6b, 0x00, 0x00, 0x00 },  /* κ to k */
15541a0e5b37Sdan   { 0x03BB,  0x6c, 0x00, 0x00, 0x00 },  /* λ to l */
15551a0e5b37Sdan   { 0x03BC,  0x6d, 0x00, 0x00, 0x00 },  /* μ to m */
15561a0e5b37Sdan   { 0x03BD,  0x6e, 0x00, 0x00, 0x00 },  /* ν to n */
15571a0e5b37Sdan   { 0x03BE,  0x78, 0x00, 0x00, 0x00 },  /* ξ to x */
15581a0e5b37Sdan   { 0x03BF,  0x6f, 0x00, 0x00, 0x00 },  /* ο to o */
15591a0e5b37Sdan   { 0x03C0,  0x70, 0x00, 0x00, 0x00 },  /* π to p */
15601a0e5b37Sdan   { 0x03C1,  0x72, 0x00, 0x00, 0x00 },  /* ρ to r */
15611a0e5b37Sdan   { 0x03C3,  0x73, 0x00, 0x00, 0x00 },  /* σ to s */
15621a0e5b37Sdan   { 0x03C4,  0x74, 0x00, 0x00, 0x00 },  /* τ to t */
15631a0e5b37Sdan   { 0x03C5,  0x79, 0x00, 0x00, 0x00 },  /* υ to y */
15641a0e5b37Sdan   { 0x03C6,  0x66, 0x00, 0x00, 0x00 },  /* φ to f */
15651a0e5b37Sdan   { 0x03C7,  0x63, 0x68, 0x00, 0x00 },  /* χ to ch */
15661a0e5b37Sdan   { 0x03C8,  0x70, 0x73, 0x00, 0x00 },  /* ψ to ps */
15671a0e5b37Sdan   { 0x03C9,  0x6f, 0x00, 0x00, 0x00 },  /* ω to o */
15681a0e5b37Sdan   { 0x03CA,  0x69, 0x00, 0x00, 0x00 },  /* ϊ to i */
15691a0e5b37Sdan   { 0x03CB,  0x79, 0x00, 0x00, 0x00 },  /* ϋ to y */
15701a0e5b37Sdan   { 0x03CC,  0x6f, 0x00, 0x00, 0x00 },  /* ό to o */
15711a0e5b37Sdan   { 0x03CD,  0x79, 0x00, 0x00, 0x00 },  /* ύ to y */
15721a0e5b37Sdan   { 0x03CE,  0x69, 0x00, 0x00, 0x00 },  /* ώ to i */
15731a0e5b37Sdan   { 0x0400,  0x45, 0x00, 0x00, 0x00 },  /* Ѐ to E */
15741a0e5b37Sdan   { 0x0401,  0x45, 0x00, 0x00, 0x00 },  /* Ё to E */
15751a0e5b37Sdan   { 0x0402,  0x44, 0x00, 0x00, 0x00 },  /* Ђ to D */
15761a0e5b37Sdan   { 0x0403,  0x47, 0x00, 0x00, 0x00 },  /* Ѓ to G */
15771a0e5b37Sdan   { 0x0404,  0x45, 0x00, 0x00, 0x00 },  /* Є to E */
15781a0e5b37Sdan   { 0x0405,  0x5a, 0x00, 0x00, 0x00 },  /* Ѕ to Z */
15791a0e5b37Sdan   { 0x0406,  0x49, 0x00, 0x00, 0x00 },  /* І to I */
15801a0e5b37Sdan   { 0x0407,  0x49, 0x00, 0x00, 0x00 },  /* Ї to I */
15811a0e5b37Sdan   { 0x0408,  0x4a, 0x00, 0x00, 0x00 },  /* Ј to J */
15821a0e5b37Sdan   { 0x0409,  0x49, 0x00, 0x00, 0x00 },  /* Љ to I */
15831a0e5b37Sdan   { 0x040A,  0x4e, 0x00, 0x00, 0x00 },  /* Њ to N */
15841a0e5b37Sdan   { 0x040B,  0x44, 0x00, 0x00, 0x00 },  /* Ћ to D */
15851a0e5b37Sdan   { 0x040C,  0x4b, 0x00, 0x00, 0x00 },  /* Ќ to K */
15861a0e5b37Sdan   { 0x040D,  0x49, 0x00, 0x00, 0x00 },  /* Ѝ to I */
15871a0e5b37Sdan   { 0x040E,  0x55, 0x00, 0x00, 0x00 },  /* Ў to U */
15881a0e5b37Sdan   { 0x040F,  0x44, 0x00, 0x00, 0x00 },  /* Џ to D */
15891a0e5b37Sdan   { 0x0410,  0x41, 0x00, 0x00, 0x00 },  /* А to A */
15901a0e5b37Sdan   { 0x0411,  0x42, 0x00, 0x00, 0x00 },  /* Б to B */
15911a0e5b37Sdan   { 0x0412,  0x56, 0x00, 0x00, 0x00 },  /* В to V */
15921a0e5b37Sdan   { 0x0413,  0x47, 0x00, 0x00, 0x00 },  /* Г to G */
15931a0e5b37Sdan   { 0x0414,  0x44, 0x00, 0x00, 0x00 },  /* Д to D */
15941a0e5b37Sdan   { 0x0415,  0x45, 0x00, 0x00, 0x00 },  /* Е to E */
15951a0e5b37Sdan   { 0x0416,  0x5a, 0x68, 0x00, 0x00 },  /* Ж to Zh */
15961a0e5b37Sdan   { 0x0417,  0x5a, 0x00, 0x00, 0x00 },  /* З to Z */
15971a0e5b37Sdan   { 0x0418,  0x49, 0x00, 0x00, 0x00 },  /* И to I */
15981a0e5b37Sdan   { 0x0419,  0x49, 0x00, 0x00, 0x00 },  /* Й to I */
15991a0e5b37Sdan   { 0x041A,  0x4b, 0x00, 0x00, 0x00 },  /* К to K */
16001a0e5b37Sdan   { 0x041B,  0x4c, 0x00, 0x00, 0x00 },  /* Л to L */
16011a0e5b37Sdan   { 0x041C,  0x4d, 0x00, 0x00, 0x00 },  /* М to M */
16021a0e5b37Sdan   { 0x041D,  0x4e, 0x00, 0x00, 0x00 },  /* Н to N */
16031a0e5b37Sdan   { 0x041E,  0x4f, 0x00, 0x00, 0x00 },  /* О to O */
16041a0e5b37Sdan   { 0x041F,  0x50, 0x00, 0x00, 0x00 },  /* П to P */
16051a0e5b37Sdan   { 0x0420,  0x52, 0x00, 0x00, 0x00 },  /* Р to R */
16061a0e5b37Sdan   { 0x0421,  0x53, 0x00, 0x00, 0x00 },  /* С to S */
16071a0e5b37Sdan   { 0x0422,  0x54, 0x00, 0x00, 0x00 },  /* Т to T */
16081a0e5b37Sdan   { 0x0423,  0x55, 0x00, 0x00, 0x00 },  /* У to U */
16091a0e5b37Sdan   { 0x0424,  0x46, 0x00, 0x00, 0x00 },  /* Ф to F */
16101a0e5b37Sdan   { 0x0425,  0x4b, 0x68, 0x00, 0x00 },  /* Х to Kh */
16111a0e5b37Sdan   { 0x0426,  0x54, 0x63, 0x00, 0x00 },  /* Ц to Tc */
16121a0e5b37Sdan   { 0x0427,  0x43, 0x68, 0x00, 0x00 },  /* Ч to Ch */
16131a0e5b37Sdan   { 0x0428,  0x53, 0x68, 0x00, 0x00 },  /* Ш to Sh */
16141a0e5b37Sdan   { 0x0429,  0x53, 0x68, 0x63, 0x68 },  /* Щ to Shch */
16151a0e5b37Sdan   { 0x042A,  0x61, 0x00, 0x00, 0x00 },  /*  to A */
16161a0e5b37Sdan   { 0x042B,  0x59, 0x00, 0x00, 0x00 },  /* Ы to Y */
16171a0e5b37Sdan   { 0x042C,  0x59, 0x00, 0x00, 0x00 },  /*  to Y */
16181a0e5b37Sdan   { 0x042D,  0x45, 0x00, 0x00, 0x00 },  /* Э to E */
16191a0e5b37Sdan   { 0x042E,  0x49, 0x75, 0x00, 0x00 },  /* Ю to Iu */
16201a0e5b37Sdan   { 0x042F,  0x49, 0x61, 0x00, 0x00 },  /* Я to Ia */
16211a0e5b37Sdan   { 0x0430,  0x61, 0x00, 0x00, 0x00 },  /* а to a */
16221a0e5b37Sdan   { 0x0431,  0x62, 0x00, 0x00, 0x00 },  /* б to b */
16231a0e5b37Sdan   { 0x0432,  0x76, 0x00, 0x00, 0x00 },  /* в to v */
16241a0e5b37Sdan   { 0x0433,  0x67, 0x00, 0x00, 0x00 },  /* г to g */
16251a0e5b37Sdan   { 0x0434,  0x64, 0x00, 0x00, 0x00 },  /* д to d */
16261a0e5b37Sdan   { 0x0435,  0x65, 0x00, 0x00, 0x00 },  /* е to e */
16271a0e5b37Sdan   { 0x0436,  0x7a, 0x68, 0x00, 0x00 },  /* ж to zh */
16281a0e5b37Sdan   { 0x0437,  0x7a, 0x00, 0x00, 0x00 },  /* з to z */
16291a0e5b37Sdan   { 0x0438,  0x69, 0x00, 0x00, 0x00 },  /* и to i */
16301a0e5b37Sdan   { 0x0439,  0x69, 0x00, 0x00, 0x00 },  /* й to i */
16311a0e5b37Sdan   { 0x043A,  0x6b, 0x00, 0x00, 0x00 },  /* к to k */
16321a0e5b37Sdan   { 0x043B,  0x6c, 0x00, 0x00, 0x00 },  /* л to l */
16331a0e5b37Sdan   { 0x043C,  0x6d, 0x00, 0x00, 0x00 },  /* м to m */
16341a0e5b37Sdan   { 0x043D,  0x6e, 0x00, 0x00, 0x00 },  /* н to n */
16351a0e5b37Sdan   { 0x043E,  0x6f, 0x00, 0x00, 0x00 },  /* о to o */
16361a0e5b37Sdan   { 0x043F,  0x70, 0x00, 0x00, 0x00 },  /* п to p */
16371a0e5b37Sdan   { 0x0440,  0x72, 0x00, 0x00, 0x00 },  /* р to r */
16381a0e5b37Sdan   { 0x0441,  0x73, 0x00, 0x00, 0x00 },  /* с to s */
16391a0e5b37Sdan   { 0x0442,  0x74, 0x00, 0x00, 0x00 },  /* т to t */
16401a0e5b37Sdan   { 0x0443,  0x75, 0x00, 0x00, 0x00 },  /* у to u */
16411a0e5b37Sdan   { 0x0444,  0x66, 0x00, 0x00, 0x00 },  /* ф to f */
16421a0e5b37Sdan   { 0x0445,  0x6b, 0x68, 0x00, 0x00 },  /* х to kh */
16431a0e5b37Sdan   { 0x0446,  0x74, 0x63, 0x00, 0x00 },  /* ц to tc */
16441a0e5b37Sdan   { 0x0447,  0x63, 0x68, 0x00, 0x00 },  /* ч to ch */
16451a0e5b37Sdan   { 0x0448,  0x73, 0x68, 0x00, 0x00 },  /* ш to sh */
16461a0e5b37Sdan   { 0x0449,  0x73, 0x68, 0x63, 0x68 },  /* щ to shch */
16471a0e5b37Sdan   { 0x044A,  0x61, 0x00, 0x00, 0x00 },  /*  to a */
16481a0e5b37Sdan   { 0x044B,  0x79, 0x00, 0x00, 0x00 },  /* ы to y */
16491a0e5b37Sdan   { 0x044C,  0x79, 0x00, 0x00, 0x00 },  /*  to y */
16501a0e5b37Sdan   { 0x044D,  0x65, 0x00, 0x00, 0x00 },  /* э to e */
16511a0e5b37Sdan   { 0x044E,  0x69, 0x75, 0x00, 0x00 },  /* ю to iu */
16521a0e5b37Sdan   { 0x044F,  0x69, 0x61, 0x00, 0x00 },  /* я to ia */
16531a0e5b37Sdan   { 0x0450,  0x65, 0x00, 0x00, 0x00 },  /* ѐ to e */
16541a0e5b37Sdan   { 0x0451,  0x65, 0x00, 0x00, 0x00 },  /* ё to e */
16551a0e5b37Sdan   { 0x0452,  0x64, 0x00, 0x00, 0x00 },  /* ђ to d */
16561a0e5b37Sdan   { 0x0453,  0x67, 0x00, 0x00, 0x00 },  /* ѓ to g */
16571a0e5b37Sdan   { 0x0454,  0x65, 0x00, 0x00, 0x00 },  /* є to e */
16581a0e5b37Sdan   { 0x0455,  0x7a, 0x00, 0x00, 0x00 },  /* ѕ to z */
16591a0e5b37Sdan   { 0x0456,  0x69, 0x00, 0x00, 0x00 },  /* і to i */
16601a0e5b37Sdan   { 0x0457,  0x69, 0x00, 0x00, 0x00 },  /* ї to i */
16611a0e5b37Sdan   { 0x0458,  0x6a, 0x00, 0x00, 0x00 },  /* ј to j */
16621a0e5b37Sdan   { 0x0459,  0x69, 0x00, 0x00, 0x00 },  /* љ to i */
16631a0e5b37Sdan   { 0x045A,  0x6e, 0x00, 0x00, 0x00 },  /* њ to n */
16641a0e5b37Sdan   { 0x045B,  0x64, 0x00, 0x00, 0x00 },  /* ћ to d */
16651a0e5b37Sdan   { 0x045C,  0x6b, 0x00, 0x00, 0x00 },  /* ќ to k */
16661a0e5b37Sdan   { 0x045D,  0x69, 0x00, 0x00, 0x00 },  /* ѝ to i */
16671a0e5b37Sdan   { 0x045E,  0x75, 0x00, 0x00, 0x00 },  /* ў to u */
16681a0e5b37Sdan   { 0x045F,  0x64, 0x00, 0x00, 0x00 },  /* џ to d */
16691a0e5b37Sdan   { 0x1E02,  0x42, 0x00, 0x00, 0x00 },  /* Ḃ to B */
16701a0e5b37Sdan   { 0x1E03,  0x62, 0x00, 0x00, 0x00 },  /* ḃ to b */
16711a0e5b37Sdan   { 0x1E0A,  0x44, 0x00, 0x00, 0x00 },  /* Ḋ to D */
16721a0e5b37Sdan   { 0x1E0B,  0x64, 0x00, 0x00, 0x00 },  /* ḋ to d */
16731a0e5b37Sdan   { 0x1E1E,  0x46, 0x00, 0x00, 0x00 },  /* Ḟ to F */
16741a0e5b37Sdan   { 0x1E1F,  0x66, 0x00, 0x00, 0x00 },  /* ḟ to f */
16751a0e5b37Sdan   { 0x1E40,  0x4D, 0x00, 0x00, 0x00 },  /* Ṁ to M */
16761a0e5b37Sdan   { 0x1E41,  0x6D, 0x00, 0x00, 0x00 },  /* ṁ to m */
16771a0e5b37Sdan   { 0x1E56,  0x50, 0x00, 0x00, 0x00 },  /* Ṗ to P */
16781a0e5b37Sdan   { 0x1E57,  0x70, 0x00, 0x00, 0x00 },  /* ṗ to p */
16791a0e5b37Sdan   { 0x1E60,  0x53, 0x00, 0x00, 0x00 },  /* Ṡ to S */
16801a0e5b37Sdan   { 0x1E61,  0x73, 0x00, 0x00, 0x00 },  /* ṡ to s */
16811a0e5b37Sdan   { 0x1E6A,  0x54, 0x00, 0x00, 0x00 },  /* Ṫ to T */
16821a0e5b37Sdan   { 0x1E6B,  0x74, 0x00, 0x00, 0x00 },  /* ṫ to t */
16831a0e5b37Sdan   { 0x1E80,  0x57, 0x00, 0x00, 0x00 },  /* Ẁ to W */
16841a0e5b37Sdan   { 0x1E81,  0x77, 0x00, 0x00, 0x00 },  /* ẁ to w */
16851a0e5b37Sdan   { 0x1E82,  0x57, 0x00, 0x00, 0x00 },  /* Ẃ to W */
16861a0e5b37Sdan   { 0x1E83,  0x77, 0x00, 0x00, 0x00 },  /* ẃ to w */
16871a0e5b37Sdan   { 0x1E84,  0x57, 0x00, 0x00, 0x00 },  /* Ẅ to W */
16881a0e5b37Sdan   { 0x1E85,  0x77, 0x00, 0x00, 0x00 },  /* ẅ to w */
16891a0e5b37Sdan   { 0x1EF2,  0x59, 0x00, 0x00, 0x00 },  /* Ỳ to Y */
16901a0e5b37Sdan   { 0x1EF3,  0x79, 0x00, 0x00, 0x00 },  /* ỳ to y */
16911a0e5b37Sdan   { 0xFB00,  0x66, 0x66, 0x00, 0x00 },  /* ff to ff */
16921a0e5b37Sdan   { 0xFB01,  0x66, 0x69, 0x00, 0x00 },  /* fi to fi */
16931a0e5b37Sdan   { 0xFB02,  0x66, 0x6C, 0x00, 0x00 },  /* fl to fl */
16941a0e5b37Sdan   { 0xFB05,  0x73, 0x74, 0x00, 0x00 },  /* ſt to st */
16951a0e5b37Sdan   { 0xFB06,  0x73, 0x74, 0x00, 0x00 },  /* st to st */
1696b7045ab2Sdrh };
1697b7045ab2Sdrh 
spellfixFindTranslit(int c,int * pxTop)16981a0e5b37Sdan static const Transliteration *spellfixFindTranslit(int c, int *pxTop){
16991a0e5b37Sdan   *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1;
17001a0e5b37Sdan   return translit;
17011a0e5b37Sdan }
17021a0e5b37Sdan 
1703b7045ab2Sdrh /*
1704b7045ab2Sdrh ** Convert the input string from UTF-8 into pure ASCII by converting
1705b7045ab2Sdrh ** all non-ASCII characters to some combination of characters in the
1706b7045ab2Sdrh ** ASCII subset.
1707b7045ab2Sdrh **
1708b7045ab2Sdrh ** The returned string might contain more characters than the input.
1709b7045ab2Sdrh **
1710b7045ab2Sdrh ** Space to hold the returned string comes from sqlite3_malloc() and
1711b7045ab2Sdrh ** should be freed by the caller.
1712b7045ab2Sdrh */
transliterate(const unsigned char * zIn,int nIn)1713b7045ab2Sdrh static unsigned char *transliterate(const unsigned char *zIn, int nIn){
17142e5e0e10Sdan #ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
17152e5e0e10Sdan   unsigned char *zOut = sqlite3_malloc64( nIn*5 + 1 );
17162e5e0e10Sdan #else
1717c4703eedSdrh   unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 );
17182e5e0e10Sdan #endif
1719b7045ab2Sdrh   int c, sz, nOut;
1720b7045ab2Sdrh   if( zOut==0 ) return 0;
1721b7045ab2Sdrh   nOut = 0;
1722b7045ab2Sdrh   while( nIn>0 ){
1723b7045ab2Sdrh     c = utf8Read(zIn, nIn, &sz);
1724b7045ab2Sdrh     zIn += sz;
1725b7045ab2Sdrh     nIn -= sz;
1726b7045ab2Sdrh     if( c<=127 ){
172777fac879Smistachkin       zOut[nOut++] = (unsigned char)c;
1728b7045ab2Sdrh     }else{
1729b7045ab2Sdrh       int xTop, xBtm, x;
17301a0e5b37Sdan       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1731b7045ab2Sdrh       xBtm = 0;
1732b7045ab2Sdrh       while( xTop>=xBtm ){
1733b7045ab2Sdrh         x = (xTop + xBtm)/2;
17341a0e5b37Sdan         if( tbl[x].cFrom==c ){
17351a0e5b37Sdan           zOut[nOut++] = tbl[x].cTo0;
17361a0e5b37Sdan           if( tbl[x].cTo1 ){
17371a0e5b37Sdan             zOut[nOut++] = tbl[x].cTo1;
17381a0e5b37Sdan             if( tbl[x].cTo2 ){
17391a0e5b37Sdan               zOut[nOut++] = tbl[x].cTo2;
17401a0e5b37Sdan               if( tbl[x].cTo3 ){
17411a0e5b37Sdan                 zOut[nOut++] = tbl[x].cTo3;
17422e5e0e10Sdan #ifdef SQLITE_SPELLFIX_5BYTE_MAPPINGS
17432e5e0e10Sdan                 if( tbl[x].cTo4 ){
17442e5e0e10Sdan                   zOut[nOut++] = tbl[x].cTo4;
17452e5e0e10Sdan                 }
17462e5e0e10Sdan #endif /* SQLITE_SPELLFIX_5BYTE_MAPPINGS */
17471a0e5b37Sdan               }
1748b7045ab2Sdrh             }
1749b7045ab2Sdrh           }
1750b7045ab2Sdrh           c = 0;
1751b7045ab2Sdrh           break;
17521a0e5b37Sdan         }else if( tbl[x].cFrom>c ){
1753b7045ab2Sdrh           xTop = x-1;
1754b7045ab2Sdrh         }else{
1755b7045ab2Sdrh           xBtm = x+1;
1756b7045ab2Sdrh         }
1757b7045ab2Sdrh       }
1758b7045ab2Sdrh       if( c ) zOut[nOut++] = '?';
1759b7045ab2Sdrh     }
1760b7045ab2Sdrh   }
1761b7045ab2Sdrh   zOut[nOut] = 0;
1762b7045ab2Sdrh   return zOut;
1763b7045ab2Sdrh }
1764b7045ab2Sdrh 
1765b7045ab2Sdrh /*
1766b7045ab2Sdrh ** Return the number of characters in the shortest prefix of the input
1767b7045ab2Sdrh ** string that transliterates to an ASCII string nTrans bytes or longer.
1768b7045ab2Sdrh ** Or, if the transliteration of the input string is less than nTrans
1769b7045ab2Sdrh ** bytes in size, return the number of characters in the input string.
1770b7045ab2Sdrh */
translen_to_charlen(const char * zIn,int nIn,int nTrans)1771b7045ab2Sdrh static int translen_to_charlen(const char *zIn, int nIn, int nTrans){
1772b7045ab2Sdrh   int i, c, sz, nOut;
1773b7045ab2Sdrh   int nChar;
1774b7045ab2Sdrh 
1775b7045ab2Sdrh   i = nOut = 0;
1776b7045ab2Sdrh   for(nChar=0; i<nIn && nOut<nTrans; nChar++){
1777b7045ab2Sdrh     c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz);
1778b7045ab2Sdrh     i += sz;
1779b7045ab2Sdrh 
1780b7045ab2Sdrh     nOut++;
1781b7045ab2Sdrh     if( c>=128 ){
1782b7045ab2Sdrh       int xTop, xBtm, x;
17831a0e5b37Sdan       const Transliteration *tbl = spellfixFindTranslit(c, &xTop);
1784b7045ab2Sdrh       xBtm = 0;
1785b7045ab2Sdrh       while( xTop>=xBtm ){
1786b7045ab2Sdrh         x = (xTop + xBtm)/2;
17871a0e5b37Sdan         if( tbl[x].cFrom==c ){
17881a0e5b37Sdan           if( tbl[x].cTo1 ){
17891a0e5b37Sdan             nOut++;
17901a0e5b37Sdan             if( tbl[x].cTo2 ){
17911a0e5b37Sdan               nOut++;
17921a0e5b37Sdan               if( tbl[x].cTo3 ){
17931a0e5b37Sdan                 nOut++;
17941a0e5b37Sdan               }
17951a0e5b37Sdan             }
17961a0e5b37Sdan           }
1797b7045ab2Sdrh           break;
17981a0e5b37Sdan         }else if( tbl[x].cFrom>c ){
1799b7045ab2Sdrh           xTop = x-1;
1800b7045ab2Sdrh         }else{
1801b7045ab2Sdrh           xBtm = x+1;
1802b7045ab2Sdrh         }
1803b7045ab2Sdrh       }
1804b7045ab2Sdrh     }
1805b7045ab2Sdrh   }
1806b7045ab2Sdrh 
1807b7045ab2Sdrh   return nChar;
1808b7045ab2Sdrh }
1809b7045ab2Sdrh 
1810b7045ab2Sdrh 
1811b7045ab2Sdrh /*
1812b7045ab2Sdrh **    spellfix1_translit(X)
1813b7045ab2Sdrh **
1814b7045ab2Sdrh ** Convert a string that contains non-ASCII Roman characters into
1815b7045ab2Sdrh ** pure ASCII.
1816b7045ab2Sdrh */
transliterateSqlFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)1817b7045ab2Sdrh static void transliterateSqlFunc(
1818b7045ab2Sdrh   sqlite3_context *context,
1819b7045ab2Sdrh   int argc,
1820b7045ab2Sdrh   sqlite3_value **argv
1821b7045ab2Sdrh ){
1822b7045ab2Sdrh   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1823b7045ab2Sdrh   int nIn = sqlite3_value_bytes(argv[0]);
1824b7045ab2Sdrh   unsigned char *zOut = transliterate(zIn, nIn);
1825b7045ab2Sdrh   if( zOut==0 ){
1826b7045ab2Sdrh     sqlite3_result_error_nomem(context);
1827b7045ab2Sdrh   }else{
1828b7045ab2Sdrh     sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free);
1829b7045ab2Sdrh   }
1830b7045ab2Sdrh }
1831b7045ab2Sdrh 
1832b7045ab2Sdrh /*
1833b7045ab2Sdrh **    spellfix1_scriptcode(X)
1834b7045ab2Sdrh **
1835b7045ab2Sdrh ** Try to determine the dominant script used by the word X and return
1836b7045ab2Sdrh ** its ISO 15924 numeric code.
1837b7045ab2Sdrh **
1838b7045ab2Sdrh ** The current implementation only understands the following scripts:
1839b7045ab2Sdrh **
1840b7045ab2Sdrh **    215  (Latin)
1841b7045ab2Sdrh **    220  (Cyrillic)
1842b7045ab2Sdrh **    200  (Greek)
1843b7045ab2Sdrh **
1844b7045ab2Sdrh ** This routine will return 998 if the input X contains characters from
1845b7045ab2Sdrh ** two or more of the above scripts or 999 if X contains no characters
1846b7045ab2Sdrh ** from any of the above scripts.
1847b7045ab2Sdrh */
scriptCodeSqlFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)1848b7045ab2Sdrh static void scriptCodeSqlFunc(
1849b7045ab2Sdrh   sqlite3_context *context,
1850b7045ab2Sdrh   int argc,
1851b7045ab2Sdrh   sqlite3_value **argv
1852b7045ab2Sdrh ){
1853b7045ab2Sdrh   const unsigned char *zIn = sqlite3_value_text(argv[0]);
1854b7045ab2Sdrh   int nIn = sqlite3_value_bytes(argv[0]);
1855b7045ab2Sdrh   int c, sz;
1856b7045ab2Sdrh   int scriptMask = 0;
1857b7045ab2Sdrh   int res;
1858811f17baSdrh   int seenDigit = 0;
1859b7045ab2Sdrh # define SCRIPT_LATIN       0x0001
1860b7045ab2Sdrh # define SCRIPT_CYRILLIC    0x0002
1861b7045ab2Sdrh # define SCRIPT_GREEK       0x0004
18621db0a72bSdrh # define SCRIPT_HEBREW      0x0008
18631db0a72bSdrh # define SCRIPT_ARABIC      0x0010
1864b7045ab2Sdrh 
1865b7045ab2Sdrh   while( nIn>0 ){
1866b7045ab2Sdrh     c = utf8Read(zIn, nIn, &sz);
1867b7045ab2Sdrh     zIn += sz;
1868b7045ab2Sdrh     nIn -= sz;
1869811f17baSdrh     if( c<0x02af ){
1870811f17baSdrh       if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){
1871b7045ab2Sdrh         scriptMask |= SCRIPT_LATIN;
1872811f17baSdrh       }else if( c>='0' && c<='9' ){
1873811f17baSdrh         seenDigit = 1;
1874811f17baSdrh       }
1875b7045ab2Sdrh     }else if( c>=0x0400 && c<=0x04ff ){
1876b7045ab2Sdrh       scriptMask |= SCRIPT_CYRILLIC;
1877b7045ab2Sdrh     }else if( c>=0x0386 && c<=0x03ce ){
1878b7045ab2Sdrh       scriptMask |= SCRIPT_GREEK;
18791db0a72bSdrh     }else if( c>=0x0590 && c<=0x05ff ){
18801db0a72bSdrh       scriptMask |= SCRIPT_HEBREW;
18811db0a72bSdrh     }else if( c>=0x0600 && c<=0x06ff ){
18821db0a72bSdrh       scriptMask |= SCRIPT_ARABIC;
1883b7045ab2Sdrh     }
1884b7045ab2Sdrh   }
1885811f17baSdrh   if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN;
1886b7045ab2Sdrh   switch( scriptMask ){
1887b7045ab2Sdrh     case 0:                res = 999; break;
1888b7045ab2Sdrh     case SCRIPT_LATIN:     res = 215; break;
1889b7045ab2Sdrh     case SCRIPT_CYRILLIC:  res = 220; break;
1890b7045ab2Sdrh     case SCRIPT_GREEK:     res = 200; break;
18911db0a72bSdrh     case SCRIPT_HEBREW:    res = 125; break;
18921db0a72bSdrh     case SCRIPT_ARABIC:    res = 160; break;
1893b7045ab2Sdrh     default:               res = 998; break;
1894b7045ab2Sdrh   }
1895b7045ab2Sdrh   sqlite3_result_int(context, res);
1896b7045ab2Sdrh }
1897b7045ab2Sdrh 
1898b7045ab2Sdrh /* End transliterate
1899b7045ab2Sdrh ******************************************************************************
1900b7045ab2Sdrh ******************************************************************************
1901b7045ab2Sdrh ** Begin spellfix1 virtual table.
1902b7045ab2Sdrh */
1903b7045ab2Sdrh 
1904b7045ab2Sdrh /* Maximum length of a phonehash used for querying the shadow table */
19052396fce5Sdrh #define SPELLFIX_MX_HASH  32
1906b7045ab2Sdrh 
1907b7045ab2Sdrh /* Maximum number of hash strings to examine per query */
1908b7045ab2Sdrh #define SPELLFIX_MX_RUN   1
1909b7045ab2Sdrh 
1910b7045ab2Sdrh typedef struct spellfix1_vtab spellfix1_vtab;
1911b7045ab2Sdrh typedef struct spellfix1_cursor spellfix1_cursor;
1912b7045ab2Sdrh 
1913b7045ab2Sdrh /* Fuzzy-search virtual table object */
1914b7045ab2Sdrh struct spellfix1_vtab {
1915b7045ab2Sdrh   sqlite3_vtab base;         /* Base class - must be first */
1916b7045ab2Sdrh   sqlite3 *db;               /* Database connection */
1917b7045ab2Sdrh   char *zDbName;             /* Name of database holding this table */
1918b7045ab2Sdrh   char *zTableName;          /* Name of the virtual table */
1919b7045ab2Sdrh   char *zCostTable;          /* Table holding edit-distance cost numbers */
1920b7045ab2Sdrh   EditDist3Config *pConfig3; /* Parsed edit distance costs */
1921b7045ab2Sdrh };
1922b7045ab2Sdrh 
1923b7045ab2Sdrh /* Fuzzy-search cursor object */
1924b7045ab2Sdrh struct spellfix1_cursor {
1925b7045ab2Sdrh   sqlite3_vtab_cursor base;    /* Base class - must be first */
1926b7045ab2Sdrh   spellfix1_vtab *pVTab;       /* The table to which this cursor belongs */
1927b7045ab2Sdrh   char *zPattern;              /* rhs of MATCH clause */
1928b20a42e3Sdan   int idxNum;                  /* idxNum value passed to xFilter() */
1929b7045ab2Sdrh   int nRow;                    /* Number of rows of content */
1930b7045ab2Sdrh   int nAlloc;                  /* Number of allocated rows */
1931b7045ab2Sdrh   int iRow;                    /* Current row of content */
1932b7045ab2Sdrh   int iLang;                   /* Value of the langid= constraint */
1933b7045ab2Sdrh   int iTop;                    /* Value of the top= constraint */
1934b7045ab2Sdrh   int iScope;                  /* Value of the scope= constraint */
1935b7045ab2Sdrh   int nSearch;                 /* Number of vocabulary items checked */
1936b7045ab2Sdrh   sqlite3_stmt *pFullScan;     /* Shadow query for a full table scan */
1937b7045ab2Sdrh   struct spellfix1_row {       /* For each row of content */
1938b7045ab2Sdrh     sqlite3_int64 iRowid;         /* Rowid for this row */
1939b7045ab2Sdrh     char *zWord;                  /* Text for this row */
1940b7045ab2Sdrh     int iRank;                    /* Rank for this row */
1941b7045ab2Sdrh     int iDistance;                /* Distance from pattern for this row */
1942b7045ab2Sdrh     int iScore;                   /* Score for sorting */
1943b7045ab2Sdrh     int iMatchlen;                /* Value of matchlen column (or -1) */
1944b7045ab2Sdrh     char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */
1945b7045ab2Sdrh   } *a;
1946b7045ab2Sdrh };
1947b7045ab2Sdrh 
1948b7045ab2Sdrh /*
1949b7045ab2Sdrh ** Construct one or more SQL statements from the format string given
1950b7045ab2Sdrh ** and then evaluate those statements. The success code is written
1951b7045ab2Sdrh ** into *pRc.
1952b7045ab2Sdrh **
1953b7045ab2Sdrh ** If *pRc is initially non-zero then this routine is a no-op.
1954b7045ab2Sdrh */
spellfix1DbExec(int * pRc,sqlite3 * db,const char * zFormat,...)1955b7045ab2Sdrh static void spellfix1DbExec(
1956b7045ab2Sdrh   int *pRc,              /* Success code */
1957b7045ab2Sdrh   sqlite3 *db,           /* Database in which to run SQL */
1958b7045ab2Sdrh   const char *zFormat,   /* Format string for SQL */
1959b7045ab2Sdrh   ...                    /* Arguments to the format string */
1960b7045ab2Sdrh ){
1961b7045ab2Sdrh   va_list ap;
1962b7045ab2Sdrh   char *zSql;
1963b7045ab2Sdrh   if( *pRc ) return;
1964b7045ab2Sdrh   va_start(ap, zFormat);
1965b7045ab2Sdrh   zSql = sqlite3_vmprintf(zFormat, ap);
1966b7045ab2Sdrh   va_end(ap);
1967b7045ab2Sdrh   if( zSql==0 ){
1968b7045ab2Sdrh     *pRc = SQLITE_NOMEM;
1969b7045ab2Sdrh   }else{
1970b7045ab2Sdrh     *pRc = sqlite3_exec(db, zSql, 0, 0, 0);
1971b7045ab2Sdrh     sqlite3_free(zSql);
1972b7045ab2Sdrh   }
1973b7045ab2Sdrh }
1974b7045ab2Sdrh 
1975b7045ab2Sdrh /*
1976b7045ab2Sdrh ** xDisconnect/xDestroy method for the fuzzy-search module.
1977b7045ab2Sdrh */
spellfix1Uninit(int isDestroy,sqlite3_vtab * pVTab)1978b7045ab2Sdrh static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){
1979b7045ab2Sdrh   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
1980b7045ab2Sdrh   int rc = SQLITE_OK;
1981b7045ab2Sdrh   if( isDestroy ){
1982b7045ab2Sdrh     sqlite3 *db = p->db;
1983b7045ab2Sdrh     spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"",
1984b7045ab2Sdrh                   p->zDbName, p->zTableName);
1985b7045ab2Sdrh   }
1986b7045ab2Sdrh   if( rc==SQLITE_OK ){
1987b7045ab2Sdrh     sqlite3_free(p->zTableName);
1988b7045ab2Sdrh     editDist3ConfigDelete(p->pConfig3);
1989b7045ab2Sdrh     sqlite3_free(p->zCostTable);
1990b7045ab2Sdrh     sqlite3_free(p);
1991b7045ab2Sdrh   }
1992b7045ab2Sdrh   return rc;
1993b7045ab2Sdrh }
spellfix1Disconnect(sqlite3_vtab * pVTab)1994b7045ab2Sdrh static int spellfix1Disconnect(sqlite3_vtab *pVTab){
1995b7045ab2Sdrh   return spellfix1Uninit(0, pVTab);
1996b7045ab2Sdrh }
spellfix1Destroy(sqlite3_vtab * pVTab)1997b7045ab2Sdrh static int spellfix1Destroy(sqlite3_vtab *pVTab){
1998b7045ab2Sdrh   return spellfix1Uninit(1, pVTab);
1999b7045ab2Sdrh }
2000b7045ab2Sdrh 
2001b7045ab2Sdrh /*
2002b7045ab2Sdrh ** Make a copy of a string.  Remove leading and trailing whitespace
2003b7045ab2Sdrh ** and dequote it.
2004b7045ab2Sdrh */
spellfix1Dequote(const char * zIn)2005b7045ab2Sdrh static char *spellfix1Dequote(const char *zIn){
2006b7045ab2Sdrh   char *zOut;
2007b7045ab2Sdrh   int i, j;
2008b7045ab2Sdrh   char c;
2009c56fac74Sdrh   while( isspace((unsigned char)zIn[0]) ) zIn++;
2010b7045ab2Sdrh   zOut = sqlite3_mprintf("%s", zIn);
2011b7045ab2Sdrh   if( zOut==0 ) return 0;
2012b7045ab2Sdrh   i = (int)strlen(zOut);
2013b7045ab2Sdrh #if 0  /* The parser will never leave spaces at the end */
2014b7045ab2Sdrh   while( i>0 && isspace(zOut[i-1]) ){ i--; }
2015b7045ab2Sdrh #endif
2016b7045ab2Sdrh   zOut[i] = 0;
2017b7045ab2Sdrh   c = zOut[0];
2018b7045ab2Sdrh   if( c=='\'' || c=='"' ){
2019b7045ab2Sdrh     for(i=1, j=0; ALWAYS(zOut[i]); i++){
2020b7045ab2Sdrh       zOut[j++] = zOut[i];
2021b7045ab2Sdrh       if( zOut[i]==c ){
2022b7045ab2Sdrh         if( zOut[i+1]==c ){
2023b7045ab2Sdrh           i++;
2024b7045ab2Sdrh         }else{
2025b7045ab2Sdrh           zOut[j-1] = 0;
2026b7045ab2Sdrh           break;
2027b7045ab2Sdrh         }
2028b7045ab2Sdrh       }
2029b7045ab2Sdrh     }
2030b7045ab2Sdrh   }
2031b7045ab2Sdrh   return zOut;
2032b7045ab2Sdrh }
2033b7045ab2Sdrh 
2034b7045ab2Sdrh 
2035b7045ab2Sdrh /*
2036b7045ab2Sdrh ** xConnect/xCreate method for the spellfix1 module. Arguments are:
2037b7045ab2Sdrh **
2038b7045ab2Sdrh **   argv[0]   -> module name  ("spellfix1")
2039b7045ab2Sdrh **   argv[1]   -> database name
2040b7045ab2Sdrh **   argv[2]   -> table name
2041b7045ab2Sdrh **   argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter)
2042b7045ab2Sdrh */
spellfix1Init(int isCreate,sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVTab,char ** pzErr)2043b7045ab2Sdrh static int spellfix1Init(
2044b7045ab2Sdrh   int isCreate,
2045b7045ab2Sdrh   sqlite3 *db,
2046b7045ab2Sdrh   void *pAux,
2047b7045ab2Sdrh   int argc, const char *const*argv,
2048b7045ab2Sdrh   sqlite3_vtab **ppVTab,
2049b7045ab2Sdrh   char **pzErr
2050b7045ab2Sdrh ){
2051b7045ab2Sdrh   spellfix1_vtab *pNew = 0;
2052f5d87f77Sdrh   /* const char *zModule = argv[0]; // not used */
2053b7045ab2Sdrh   const char *zDbName = argv[1];
2054b7045ab2Sdrh   const char *zTableName = argv[2];
2055b7045ab2Sdrh   int nDbName;
2056b7045ab2Sdrh   int rc = SQLITE_OK;
2057b7045ab2Sdrh   int i;
2058b7045ab2Sdrh 
2059b7045ab2Sdrh   nDbName = (int)strlen(zDbName);
2060c4703eedSdrh   pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1);
2061b7045ab2Sdrh   if( pNew==0 ){
2062b7045ab2Sdrh     rc = SQLITE_NOMEM;
2063b7045ab2Sdrh   }else{
2064b7045ab2Sdrh     memset(pNew, 0, sizeof(*pNew));
2065b7045ab2Sdrh     pNew->zDbName = (char*)&pNew[1];
2066b7045ab2Sdrh     memcpy(pNew->zDbName, zDbName, nDbName+1);
2067b7045ab2Sdrh     pNew->zTableName = sqlite3_mprintf("%s", zTableName);
2068b7045ab2Sdrh     pNew->db = db;
2069b7045ab2Sdrh     if( pNew->zTableName==0 ){
2070b7045ab2Sdrh       rc = SQLITE_NOMEM;
2071b7045ab2Sdrh     }else{
2072*988af251Sdrh       sqlite3_vtab_config(db, SQLITE_VTAB_INNOCUOUS);
2073b7045ab2Sdrh       rc = sqlite3_declare_vtab(db,
2074b7045ab2Sdrh            "CREATE TABLE x(word,rank,distance,langid, "
2075b7045ab2Sdrh            "score, matchlen, phonehash HIDDEN, "
2076b7045ab2Sdrh            "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, "
2077b7045ab2Sdrh            "soundslike HIDDEN, command HIDDEN)"
2078b7045ab2Sdrh       );
2079b7045ab2Sdrh #define SPELLFIX_COL_WORD            0
2080b7045ab2Sdrh #define SPELLFIX_COL_RANK            1
2081b7045ab2Sdrh #define SPELLFIX_COL_DISTANCE        2
2082b7045ab2Sdrh #define SPELLFIX_COL_LANGID          3
2083b7045ab2Sdrh #define SPELLFIX_COL_SCORE           4
2084b7045ab2Sdrh #define SPELLFIX_COL_MATCHLEN        5
2085b7045ab2Sdrh #define SPELLFIX_COL_PHONEHASH       6
2086b7045ab2Sdrh #define SPELLFIX_COL_TOP             7
2087b7045ab2Sdrh #define SPELLFIX_COL_SCOPE           8
2088b7045ab2Sdrh #define SPELLFIX_COL_SRCHCNT         9
2089b7045ab2Sdrh #define SPELLFIX_COL_SOUNDSLIKE     10
2090b7045ab2Sdrh #define SPELLFIX_COL_COMMAND        11
2091b7045ab2Sdrh     }
2092b7045ab2Sdrh     if( rc==SQLITE_OK && isCreate ){
2093b7045ab2Sdrh       spellfix1DbExec(&rc, db,
2094b7045ab2Sdrh          "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n"
2095b7045ab2Sdrh          "  id INTEGER PRIMARY KEY,\n"
2096b7045ab2Sdrh          "  rank INT,\n"
2097b7045ab2Sdrh          "  langid INT,\n"
2098b7045ab2Sdrh          "  word TEXT,\n"
2099b7045ab2Sdrh          "  k1 TEXT,\n"
2100b7045ab2Sdrh          "  k2 TEXT\n"
2101b7045ab2Sdrh          ");\n",
2102b7045ab2Sdrh          zDbName, zTableName
2103b7045ab2Sdrh       );
2104b7045ab2Sdrh       spellfix1DbExec(&rc, db,
21050211d8bcSdrh          "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" "
2106b7045ab2Sdrh             "ON \"%w_vocab\"(langid,k2);",
2107f5d87f77Sdrh          zDbName, zTableName, zTableName
2108b7045ab2Sdrh       );
2109b7045ab2Sdrh     }
2110b7045ab2Sdrh     for(i=3; rc==SQLITE_OK && i<argc; i++){
2111b7045ab2Sdrh       if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){
2112b7045ab2Sdrh         pNew->zCostTable = spellfix1Dequote(&argv[i][16]);
2113b7045ab2Sdrh         if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM;
2114b7045ab2Sdrh         continue;
2115b7045ab2Sdrh       }
2116b7045ab2Sdrh       *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]);
2117b7045ab2Sdrh       rc = SQLITE_ERROR;
2118b7045ab2Sdrh     }
2119b7045ab2Sdrh   }
2120b7045ab2Sdrh 
2121b7045ab2Sdrh   if( rc && pNew ){
2122b7045ab2Sdrh     *ppVTab = 0;
2123b7045ab2Sdrh     spellfix1Uninit(0, &pNew->base);
2124b7045ab2Sdrh   }else{
2125b7045ab2Sdrh     *ppVTab = (sqlite3_vtab *)pNew;
2126b7045ab2Sdrh   }
2127b7045ab2Sdrh   return rc;
2128b7045ab2Sdrh }
2129b7045ab2Sdrh 
2130b7045ab2Sdrh /*
2131b7045ab2Sdrh ** The xConnect and xCreate methods
2132b7045ab2Sdrh */
spellfix1Connect(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVTab,char ** pzErr)2133b7045ab2Sdrh static int spellfix1Connect(
2134b7045ab2Sdrh   sqlite3 *db,
2135b7045ab2Sdrh   void *pAux,
2136b7045ab2Sdrh   int argc, const char *const*argv,
2137b7045ab2Sdrh   sqlite3_vtab **ppVTab,
2138b7045ab2Sdrh   char **pzErr
2139b7045ab2Sdrh ){
2140b7045ab2Sdrh   return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr);
2141b7045ab2Sdrh }
spellfix1Create(sqlite3 * db,void * pAux,int argc,const char * const * argv,sqlite3_vtab ** ppVTab,char ** pzErr)2142b7045ab2Sdrh static int spellfix1Create(
2143b7045ab2Sdrh   sqlite3 *db,
2144b7045ab2Sdrh   void *pAux,
2145b7045ab2Sdrh   int argc, const char *const*argv,
2146b7045ab2Sdrh   sqlite3_vtab **ppVTab,
2147b7045ab2Sdrh   char **pzErr
2148b7045ab2Sdrh ){
2149b7045ab2Sdrh   return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr);
2150b7045ab2Sdrh }
2151b7045ab2Sdrh 
2152b7045ab2Sdrh /*
2153b7045ab2Sdrh ** Clear all of the content from a cursor.
2154b7045ab2Sdrh */
spellfix1ResetCursor(spellfix1_cursor * pCur)2155b7045ab2Sdrh static void spellfix1ResetCursor(spellfix1_cursor *pCur){
2156b7045ab2Sdrh   int i;
2157b7045ab2Sdrh   for(i=0; i<pCur->nRow; i++){
2158b7045ab2Sdrh     sqlite3_free(pCur->a[i].zWord);
2159b7045ab2Sdrh   }
2160b7045ab2Sdrh   pCur->nRow = 0;
2161b7045ab2Sdrh   pCur->iRow = 0;
2162b7045ab2Sdrh   pCur->nSearch = 0;
2163b7045ab2Sdrh   if( pCur->pFullScan ){
2164b7045ab2Sdrh     sqlite3_finalize(pCur->pFullScan);
2165b7045ab2Sdrh     pCur->pFullScan = 0;
2166b7045ab2Sdrh   }
2167b7045ab2Sdrh }
2168b7045ab2Sdrh 
2169b7045ab2Sdrh /*
2170b7045ab2Sdrh ** Resize the cursor to hold up to N rows of content
2171b7045ab2Sdrh */
spellfix1ResizeCursor(spellfix1_cursor * pCur,int N)2172b7045ab2Sdrh static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){
2173b7045ab2Sdrh   struct spellfix1_row *aNew;
2174b7045ab2Sdrh   assert( N>=pCur->nRow );
2175c4703eedSdrh   aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N);
2176b7045ab2Sdrh   if( aNew==0 && N>0 ){
2177b7045ab2Sdrh     spellfix1ResetCursor(pCur);
2178b7045ab2Sdrh     sqlite3_free(pCur->a);
2179b7045ab2Sdrh     pCur->nAlloc = 0;
2180b7045ab2Sdrh     pCur->a = 0;
2181b7045ab2Sdrh   }else{
2182b7045ab2Sdrh     pCur->nAlloc = N;
2183b7045ab2Sdrh     pCur->a = aNew;
2184b7045ab2Sdrh   }
2185b7045ab2Sdrh }
2186b7045ab2Sdrh 
2187b7045ab2Sdrh 
2188b7045ab2Sdrh /*
2189b7045ab2Sdrh ** Close a fuzzy-search cursor.
2190b7045ab2Sdrh */
spellfix1Close(sqlite3_vtab_cursor * cur)2191b7045ab2Sdrh static int spellfix1Close(sqlite3_vtab_cursor *cur){
2192b7045ab2Sdrh   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2193b7045ab2Sdrh   spellfix1ResetCursor(pCur);
2194b7045ab2Sdrh   spellfix1ResizeCursor(pCur, 0);
2195b7045ab2Sdrh   sqlite3_free(pCur->zPattern);
2196b7045ab2Sdrh   sqlite3_free(pCur);
2197b7045ab2Sdrh   return SQLITE_OK;
2198b7045ab2Sdrh }
2199b7045ab2Sdrh 
2200b20a42e3Sdan #define SPELLFIX_IDXNUM_MATCH  0x01         /* word MATCH $str */
2201b20a42e3Sdan #define SPELLFIX_IDXNUM_LANGID 0x02         /* langid == $langid */
2202b20a42e3Sdan #define SPELLFIX_IDXNUM_TOP    0x04         /* top = $top */
2203b20a42e3Sdan #define SPELLFIX_IDXNUM_SCOPE  0x08         /* scope = $scope */
2204b20a42e3Sdan #define SPELLFIX_IDXNUM_DISTLT 0x10         /* distance < $distance */
2205b20a42e3Sdan #define SPELLFIX_IDXNUM_DISTLE 0x20         /* distance <= $distance */
2206b20a42e3Sdan #define SPELLFIX_IDXNUM_ROWID  0x40         /* rowid = $rowid */
2207b20a42e3Sdan #define SPELLFIX_IDXNUM_DIST   (0x10|0x20)  /* DISTLT and DISTLE */
2208b20a42e3Sdan 
2209b7045ab2Sdrh /*
2210b7045ab2Sdrh **
2211b20a42e3Sdan ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined
2212b20a42e3Sdan ** above.
2213b7045ab2Sdrh **
2214a8a0617eSdan ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid
2215b7045ab2Sdrh ** if specified and in that order.
2216b7045ab2Sdrh */
spellfix1BestIndex(sqlite3_vtab * tab,sqlite3_index_info * pIdxInfo)2217b7045ab2Sdrh static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
2218b7045ab2Sdrh   int iPlan = 0;
2219b7045ab2Sdrh   int iLangTerm = -1;
2220b7045ab2Sdrh   int iTopTerm = -1;
2221b7045ab2Sdrh   int iScopeTerm = -1;
2222b7045ab2Sdrh   int iDistTerm = -1;
2223a8a0617eSdan   int iRowidTerm = -1;
2224b7045ab2Sdrh   int i;
2225b7045ab2Sdrh   const struct sqlite3_index_constraint *pConstraint;
2226b7045ab2Sdrh   pConstraint = pIdxInfo->aConstraint;
2227b7045ab2Sdrh   for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){
2228b7045ab2Sdrh     if( pConstraint->usable==0 ) continue;
2229b7045ab2Sdrh 
2230b7045ab2Sdrh     /* Terms of the form:  word MATCH $str */
2231b20a42e3Sdan     if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0
2232b7045ab2Sdrh      && pConstraint->iColumn==SPELLFIX_COL_WORD
2233b7045ab2Sdrh      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH
2234b7045ab2Sdrh     ){
2235b20a42e3Sdan       iPlan |= SPELLFIX_IDXNUM_MATCH;
2236b7045ab2Sdrh       pIdxInfo->aConstraintUsage[i].argvIndex = 1;
2237b7045ab2Sdrh       pIdxInfo->aConstraintUsage[i].omit = 1;
2238b7045ab2Sdrh     }
2239b7045ab2Sdrh 
2240b7045ab2Sdrh     /* Terms of the form:  langid = $langid  */
2241b20a42e3Sdan     if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0
2242b7045ab2Sdrh      && pConstraint->iColumn==SPELLFIX_COL_LANGID
2243b7045ab2Sdrh      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2244b7045ab2Sdrh     ){
2245b20a42e3Sdan       iPlan |= SPELLFIX_IDXNUM_LANGID;
2246b7045ab2Sdrh       iLangTerm = i;
2247b7045ab2Sdrh     }
2248b7045ab2Sdrh 
2249b7045ab2Sdrh     /* Terms of the form:  top = $top */
2250b20a42e3Sdan     if( (iPlan & SPELLFIX_IDXNUM_TOP)==0
2251b7045ab2Sdrh      && pConstraint->iColumn==SPELLFIX_COL_TOP
2252b7045ab2Sdrh      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2253b7045ab2Sdrh     ){
2254b20a42e3Sdan       iPlan |= SPELLFIX_IDXNUM_TOP;
2255b7045ab2Sdrh       iTopTerm = i;
2256b7045ab2Sdrh     }
2257b7045ab2Sdrh 
2258b7045ab2Sdrh     /* Terms of the form:  scope = $scope */
2259b20a42e3Sdan     if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0
2260b7045ab2Sdrh      && pConstraint->iColumn==SPELLFIX_COL_SCOPE
2261b7045ab2Sdrh      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2262b7045ab2Sdrh     ){
2263b20a42e3Sdan       iPlan |= SPELLFIX_IDXNUM_SCOPE;
2264b7045ab2Sdrh       iScopeTerm = i;
2265b7045ab2Sdrh     }
2266b7045ab2Sdrh 
2267b7045ab2Sdrh     /* Terms of the form:  distance < $dist or distance <= $dist */
2268b20a42e3Sdan     if( (iPlan & SPELLFIX_IDXNUM_DIST)==0
2269b7045ab2Sdrh      && pConstraint->iColumn==SPELLFIX_COL_DISTANCE
2270b7045ab2Sdrh      && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT
2271b7045ab2Sdrh           || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE)
2272b7045ab2Sdrh     ){
2273b20a42e3Sdan       if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){
2274b20a42e3Sdan         iPlan |= SPELLFIX_IDXNUM_DISTLT;
2275b20a42e3Sdan       }else{
2276b20a42e3Sdan         iPlan |= SPELLFIX_IDXNUM_DISTLE;
2277b20a42e3Sdan       }
2278b7045ab2Sdrh       iDistTerm = i;
2279b7045ab2Sdrh     }
2280a8a0617eSdan 
2281a8a0617eSdan     /* Terms of the form:  distance < $dist or distance <= $dist */
2282b20a42e3Sdan     if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0
2283a8a0617eSdan      && pConstraint->iColumn<0
2284a8a0617eSdan      && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ
2285a8a0617eSdan     ){
2286b20a42e3Sdan       iPlan |= SPELLFIX_IDXNUM_ROWID;
2287a8a0617eSdan       iRowidTerm = i;
2288a8a0617eSdan     }
2289b7045ab2Sdrh   }
2290b20a42e3Sdan   if( iPlan&SPELLFIX_IDXNUM_MATCH ){
2291b7045ab2Sdrh     int idx = 2;
2292b7045ab2Sdrh     pIdxInfo->idxNum = iPlan;
2293b7045ab2Sdrh     if( pIdxInfo->nOrderBy==1
2294b7045ab2Sdrh      && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE
2295b7045ab2Sdrh      && pIdxInfo->aOrderBy[0].desc==0
2296b7045ab2Sdrh     ){
2297b7045ab2Sdrh       pIdxInfo->orderByConsumed = 1;  /* Default order by iScore */
2298b7045ab2Sdrh     }
2299b20a42e3Sdan     if( iPlan&SPELLFIX_IDXNUM_LANGID ){
2300b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++;
2301b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iLangTerm].omit = 1;
2302b7045ab2Sdrh     }
2303b20a42e3Sdan     if( iPlan&SPELLFIX_IDXNUM_TOP ){
2304b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++;
2305b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iTopTerm].omit = 1;
2306b7045ab2Sdrh     }
2307b20a42e3Sdan     if( iPlan&SPELLFIX_IDXNUM_SCOPE ){
2308b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++;
2309b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1;
2310b7045ab2Sdrh     }
2311b20a42e3Sdan     if( iPlan&SPELLFIX_IDXNUM_DIST ){
2312b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++;
2313b7045ab2Sdrh       pIdxInfo->aConstraintUsage[iDistTerm].omit = 1;
2314b7045ab2Sdrh     }
2315580d7dc7Sdrh     pIdxInfo->estimatedCost = 1e5;
2316b20a42e3Sdan   }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){
2317b20a42e3Sdan     pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID;
2318a8a0617eSdan     pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1;
2319a8a0617eSdan     pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1;
2320a8a0617eSdan     pIdxInfo->estimatedCost = 5;
2321b7045ab2Sdrh   }else{
2322b7045ab2Sdrh     pIdxInfo->idxNum = 0;
2323580d7dc7Sdrh     pIdxInfo->estimatedCost = 1e50;
2324b7045ab2Sdrh   }
2325b7045ab2Sdrh   return SQLITE_OK;
2326b7045ab2Sdrh }
2327b7045ab2Sdrh 
2328b7045ab2Sdrh /*
2329b7045ab2Sdrh ** Open a new fuzzy-search cursor.
2330b7045ab2Sdrh */
spellfix1Open(sqlite3_vtab * pVTab,sqlite3_vtab_cursor ** ppCursor)2331b7045ab2Sdrh static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
2332b7045ab2Sdrh   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2333b7045ab2Sdrh   spellfix1_cursor *pCur;
2334c4703eedSdrh   pCur = sqlite3_malloc64( sizeof(*pCur) );
2335b7045ab2Sdrh   if( pCur==0 ) return SQLITE_NOMEM;
2336b7045ab2Sdrh   memset(pCur, 0, sizeof(*pCur));
2337b7045ab2Sdrh   pCur->pVTab = p;
2338b7045ab2Sdrh   *ppCursor = &pCur->base;
2339b7045ab2Sdrh   return SQLITE_OK;
2340b7045ab2Sdrh }
2341b7045ab2Sdrh 
2342b7045ab2Sdrh /*
2343b7045ab2Sdrh ** Adjust a distance measurement by the words rank in order to show
2344b7045ab2Sdrh ** preference to common words.
2345b7045ab2Sdrh */
spellfix1Score(int iDistance,int iRank)2346b7045ab2Sdrh static int spellfix1Score(int iDistance, int iRank){
2347b7045ab2Sdrh   int iLog2;
2348b7045ab2Sdrh   for(iLog2=0; iRank>0; iLog2++, iRank>>=1){}
2349b7045ab2Sdrh   return iDistance + 32 - iLog2;
2350b7045ab2Sdrh }
2351b7045ab2Sdrh 
2352b7045ab2Sdrh /*
2353b7045ab2Sdrh ** Compare two spellfix1_row objects for sorting purposes in qsort() such
2354b7045ab2Sdrh ** that they sort in order of increasing distance.
2355b7045ab2Sdrh */
spellfix1RowCompare(const void * A,const void * B)235669def7ffSmistachkin static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){
2357b7045ab2Sdrh   const struct spellfix1_row *a = (const struct spellfix1_row*)A;
2358b7045ab2Sdrh   const struct spellfix1_row *b = (const struct spellfix1_row*)B;
2359b7045ab2Sdrh   return a->iScore - b->iScore;
2360b7045ab2Sdrh }
2361b7045ab2Sdrh 
2362b7045ab2Sdrh /*
2363b7045ab2Sdrh ** A structure used to pass information from spellfix1FilterForMatch()
2364b7045ab2Sdrh ** into spellfix1RunQuery().
2365b7045ab2Sdrh */
2366b7045ab2Sdrh typedef struct MatchQuery {
2367b7045ab2Sdrh   spellfix1_cursor *pCur;          /* The cursor being queried */
2368b7045ab2Sdrh   sqlite3_stmt *pStmt;             /* shadow table query statment */
2369b7045ab2Sdrh   char zHash[SPELLFIX_MX_HASH];    /* The current phonehash for zPattern */
2370b7045ab2Sdrh   const char *zPattern;            /* Transliterated input string */
2371b7045ab2Sdrh   int nPattern;                    /* Length of zPattern */
2372b7045ab2Sdrh   EditDist3FromString *pMatchStr3; /* Original unicode string */
2373b7045ab2Sdrh   EditDist3Config *pConfig3;       /* Edit-distance cost coefficients */
2374b7045ab2Sdrh   const EditDist3Lang *pLang;      /* The selected language coefficients */
2375b7045ab2Sdrh   int iLang;                       /* The language id */
2376b7045ab2Sdrh   int iScope;                      /* Default scope */
2377b7045ab2Sdrh   int iMaxDist;                    /* Maximum allowed edit distance, or -1 */
2378b7045ab2Sdrh   int rc;                          /* Error code */
2379b7045ab2Sdrh   int nRun;                  /* Number of prior runs for the same zPattern */
2380b7045ab2Sdrh   char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH];  /* Prior hashes */
2381b7045ab2Sdrh } MatchQuery;
2382b7045ab2Sdrh 
2383b7045ab2Sdrh /*
2384b7045ab2Sdrh ** Run a query looking for the best matches against zPattern using
2385b7045ab2Sdrh ** zHash as the character class seed hash.
2386b7045ab2Sdrh */
spellfix1RunQuery(MatchQuery * p,const char * zQuery,int nQuery)2387b7045ab2Sdrh static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){
2388b7045ab2Sdrh   const char *zK1;
2389b7045ab2Sdrh   const char *zWord;
2390b7045ab2Sdrh   int iDist;
2391b7045ab2Sdrh   int iRank;
2392b7045ab2Sdrh   int iScore;
2393b7045ab2Sdrh   int iWorst = 0;
2394b7045ab2Sdrh   int idx;
2395b7045ab2Sdrh   int idxWorst = -1;
2396b7045ab2Sdrh   int i;
2397b7045ab2Sdrh   int iScope = p->iScope;
2398b7045ab2Sdrh   spellfix1_cursor *pCur = p->pCur;
2399b7045ab2Sdrh   sqlite3_stmt *pStmt = p->pStmt;
2400b7045ab2Sdrh   char zHash1[SPELLFIX_MX_HASH];
2401b7045ab2Sdrh   char zHash2[SPELLFIX_MX_HASH];
2402b7045ab2Sdrh   char *zClass;
2403b7045ab2Sdrh   int nClass;
2404b7045ab2Sdrh   int rc;
2405b7045ab2Sdrh 
2406b7045ab2Sdrh   if( pCur->a==0 || p->rc ) return;   /* Prior memory allocation failure */
2407b7045ab2Sdrh   zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery);
2408b7045ab2Sdrh   if( zClass==0 ){
2409b7045ab2Sdrh     p->rc = SQLITE_NOMEM;
2410b7045ab2Sdrh     return;
2411b7045ab2Sdrh   }
2412b7045ab2Sdrh   nClass = (int)strlen(zClass);
2413b7045ab2Sdrh   if( nClass>SPELLFIX_MX_HASH-2 ){
2414b7045ab2Sdrh     nClass = SPELLFIX_MX_HASH-2;
2415b7045ab2Sdrh     zClass[nClass] = 0;
2416b7045ab2Sdrh   }
2417b7045ab2Sdrh   if( nClass<=iScope ){
2418b7045ab2Sdrh     if( nClass>2 ){
2419b7045ab2Sdrh       iScope = nClass-1;
2420b7045ab2Sdrh     }else{
2421b7045ab2Sdrh       iScope = nClass;
2422b7045ab2Sdrh     }
2423b7045ab2Sdrh   }
2424b7045ab2Sdrh   memcpy(zHash1, zClass, iScope);
2425b7045ab2Sdrh   sqlite3_free(zClass);
2426b7045ab2Sdrh   zHash1[iScope] = 0;
2427b7045ab2Sdrh   memcpy(zHash2, zHash1, iScope);
2428b7045ab2Sdrh   zHash2[iScope] = 'Z';
2429b7045ab2Sdrh   zHash2[iScope+1] = 0;
2430b7045ab2Sdrh #if SPELLFIX_MX_RUN>1
2431b7045ab2Sdrh   for(i=0; i<p->nRun; i++){
2432b7045ab2Sdrh     if( strcmp(p->azPrior[i], zHash1)==0 ) return;
2433b7045ab2Sdrh   }
2434b7045ab2Sdrh #endif
2435b7045ab2Sdrh   assert( p->nRun<SPELLFIX_MX_RUN );
2436b7045ab2Sdrh   memcpy(p->azPrior[p->nRun++], zHash1, iScope+1);
2437b7045ab2Sdrh   if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM
2438b7045ab2Sdrh    || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM
2439b7045ab2Sdrh   ){
2440b7045ab2Sdrh     p->rc = SQLITE_NOMEM;
2441b7045ab2Sdrh     return;
2442b7045ab2Sdrh   }
2443b7045ab2Sdrh #if SPELLFIX_MX_RUN>1
2444b7045ab2Sdrh   for(i=0; i<pCur->nRow; i++){
2445b7045ab2Sdrh     if( pCur->a[i].iScore>iWorst ){
2446b7045ab2Sdrh       iWorst = pCur->a[i].iScore;
2447b7045ab2Sdrh       idxWorst = i;
2448b7045ab2Sdrh     }
2449b7045ab2Sdrh   }
2450b7045ab2Sdrh #endif
2451b7045ab2Sdrh   while( sqlite3_step(pStmt)==SQLITE_ROW ){
2452b7045ab2Sdrh     int iMatchlen = -1;
2453b7045ab2Sdrh     iRank = sqlite3_column_int(pStmt, 2);
2454b7045ab2Sdrh     if( p->pMatchStr3 ){
2455b7045ab2Sdrh       int nWord = sqlite3_column_bytes(pStmt, 1);
2456b7045ab2Sdrh       zWord = (const char*)sqlite3_column_text(pStmt, 1);
2457b7045ab2Sdrh       iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen);
2458b7045ab2Sdrh     }else{
2459b7045ab2Sdrh       zK1 = (const char*)sqlite3_column_text(pStmt, 3);
2460b7045ab2Sdrh       if( zK1==0 ) continue;
2461b7045ab2Sdrh       iDist = editdist1(p->zPattern, zK1, 0);
2462b7045ab2Sdrh     }
2463b7045ab2Sdrh     if( iDist<0 ){
2464b7045ab2Sdrh       p->rc = SQLITE_NOMEM;
2465b7045ab2Sdrh       break;
2466b7045ab2Sdrh     }
2467b7045ab2Sdrh     pCur->nSearch++;
2468b20a42e3Sdan 
2469b20a42e3Sdan     /* If there is a "distance < $dist" or "distance <= $dist" constraint,
2470b20a42e3Sdan     ** check if this row meets it. If not, jump back up to the top of the
2471b20a42e3Sdan     ** loop to process the next row. Otherwise, if the row does match the
2472b20a42e3Sdan     ** distance constraint, check if the pCur->a[] array is already full.
2473b20a42e3Sdan     ** If it is and no explicit "top = ?" constraint was present in the
2474b20a42e3Sdan     ** query, grow the array to ensure there is room for the new entry. */
2475b20a42e3Sdan     assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) );
2476b7045ab2Sdrh     if( p->iMaxDist>=0 ){
2477b7045ab2Sdrh       if( iDist>p->iMaxDist ) continue;
2478b20a42e3Sdan       if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){
2479b7045ab2Sdrh         spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10);
2480b7045ab2Sdrh         if( pCur->a==0 ) break;
2481b7045ab2Sdrh       }
2482b20a42e3Sdan     }
2483b20a42e3Sdan 
2484b20a42e3Sdan     iScore = spellfix1Score(iDist,iRank);
2485b20a42e3Sdan     if( pCur->nRow<pCur->nAlloc ){
2486b7045ab2Sdrh       idx = pCur->nRow;
2487b7045ab2Sdrh     }else if( iScore<iWorst ){
2488b7045ab2Sdrh       idx = idxWorst;
2489b7045ab2Sdrh       sqlite3_free(pCur->a[idx].zWord);
2490b7045ab2Sdrh     }else{
2491b7045ab2Sdrh       continue;
2492b7045ab2Sdrh     }
2493b20a42e3Sdan 
2494b7045ab2Sdrh     pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1));
2495b7045ab2Sdrh     if( pCur->a[idx].zWord==0 ){
2496b7045ab2Sdrh       p->rc = SQLITE_NOMEM;
2497b7045ab2Sdrh       break;
2498b7045ab2Sdrh     }
2499b7045ab2Sdrh     pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0);
2500b7045ab2Sdrh     pCur->a[idx].iRank = iRank;
2501b7045ab2Sdrh     pCur->a[idx].iDistance = iDist;
2502b7045ab2Sdrh     pCur->a[idx].iScore = iScore;
2503b7045ab2Sdrh     pCur->a[idx].iMatchlen = iMatchlen;
2504b7045ab2Sdrh     memcpy(pCur->a[idx].zHash, zHash1, iScope+1);
2505b7045ab2Sdrh     if( pCur->nRow<pCur->nAlloc ) pCur->nRow++;
2506b7045ab2Sdrh     if( pCur->nRow==pCur->nAlloc ){
2507b7045ab2Sdrh       iWorst = pCur->a[0].iScore;
2508b7045ab2Sdrh       idxWorst = 0;
2509b7045ab2Sdrh       for(i=1; i<pCur->nRow; i++){
2510b7045ab2Sdrh         iScore = pCur->a[i].iScore;
2511b7045ab2Sdrh         if( iWorst<iScore ){
2512b7045ab2Sdrh           iWorst = iScore;
2513b7045ab2Sdrh           idxWorst = i;
2514b7045ab2Sdrh         }
2515b7045ab2Sdrh       }
2516b7045ab2Sdrh     }
2517b7045ab2Sdrh   }
2518b7045ab2Sdrh   rc = sqlite3_reset(pStmt);
2519b7045ab2Sdrh   if( rc ) p->rc = rc;
2520b7045ab2Sdrh }
2521b7045ab2Sdrh 
2522b7045ab2Sdrh /*
2523b7045ab2Sdrh ** This version of the xFilter method work if the MATCH term is present
2524b7045ab2Sdrh ** and we are doing a scan.
2525b7045ab2Sdrh */
spellfix1FilterForMatch(spellfix1_cursor * pCur,int argc,sqlite3_value ** argv)2526b7045ab2Sdrh static int spellfix1FilterForMatch(
2527b7045ab2Sdrh   spellfix1_cursor *pCur,
2528b7045ab2Sdrh   int argc,
2529b7045ab2Sdrh   sqlite3_value **argv
2530b7045ab2Sdrh ){
2531b20a42e3Sdan   int idxNum = pCur->idxNum;
2532b7045ab2Sdrh   const unsigned char *zMatchThis;   /* RHS of the MATCH operator */
2533b7045ab2Sdrh   EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */
2534b7045ab2Sdrh   char *zPattern;                    /* Transliteration of zMatchThis */
2535b7045ab2Sdrh   int nPattern;                      /* Length of zPattern */
2536b7045ab2Sdrh   int iLimit = 20;                   /* Max number of rows of output */
2537b7045ab2Sdrh   int iScope = 3;                    /* Use this many characters of zClass */
2538b7045ab2Sdrh   int iLang = 0;                     /* Language code */
2539b7045ab2Sdrh   char *zSql;                        /* SQL of shadow table query */
2540b7045ab2Sdrh   sqlite3_stmt *pStmt = 0;           /* Shadow table query */
2541b7045ab2Sdrh   int rc;                            /* Result code */
2542b7045ab2Sdrh   int idx = 1;                       /* Next available filter parameter */
2543b7045ab2Sdrh   spellfix1_vtab *p = pCur->pVTab;   /* The virtual table that owns pCur */
2544b7045ab2Sdrh   MatchQuery x;                      /* For passing info to RunQuery() */
2545b7045ab2Sdrh 
2546b7045ab2Sdrh   /* Load the cost table if we have not already done so */
2547b7045ab2Sdrh   if( p->zCostTable!=0 && p->pConfig3==0 ){
2548c4703eedSdrh     p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) );
2549b7045ab2Sdrh     if( p->pConfig3==0 ) return SQLITE_NOMEM;
2550b7045ab2Sdrh     memset(p->pConfig3, 0, sizeof(p->pConfig3[0]));
2551b7045ab2Sdrh     rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable);
2552b7045ab2Sdrh     if( rc ) return rc;
2553b7045ab2Sdrh   }
2554b7045ab2Sdrh   memset(&x, 0, sizeof(x));
2555b7045ab2Sdrh   x.iScope = 3;  /* Default scope if none specified by "WHERE scope=N" */
2556b7045ab2Sdrh   x.iMaxDist = -1;   /* Maximum allowed edit distance */
2557b7045ab2Sdrh 
2558b7045ab2Sdrh   if( idxNum&2 ){
2559b7045ab2Sdrh     iLang = sqlite3_value_int(argv[idx++]);
2560b7045ab2Sdrh   }
2561b7045ab2Sdrh   if( idxNum&4 ){
2562b7045ab2Sdrh     iLimit = sqlite3_value_int(argv[idx++]);
2563b7045ab2Sdrh     if( iLimit<1 ) iLimit = 1;
2564b7045ab2Sdrh   }
2565b7045ab2Sdrh   if( idxNum&8 ){
2566b7045ab2Sdrh     x.iScope = sqlite3_value_int(argv[idx++]);
2567b7045ab2Sdrh     if( x.iScope<1 ) x.iScope = 1;
2568b7045ab2Sdrh     if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2;
2569b7045ab2Sdrh   }
2570b7045ab2Sdrh   if( idxNum&(16|32) ){
2571b7045ab2Sdrh     x.iMaxDist = sqlite3_value_int(argv[idx++]);
2572b7045ab2Sdrh     if( idxNum&16 ) x.iMaxDist--;
2573b7045ab2Sdrh     if( x.iMaxDist<0 ) x.iMaxDist = 0;
2574b7045ab2Sdrh   }
2575b7045ab2Sdrh   spellfix1ResetCursor(pCur);
2576b7045ab2Sdrh   spellfix1ResizeCursor(pCur, iLimit);
2577b7045ab2Sdrh   zMatchThis = sqlite3_value_text(argv[0]);
2578b7045ab2Sdrh   if( zMatchThis==0 ) return SQLITE_OK;
2579b7045ab2Sdrh   if( p->pConfig3 ){
2580b7045ab2Sdrh     x.pLang = editDist3FindLang(p->pConfig3, iLang);
2581b7045ab2Sdrh     pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1);
2582b7045ab2Sdrh     if( pMatchStr3==0 ){
2583b7045ab2Sdrh       x.rc = SQLITE_NOMEM;
2584b7045ab2Sdrh       goto filter_exit;
2585b7045ab2Sdrh     }
2586b7045ab2Sdrh   }else{
2587b7045ab2Sdrh     x.pLang = 0;
2588b7045ab2Sdrh   }
2589b7045ab2Sdrh   zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0]));
2590b7045ab2Sdrh   sqlite3_free(pCur->zPattern);
2591b7045ab2Sdrh   pCur->zPattern = zPattern;
2592b7045ab2Sdrh   if( zPattern==0 ){
2593b7045ab2Sdrh     x.rc = SQLITE_NOMEM;
2594b7045ab2Sdrh     goto filter_exit;
2595b7045ab2Sdrh   }
2596b7045ab2Sdrh   nPattern = (int)strlen(zPattern);
2597b7045ab2Sdrh   if( zPattern[nPattern-1]=='*' ) nPattern--;
2598b7045ab2Sdrh   zSql = sqlite3_mprintf(
2599e2d27e02Sdrh      "SELECT id, word, rank, coalesce(k1,word)"
2600b7045ab2Sdrh      "  FROM \"%w\".\"%w_vocab\""
2601b7045ab2Sdrh      " WHERE langid=%d AND k2>=?1 AND k2<?2",
2602b7045ab2Sdrh      p->zDbName, p->zTableName, iLang
2603b7045ab2Sdrh   );
2604b7045ab2Sdrh   if( zSql==0 ){
2605b7045ab2Sdrh     x.rc = SQLITE_NOMEM;
2606b7045ab2Sdrh     pStmt = 0;
2607b7045ab2Sdrh     goto filter_exit;
2608b7045ab2Sdrh   }
2609b7045ab2Sdrh   rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
2610b7045ab2Sdrh   sqlite3_free(zSql);
2611b7045ab2Sdrh   pCur->iLang = iLang;
2612b7045ab2Sdrh   x.pCur = pCur;
2613b7045ab2Sdrh   x.pStmt = pStmt;
2614b7045ab2Sdrh   x.zPattern = zPattern;
2615b7045ab2Sdrh   x.nPattern = nPattern;
2616b7045ab2Sdrh   x.pMatchStr3 = pMatchStr3;
2617b7045ab2Sdrh   x.iLang = iLang;
2618b7045ab2Sdrh   x.rc = rc;
2619b7045ab2Sdrh   x.pConfig3 = p->pConfig3;
2620b7045ab2Sdrh   if( x.rc==SQLITE_OK ){
2621b7045ab2Sdrh     spellfix1RunQuery(&x, zPattern, nPattern);
2622b7045ab2Sdrh   }
2623b7045ab2Sdrh 
2624b7045ab2Sdrh   if( pCur->a ){
2625b7045ab2Sdrh     qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare);
2626b7045ab2Sdrh     pCur->iTop = iLimit;
2627b7045ab2Sdrh     pCur->iScope = iScope;
2628b7045ab2Sdrh   }else{
2629b7045ab2Sdrh     x.rc = SQLITE_NOMEM;
2630b7045ab2Sdrh   }
2631b7045ab2Sdrh 
2632b7045ab2Sdrh filter_exit:
2633b7045ab2Sdrh   sqlite3_finalize(pStmt);
2634b7045ab2Sdrh   editDist3FromStringDelete(pMatchStr3);
2635b7045ab2Sdrh   return x.rc;
2636b7045ab2Sdrh }
2637b7045ab2Sdrh 
2638b7045ab2Sdrh /*
2639b7045ab2Sdrh ** This version of xFilter handles a full-table scan case
2640b7045ab2Sdrh */
spellfix1FilterForFullScan(spellfix1_cursor * pCur,int argc,sqlite3_value ** argv)2641b7045ab2Sdrh static int spellfix1FilterForFullScan(
2642b7045ab2Sdrh   spellfix1_cursor *pCur,
2643b7045ab2Sdrh   int argc,
2644b7045ab2Sdrh   sqlite3_value **argv
2645b7045ab2Sdrh ){
2646a8a0617eSdan   int rc = SQLITE_OK;
2647b20a42e3Sdan   int idxNum = pCur->idxNum;
2648b7045ab2Sdrh   char *zSql;
2649b7045ab2Sdrh   spellfix1_vtab *pVTab = pCur->pVTab;
2650b7045ab2Sdrh   spellfix1ResetCursor(pCur);
2651a8a0617eSdan   assert( idxNum==0 || idxNum==64 );
2652b7045ab2Sdrh   zSql = sqlite3_mprintf(
2653a8a0617eSdan      "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s",
2654a8a0617eSdan      pVTab->zDbName, pVTab->zTableName,
2655a8a0617eSdan      ((idxNum & 64) ? " WHERE rowid=?" : "")
2656a8a0617eSdan   );
2657b7045ab2Sdrh   if( zSql==0 ) return SQLITE_NOMEM;
2658b7045ab2Sdrh   rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0);
2659b7045ab2Sdrh   sqlite3_free(zSql);
2660a8a0617eSdan   if( rc==SQLITE_OK && (idxNum & 64) ){
2661a8a0617eSdan     assert( argc==1 );
2662a8a0617eSdan     rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]);
2663a8a0617eSdan   }
2664b7045ab2Sdrh   pCur->nRow = pCur->iRow = 0;
2665b7045ab2Sdrh   if( rc==SQLITE_OK ){
2666b7045ab2Sdrh     rc = sqlite3_step(pCur->pFullScan);
2667b7045ab2Sdrh     if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; }
2668b7045ab2Sdrh     if( rc==SQLITE_DONE ){ rc = SQLITE_OK; }
2669b7045ab2Sdrh   }else{
2670b7045ab2Sdrh     pCur->iRow = 0;
2671b7045ab2Sdrh   }
2672b7045ab2Sdrh   return rc;
2673b7045ab2Sdrh }
2674b7045ab2Sdrh 
2675b7045ab2Sdrh 
2676b7045ab2Sdrh /*
2677b7045ab2Sdrh ** Called to "rewind" a cursor back to the beginning so that
2678b7045ab2Sdrh ** it starts its output over again.  Always called at least once
2679b7045ab2Sdrh ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call.
2680b7045ab2Sdrh */
spellfix1Filter(sqlite3_vtab_cursor * cur,int idxNum,const char * idxStr,int argc,sqlite3_value ** argv)2681b7045ab2Sdrh static int spellfix1Filter(
2682b7045ab2Sdrh   sqlite3_vtab_cursor *cur,
2683b7045ab2Sdrh   int idxNum, const char *idxStr,
2684b7045ab2Sdrh   int argc, sqlite3_value **argv
2685b7045ab2Sdrh ){
2686b7045ab2Sdrh   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2687b7045ab2Sdrh   int rc;
2688b20a42e3Sdan   pCur->idxNum = idxNum;
2689b7045ab2Sdrh   if( idxNum & 1 ){
2690b20a42e3Sdan     rc = spellfix1FilterForMatch(pCur, argc, argv);
2691b7045ab2Sdrh   }else{
2692b20a42e3Sdan     rc = spellfix1FilterForFullScan(pCur, argc, argv);
2693b7045ab2Sdrh   }
2694b7045ab2Sdrh   return rc;
2695b7045ab2Sdrh }
2696b7045ab2Sdrh 
2697b7045ab2Sdrh 
2698b7045ab2Sdrh /*
2699b7045ab2Sdrh ** Advance a cursor to its next row of output
2700b7045ab2Sdrh */
spellfix1Next(sqlite3_vtab_cursor * cur)2701b7045ab2Sdrh static int spellfix1Next(sqlite3_vtab_cursor *cur){
2702b7045ab2Sdrh   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2703b7045ab2Sdrh   int rc = SQLITE_OK;
2704b7045ab2Sdrh   if( pCur->iRow < pCur->nRow ){
2705b7045ab2Sdrh     if( pCur->pFullScan ){
2706b7045ab2Sdrh       rc = sqlite3_step(pCur->pFullScan);
2707b7045ab2Sdrh       if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow;
2708b7045ab2Sdrh       if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK;
2709b7045ab2Sdrh     }else{
2710b7045ab2Sdrh       pCur->iRow++;
2711b7045ab2Sdrh     }
2712b7045ab2Sdrh   }
2713b7045ab2Sdrh   return rc;
2714b7045ab2Sdrh }
2715b7045ab2Sdrh 
2716b7045ab2Sdrh /*
2717b7045ab2Sdrh ** Return TRUE if we are at the end-of-file
2718b7045ab2Sdrh */
spellfix1Eof(sqlite3_vtab_cursor * cur)2719b7045ab2Sdrh static int spellfix1Eof(sqlite3_vtab_cursor *cur){
2720b7045ab2Sdrh   spellfix1_cursor *pCur = (spellfix1_cursor *)cur;
2721b7045ab2Sdrh   return pCur->iRow>=pCur->nRow;
2722b7045ab2Sdrh }
2723b7045ab2Sdrh 
2724b7045ab2Sdrh /*
2725b7045ab2Sdrh ** Return columns from the current row.
2726b7045ab2Sdrh */
spellfix1Column(sqlite3_vtab_cursor * cur,sqlite3_context * ctx,int i)2727b7045ab2Sdrh static int spellfix1Column(
2728b7045ab2Sdrh   sqlite3_vtab_cursor *cur,
2729b7045ab2Sdrh   sqlite3_context *ctx,
2730b7045ab2Sdrh   int i
2731b7045ab2Sdrh ){
2732b7045ab2Sdrh   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2733b7045ab2Sdrh   if( pCur->pFullScan ){
2734b7045ab2Sdrh     if( i<=SPELLFIX_COL_LANGID ){
2735b7045ab2Sdrh       sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i));
2736b7045ab2Sdrh     }else{
2737b7045ab2Sdrh       sqlite3_result_null(ctx);
2738b7045ab2Sdrh     }
2739b7045ab2Sdrh     return SQLITE_OK;
2740b7045ab2Sdrh   }
2741b7045ab2Sdrh   switch( i ){
2742b7045ab2Sdrh     case SPELLFIX_COL_WORD: {
2743b7045ab2Sdrh       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC);
2744b7045ab2Sdrh       break;
2745b7045ab2Sdrh     }
2746b7045ab2Sdrh     case SPELLFIX_COL_RANK: {
2747b7045ab2Sdrh       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank);
2748b7045ab2Sdrh       break;
2749b7045ab2Sdrh     }
2750b7045ab2Sdrh     case SPELLFIX_COL_DISTANCE: {
2751b7045ab2Sdrh       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance);
2752b7045ab2Sdrh       break;
2753b7045ab2Sdrh     }
2754b7045ab2Sdrh     case SPELLFIX_COL_LANGID: {
2755b7045ab2Sdrh       sqlite3_result_int(ctx, pCur->iLang);
2756b7045ab2Sdrh       break;
2757b7045ab2Sdrh     }
2758b7045ab2Sdrh     case SPELLFIX_COL_SCORE: {
2759b7045ab2Sdrh       sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore);
2760b7045ab2Sdrh       break;
2761b7045ab2Sdrh     }
2762b7045ab2Sdrh     case SPELLFIX_COL_MATCHLEN: {
2763b7045ab2Sdrh       int iMatchlen = pCur->a[pCur->iRow].iMatchlen;
2764b7045ab2Sdrh       if( iMatchlen<0 ){
2765b7045ab2Sdrh         int nPattern = (int)strlen(pCur->zPattern);
2766b7045ab2Sdrh         char *zWord = pCur->a[pCur->iRow].zWord;
2767b7045ab2Sdrh         int nWord = (int)strlen(zWord);
2768b7045ab2Sdrh 
2769b7045ab2Sdrh         if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){
2770b7045ab2Sdrh           char *zTranslit;
2771b7045ab2Sdrh           int res;
2772b7045ab2Sdrh           zTranslit = (char *)transliterate((unsigned char *)zWord, nWord);
2773b7045ab2Sdrh           if( !zTranslit ) return SQLITE_NOMEM;
2774b7045ab2Sdrh           res = editdist1(pCur->zPattern, zTranslit, &iMatchlen);
2775b7045ab2Sdrh           sqlite3_free(zTranslit);
2776b7045ab2Sdrh           if( res<0 ) return SQLITE_NOMEM;
2777b7045ab2Sdrh           iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen);
2778b7045ab2Sdrh         }else{
2779b7045ab2Sdrh           iMatchlen = utf8Charlen(zWord, nWord);
2780b7045ab2Sdrh         }
2781b7045ab2Sdrh       }
2782b7045ab2Sdrh 
2783b7045ab2Sdrh       sqlite3_result_int(ctx, iMatchlen);
2784b7045ab2Sdrh       break;
2785b7045ab2Sdrh     }
2786b7045ab2Sdrh     case SPELLFIX_COL_PHONEHASH: {
2787b7045ab2Sdrh       sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC);
2788b7045ab2Sdrh       break;
2789b7045ab2Sdrh     }
2790b7045ab2Sdrh     case SPELLFIX_COL_TOP: {
2791b7045ab2Sdrh       sqlite3_result_int(ctx, pCur->iTop);
2792b7045ab2Sdrh       break;
2793b7045ab2Sdrh     }
2794b7045ab2Sdrh     case SPELLFIX_COL_SCOPE: {
2795b7045ab2Sdrh       sqlite3_result_int(ctx, pCur->iScope);
2796b7045ab2Sdrh       break;
2797b7045ab2Sdrh     }
2798b7045ab2Sdrh     case SPELLFIX_COL_SRCHCNT: {
2799b7045ab2Sdrh       sqlite3_result_int(ctx, pCur->nSearch);
2800b7045ab2Sdrh       break;
2801b7045ab2Sdrh     }
2802b7045ab2Sdrh     default: {
2803b7045ab2Sdrh       sqlite3_result_null(ctx);
2804b7045ab2Sdrh       break;
2805b7045ab2Sdrh     }
2806b7045ab2Sdrh   }
2807b7045ab2Sdrh   return SQLITE_OK;
2808b7045ab2Sdrh }
2809b7045ab2Sdrh 
2810b7045ab2Sdrh /*
2811b7045ab2Sdrh ** The rowid.
2812b7045ab2Sdrh */
spellfix1Rowid(sqlite3_vtab_cursor * cur,sqlite_int64 * pRowid)2813b7045ab2Sdrh static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){
2814b7045ab2Sdrh   spellfix1_cursor *pCur = (spellfix1_cursor*)cur;
2815b7045ab2Sdrh   if( pCur->pFullScan ){
2816b7045ab2Sdrh     *pRowid = sqlite3_column_int64(pCur->pFullScan, 4);
2817b7045ab2Sdrh   }else{
2818b7045ab2Sdrh     *pRowid = pCur->a[pCur->iRow].iRowid;
2819b7045ab2Sdrh   }
2820b7045ab2Sdrh   return SQLITE_OK;
2821b7045ab2Sdrh }
2822b7045ab2Sdrh 
2823b7045ab2Sdrh /*
282488d702e6Sdan ** This function is called by the xUpdate() method. It returns a string
282588d702e6Sdan ** containing the conflict mode that xUpdate() should use for the current
282688d702e6Sdan ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE".
282788d702e6Sdan */
spellfix1GetConflict(sqlite3 * db)282888d702e6Sdan static const char *spellfix1GetConflict(sqlite3 *db){
282988d702e6Sdan   static const char *azConflict[] = {
283088d702e6Sdan     /* Note: Instead of "FAIL" - "ABORT". */
283188d702e6Sdan     "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE"
283288d702e6Sdan   };
283388d702e6Sdan   int eConflict = sqlite3_vtab_on_conflict(db);
283488d702e6Sdan 
283588d702e6Sdan   assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE
283688d702e6Sdan        || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT
283788d702e6Sdan        || eConflict==SQLITE_REPLACE
283888d702e6Sdan   );
283988d702e6Sdan   assert( SQLITE_ROLLBACK==1 );
284088d702e6Sdan   assert( SQLITE_IGNORE==2 );
284188d702e6Sdan   assert( SQLITE_FAIL==3 );
284288d702e6Sdan   assert( SQLITE_ABORT==4 );
284388d702e6Sdan   assert( SQLITE_REPLACE==5 );
284488d702e6Sdan 
284588d702e6Sdan   return azConflict[eConflict-1];
284688d702e6Sdan }
284788d702e6Sdan 
284888d702e6Sdan /*
2849b7045ab2Sdrh ** The xUpdate() method.
2850b7045ab2Sdrh */
spellfix1Update(sqlite3_vtab * pVTab,int argc,sqlite3_value ** argv,sqlite_int64 * pRowid)2851b7045ab2Sdrh static int spellfix1Update(
2852b7045ab2Sdrh   sqlite3_vtab *pVTab,
2853b7045ab2Sdrh   int argc,
2854b7045ab2Sdrh   sqlite3_value **argv,
2855b7045ab2Sdrh   sqlite_int64 *pRowid
2856b7045ab2Sdrh ){
2857b7045ab2Sdrh   int rc = SQLITE_OK;
2858b7045ab2Sdrh   sqlite3_int64 rowid, newRowid;
2859b7045ab2Sdrh   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2860b7045ab2Sdrh   sqlite3 *db = p->db;
2861b7045ab2Sdrh 
2862b7045ab2Sdrh   if( argc==1 ){
2863b7045ab2Sdrh     /* A delete operation on the rowid given by argv[0] */
2864b7045ab2Sdrh     rowid = *pRowid = sqlite3_value_int64(argv[0]);
2865b7045ab2Sdrh     spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" "
2866b7045ab2Sdrh                            " WHERE id=%lld",
2867b7045ab2Sdrh                   p->zDbName, p->zTableName, rowid);
2868b7045ab2Sdrh   }else{
2869b7045ab2Sdrh     const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]);
2870b7045ab2Sdrh     int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]);
2871b7045ab2Sdrh     int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]);
2872b7045ab2Sdrh     int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]);
2873b7045ab2Sdrh     const unsigned char *zSoundslike =
2874b7045ab2Sdrh            sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2875b7045ab2Sdrh     int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]);
2876b7045ab2Sdrh     char *zK1, *zK2;
2877b7045ab2Sdrh     int i;
2878b7045ab2Sdrh     char c;
287988d702e6Sdan     const char *zConflict = spellfix1GetConflict(db);
2880b7045ab2Sdrh 
2881b7045ab2Sdrh     if( zWord==0 ){
2882b7045ab2Sdrh       /* Inserts of the form:  INSERT INTO table(command) VALUES('xyzzy');
2883b7045ab2Sdrh       ** cause zWord to be NULL, so we look at the "command" column to see
2884b7045ab2Sdrh       ** what special actions to take */
2885b7045ab2Sdrh       const char *zCmd =
2886b7045ab2Sdrh          (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]);
2887b7045ab2Sdrh       if( zCmd==0 ){
2888f8396b20Sdrh         pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word",
2889b7045ab2Sdrh                                          p->zTableName);
2890b7045ab2Sdrh         return SQLITE_CONSTRAINT_NOTNULL;
2891b7045ab2Sdrh       }
2892b7045ab2Sdrh       if( strcmp(zCmd,"reset")==0 ){
2893b7045ab2Sdrh         /* Reset the  edit cost table (if there is one). */
2894b7045ab2Sdrh         editDist3ConfigDelete(p->pConfig3);
2895b7045ab2Sdrh         p->pConfig3 = 0;
2896b7045ab2Sdrh         return SQLITE_OK;
2897b7045ab2Sdrh       }
2898b7045ab2Sdrh       if( strncmp(zCmd,"edit_cost_table=",16)==0 ){
2899b7045ab2Sdrh         editDist3ConfigDelete(p->pConfig3);
2900b7045ab2Sdrh         p->pConfig3 = 0;
2901b7045ab2Sdrh         sqlite3_free(p->zCostTable);
2902b7045ab2Sdrh         p->zCostTable = spellfix1Dequote(zCmd+16);
2903b7045ab2Sdrh         if( p->zCostTable==0 ) return SQLITE_NOMEM;
2904b7045ab2Sdrh         if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){
2905b7045ab2Sdrh           sqlite3_free(p->zCostTable);
2906b7045ab2Sdrh           p->zCostTable = 0;
2907b7045ab2Sdrh         }
2908b7045ab2Sdrh         return SQLITE_OK;
2909b7045ab2Sdrh       }
2910b7045ab2Sdrh       pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"",
2911b7045ab2Sdrh                                        p->zTableName, zCmd);
2912b7045ab2Sdrh       return SQLITE_ERROR;
2913b7045ab2Sdrh     }
2914b7045ab2Sdrh     if( iRank<1 ) iRank = 1;
2915b7045ab2Sdrh     if( zSoundslike ){
2916b7045ab2Sdrh       zK1 = (char*)transliterate(zSoundslike, nSoundslike);
2917b7045ab2Sdrh     }else{
2918b7045ab2Sdrh       zK1 = (char*)transliterate(zWord, nWord);
2919b7045ab2Sdrh     }
2920b7045ab2Sdrh     if( zK1==0 ) return SQLITE_NOMEM;
2921b7045ab2Sdrh     for(i=0; (c = zK1[i])!=0; i++){
2922b7045ab2Sdrh        if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A';
2923b7045ab2Sdrh     }
2924b7045ab2Sdrh     zK2 = (char*)phoneticHash((const unsigned char*)zK1, i);
2925b7045ab2Sdrh     if( zK2==0 ){
2926b7045ab2Sdrh       sqlite3_free(zK1);
2927b7045ab2Sdrh       return SQLITE_NOMEM;
2928b7045ab2Sdrh     }
2929b7045ab2Sdrh     if( sqlite3_value_type(argv[0])==SQLITE_NULL ){
29305ab56707Sdrh       if( sqlite3_value_type(argv[1])==SQLITE_NULL ){
2931b7045ab2Sdrh         spellfix1DbExec(&rc, db,
2932b7045ab2Sdrh                "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) "
2933e2d27e02Sdrh                "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)",
2934b7045ab2Sdrh                p->zDbName, p->zTableName,
2935e2d27e02Sdrh                iRank, iLang, zWord, zK1, zWord, zK2
2936b7045ab2Sdrh         );
29375ab56707Sdrh       }else{
29385ab56707Sdrh         newRowid = sqlite3_value_int64(argv[1]);
29395ab56707Sdrh         spellfix1DbExec(&rc, db,
294088d702e6Sdan             "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) "
2941e2d27e02Sdrh             "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)",
294288d702e6Sdan             zConflict, p->zDbName, p->zTableName,
2943e2d27e02Sdrh             newRowid, iRank, iLang, zWord, zK1, zWord, zK2
29445ab56707Sdrh         );
29455ab56707Sdrh       }
2946b7045ab2Sdrh       *pRowid = sqlite3_last_insert_rowid(db);
2947b7045ab2Sdrh     }else{
2948b7045ab2Sdrh       rowid = sqlite3_value_int64(argv[0]);
2949b7045ab2Sdrh       newRowid = *pRowid = sqlite3_value_int64(argv[1]);
2950b7045ab2Sdrh       spellfix1DbExec(&rc, db,
295188d702e6Sdan              "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d,"
2952e2d27e02Sdrh              " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld",
295388d702e6Sdan              zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang,
2954e2d27e02Sdrh              zWord, zK1, zWord, zK2, rowid
2955b7045ab2Sdrh       );
2956b7045ab2Sdrh     }
2957b7045ab2Sdrh     sqlite3_free(zK1);
2958b7045ab2Sdrh     sqlite3_free(zK2);
2959b7045ab2Sdrh   }
2960b7045ab2Sdrh   return rc;
2961b7045ab2Sdrh }
2962b7045ab2Sdrh 
2963b7045ab2Sdrh /*
2964b7045ab2Sdrh ** Rename the spellfix1 table.
2965b7045ab2Sdrh */
spellfix1Rename(sqlite3_vtab * pVTab,const char * zNew)2966b7045ab2Sdrh static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){
2967b7045ab2Sdrh   spellfix1_vtab *p = (spellfix1_vtab*)pVTab;
2968b7045ab2Sdrh   sqlite3 *db = p->db;
2969b7045ab2Sdrh   int rc = SQLITE_OK;
2970b7045ab2Sdrh   char *zNewName = sqlite3_mprintf("%s", zNew);
2971b7045ab2Sdrh   if( zNewName==0 ){
2972b7045ab2Sdrh     return SQLITE_NOMEM;
2973b7045ab2Sdrh   }
2974b7045ab2Sdrh   spellfix1DbExec(&rc, db,
2975b7045ab2Sdrh      "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"",
2976b7045ab2Sdrh      p->zDbName, p->zTableName, zNewName
2977b7045ab2Sdrh   );
2978b7045ab2Sdrh   if( rc==SQLITE_OK ){
2979b7045ab2Sdrh     sqlite3_free(p->zTableName);
2980b7045ab2Sdrh     p->zTableName = zNewName;
2981b7045ab2Sdrh   }else{
2982b7045ab2Sdrh     sqlite3_free(zNewName);
2983b7045ab2Sdrh   }
2984b7045ab2Sdrh   return rc;
2985b7045ab2Sdrh }
2986b7045ab2Sdrh 
2987b7045ab2Sdrh 
2988b7045ab2Sdrh /*
2989b7045ab2Sdrh ** A virtual table module that provides fuzzy search.
2990b7045ab2Sdrh */
2991b7045ab2Sdrh static sqlite3_module spellfix1Module = {
2992b7045ab2Sdrh   0,                       /* iVersion */
2993b7045ab2Sdrh   spellfix1Create,         /* xCreate - handle CREATE VIRTUAL TABLE */
2994b7045ab2Sdrh   spellfix1Connect,        /* xConnect - reconnected to an existing table */
2995b7045ab2Sdrh   spellfix1BestIndex,      /* xBestIndex - figure out how to do a query */
2996b7045ab2Sdrh   spellfix1Disconnect,     /* xDisconnect - close a connection */
2997b7045ab2Sdrh   spellfix1Destroy,        /* xDestroy - handle DROP TABLE */
2998b7045ab2Sdrh   spellfix1Open,           /* xOpen - open a cursor */
2999b7045ab2Sdrh   spellfix1Close,          /* xClose - close a cursor */
3000b7045ab2Sdrh   spellfix1Filter,         /* xFilter - configure scan constraints */
3001b7045ab2Sdrh   spellfix1Next,           /* xNext - advance a cursor */
3002b7045ab2Sdrh   spellfix1Eof,            /* xEof - check for end of scan */
3003b7045ab2Sdrh   spellfix1Column,         /* xColumn - read data */
3004b7045ab2Sdrh   spellfix1Rowid,          /* xRowid - read data */
3005b7045ab2Sdrh   spellfix1Update,         /* xUpdate */
3006b7045ab2Sdrh   0,                       /* xBegin */
3007b7045ab2Sdrh   0,                       /* xSync */
3008b7045ab2Sdrh   0,                       /* xCommit */
3009b7045ab2Sdrh   0,                       /* xRollback */
3010b7045ab2Sdrh   0,                       /* xFindMethod */
3011b7045ab2Sdrh   spellfix1Rename,         /* xRename */
3012b7045ab2Sdrh };
3013b7045ab2Sdrh 
3014b7045ab2Sdrh /*
3015b7045ab2Sdrh ** Register the various functions and the virtual table.
3016b7045ab2Sdrh */
spellfix1Register(sqlite3 * db)3017b7045ab2Sdrh static int spellfix1Register(sqlite3 *db){
3018b7045ab2Sdrh   int rc = SQLITE_OK;
3019b7045ab2Sdrh   int i;
3020537e7028Sdrh   rc = sqlite3_create_function(db, "spellfix1_translit", 1,
3021537e7028Sdrh                                SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3022b7045ab2Sdrh                                 transliterateSqlFunc, 0, 0);
3023b7045ab2Sdrh   if( rc==SQLITE_OK ){
3024537e7028Sdrh     rc = sqlite3_create_function(db, "spellfix1_editdist", 2,
3025537e7028Sdrh                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3026b7045ab2Sdrh                                   editdistSqlFunc, 0, 0);
3027b7045ab2Sdrh   }
3028b7045ab2Sdrh   if( rc==SQLITE_OK ){
3029537e7028Sdrh     rc = sqlite3_create_function(db, "spellfix1_phonehash", 1,
3030537e7028Sdrh                                  SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3031b7045ab2Sdrh                                   phoneticHashSqlFunc, 0, 0);
3032b7045ab2Sdrh   }
3033b7045ab2Sdrh   if( rc==SQLITE_OK ){
3034537e7028Sdrh     rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1,
3035537e7028Sdrh                                   SQLITE_UTF8|SQLITE_DETERMINISTIC, 0,
3036b7045ab2Sdrh                                   scriptCodeSqlFunc, 0, 0);
3037b7045ab2Sdrh   }
3038b7045ab2Sdrh   if( rc==SQLITE_OK ){
3039b7045ab2Sdrh     rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0);
3040b7045ab2Sdrh   }
3041b7045ab2Sdrh   if( rc==SQLITE_OK ){
3042b7045ab2Sdrh     rc = editDist3Install(db);
3043b7045ab2Sdrh   }
3044b7045ab2Sdrh 
3045b7045ab2Sdrh   /* Verify sanity of the translit[] table */
3046b7045ab2Sdrh   for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){
3047b7045ab2Sdrh     assert( translit[i].cFrom<translit[i+1].cFrom );
3048b7045ab2Sdrh   }
3049b7045ab2Sdrh 
3050b7045ab2Sdrh   return rc;
3051b7045ab2Sdrh }
3052b7045ab2Sdrh 
305311f71d6aSdan #endif /* SQLITE_OMIT_VIRTUALTABLE */
305411f71d6aSdan 
3055b7045ab2Sdrh /*
3056b7045ab2Sdrh ** Extension load function.
3057b7045ab2Sdrh */
3058b7045ab2Sdrh #ifdef _WIN32
3059b7045ab2Sdrh __declspec(dllexport)
3060b7045ab2Sdrh #endif
sqlite3_spellfix_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)3061b7045ab2Sdrh int sqlite3_spellfix_init(
3062b7045ab2Sdrh   sqlite3 *db,
3063b7045ab2Sdrh   char **pzErrMsg,
3064b7045ab2Sdrh   const sqlite3_api_routines *pApi
3065b7045ab2Sdrh ){
3066b7045ab2Sdrh   SQLITE_EXTENSION_INIT2(pApi);
306711f71d6aSdan #ifndef SQLITE_OMIT_VIRTUALTABLE
3068b7045ab2Sdrh   return spellfix1Register(db);
306911f71d6aSdan #endif
307011f71d6aSdan   return SQLITE_OK;
3071b7045ab2Sdrh }
3072