1 /* 2 ** 2012 April 10 3 ** 4 ** The author disclaims copyright to this source code. In place of 5 ** a legal notice, here is a blessing: 6 ** 7 ** May you do good and not evil. 8 ** May you find forgiveness for yourself and forgive others. 9 ** May you share freely, never taking more than you give. 10 ** 11 ************************************************************************* 12 ** 13 ** This module implements the spellfix1 VIRTUAL TABLE that can be used 14 ** to search a large vocabulary for close matches. See separate 15 ** documentation (http://www.sqlite.org/spellfix1.html) for details. 16 */ 17 #include "sqlite3ext.h" 18 SQLITE_EXTENSION_INIT1 19 20 #ifndef SQLITE_AMALGAMATION 21 # include <string.h> 22 # include <stdio.h> 23 # include <stdlib.h> 24 # include <assert.h> 25 # define ALWAYS(X) 1 26 # define NEVER(X) 0 27 typedef unsigned char u8; 28 typedef unsigned short u16; 29 #endif 30 #include <ctype.h> 31 32 #ifndef SQLITE_OMIT_VIRTUALTABLE 33 34 /* 35 ** Character classes for ASCII characters: 36 ** 37 ** 0 '' Silent letters: H W 38 ** 1 'A' Any vowel: A E I O U (Y) 39 ** 2 'B' A bilabeal stop or fricative: B F P V W 40 ** 3 'C' Other fricatives or back stops: C G J K Q S X Z 41 ** 4 'D' Alveolar stops: D T 42 ** 5 'H' Letter H at the beginning of a word 43 ** 6 'L' Glide: L 44 ** 7 'R' Semivowel: R 45 ** 8 'M' Nasals: M N 46 ** 9 'Y' Letter Y at the beginning of a word. 47 ** 10 '9' Digits: 0 1 2 3 4 5 6 7 8 9 48 ** 11 ' ' White space 49 ** 12 '?' Other. 50 */ 51 #define CCLASS_SILENT 0 52 #define CCLASS_VOWEL 1 53 #define CCLASS_B 2 54 #define CCLASS_C 3 55 #define CCLASS_D 4 56 #define CCLASS_H 5 57 #define CCLASS_L 6 58 #define CCLASS_R 7 59 #define CCLASS_M 8 60 #define CCLASS_Y 9 61 #define CCLASS_DIGIT 10 62 #define CCLASS_SPACE 11 63 #define CCLASS_OTHER 12 64 65 /* 66 ** The following table gives the character class for non-initial ASCII 67 ** characters. 68 */ 69 static const unsigned char midClass[] = { 70 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 71 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 72 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 73 /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 74 /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, 75 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 76 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 77 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 78 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 79 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 80 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, 81 /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, 82 /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, 83 /* ' */ CCLASS_SILENT, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, 84 /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, 85 /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, 86 /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, 87 /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, 88 /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, 89 /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, 90 /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, 91 /* ? */ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, 92 /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, 93 /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, 94 /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, 95 /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, 96 /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, 97 /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, 98 /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, 99 /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_VOWEL, 100 /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, 101 /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, 102 /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, 103 /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, 104 /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, 105 /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, 106 /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, 107 /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, 108 /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, 109 /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, 110 /* x */ CCLASS_C, /* y */ CCLASS_VOWEL, /* z */ CCLASS_C, 111 /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, 112 /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, 113 }; 114 /* 115 ** This tables gives the character class for ASCII characters that form the 116 ** initial character of a word. The only difference from midClass is with 117 ** the letters H, W, and Y. 118 */ 119 static const unsigned char initClass[] = { 120 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 121 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 122 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 123 /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 124 /* */ CCLASS_SPACE, /* */ CCLASS_SPACE, /* */ CCLASS_OTHER, 125 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 126 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 127 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 128 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 129 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, 130 /* */ CCLASS_OTHER, /* */ CCLASS_OTHER, /* */ CCLASS_SPACE, 131 /* ! */ CCLASS_OTHER, /* " */ CCLASS_OTHER, /* # */ CCLASS_OTHER, 132 /* $ */ CCLASS_OTHER, /* % */ CCLASS_OTHER, /* & */ CCLASS_OTHER, 133 /* ' */ CCLASS_OTHER, /* ( */ CCLASS_OTHER, /* ) */ CCLASS_OTHER, 134 /* * */ CCLASS_OTHER, /* + */ CCLASS_OTHER, /* , */ CCLASS_OTHER, 135 /* - */ CCLASS_OTHER, /* . */ CCLASS_OTHER, /* / */ CCLASS_OTHER, 136 /* 0 */ CCLASS_DIGIT, /* 1 */ CCLASS_DIGIT, /* 2 */ CCLASS_DIGIT, 137 /* 3 */ CCLASS_DIGIT, /* 4 */ CCLASS_DIGIT, /* 5 */ CCLASS_DIGIT, 138 /* 6 */ CCLASS_DIGIT, /* 7 */ CCLASS_DIGIT, /* 8 */ CCLASS_DIGIT, 139 /* 9 */ CCLASS_DIGIT, /* : */ CCLASS_OTHER, /* ; */ CCLASS_OTHER, 140 /* < */ CCLASS_OTHER, /* = */ CCLASS_OTHER, /* > */ CCLASS_OTHER, 141 /* ? */ CCLASS_OTHER, /* @ */ CCLASS_OTHER, /* A */ CCLASS_VOWEL, 142 /* B */ CCLASS_B, /* C */ CCLASS_C, /* D */ CCLASS_D, 143 /* E */ CCLASS_VOWEL, /* F */ CCLASS_B, /* G */ CCLASS_C, 144 /* H */ CCLASS_SILENT, /* I */ CCLASS_VOWEL, /* J */ CCLASS_C, 145 /* K */ CCLASS_C, /* L */ CCLASS_L, /* M */ CCLASS_M, 146 /* N */ CCLASS_M, /* O */ CCLASS_VOWEL, /* P */ CCLASS_B, 147 /* Q */ CCLASS_C, /* R */ CCLASS_R, /* S */ CCLASS_C, 148 /* T */ CCLASS_D, /* U */ CCLASS_VOWEL, /* V */ CCLASS_B, 149 /* W */ CCLASS_B, /* X */ CCLASS_C, /* Y */ CCLASS_Y, 150 /* Z */ CCLASS_C, /* [ */ CCLASS_OTHER, /* \ */ CCLASS_OTHER, 151 /* ] */ CCLASS_OTHER, /* ^ */ CCLASS_OTHER, /* _ */ CCLASS_OTHER, 152 /* ` */ CCLASS_OTHER, /* a */ CCLASS_VOWEL, /* b */ CCLASS_B, 153 /* c */ CCLASS_C, /* d */ CCLASS_D, /* e */ CCLASS_VOWEL, 154 /* f */ CCLASS_B, /* g */ CCLASS_C, /* h */ CCLASS_SILENT, 155 /* i */ CCLASS_VOWEL, /* j */ CCLASS_C, /* k */ CCLASS_C, 156 /* l */ CCLASS_L, /* m */ CCLASS_M, /* n */ CCLASS_M, 157 /* o */ CCLASS_VOWEL, /* p */ CCLASS_B, /* q */ CCLASS_C, 158 /* r */ CCLASS_R, /* s */ CCLASS_C, /* t */ CCLASS_D, 159 /* u */ CCLASS_VOWEL, /* v */ CCLASS_B, /* w */ CCLASS_B, 160 /* x */ CCLASS_C, /* y */ CCLASS_Y, /* z */ CCLASS_C, 161 /* { */ CCLASS_OTHER, /* | */ CCLASS_OTHER, /* } */ CCLASS_OTHER, 162 /* ~ */ CCLASS_OTHER, /* */ CCLASS_OTHER, 163 }; 164 165 /* 166 ** Mapping from the character class number (0-13) to a symbol for each 167 ** character class. Note that initClass[] can be used to map the class 168 ** symbol back into the class number. 169 */ 170 static const unsigned char className[] = ".ABCDHLRMY9 ?"; 171 172 /* 173 ** Generate a "phonetic hash" from a string of ASCII characters 174 ** in zIn[0..nIn-1]. 175 ** 176 ** * Map characters by character class as defined above. 177 ** * Omit double-letters 178 ** * Omit vowels beside R and L 179 ** * Omit T when followed by CH 180 ** * Omit W when followed by R 181 ** * Omit D when followed by J or G 182 ** * Omit K in KN or G in GN at the beginning of a word 183 ** 184 ** Space to hold the result is obtained from sqlite3_malloc() 185 ** 186 ** Return NULL if memory allocation fails. 187 */ 188 static unsigned char *phoneticHash(const unsigned char *zIn, int nIn){ 189 unsigned char *zOut = sqlite3_malloc64( nIn + 1 ); 190 int i; 191 int nOut = 0; 192 char cPrev = 0x77; 193 char cPrevX = 0x77; 194 const unsigned char *aClass = initClass; 195 196 if( zOut==0 ) return 0; 197 if( nIn>2 ){ 198 switch( zIn[0] ){ 199 case 'g': 200 case 'k': { 201 if( zIn[1]=='n' ){ zIn++; nIn--; } 202 break; 203 } 204 } 205 } 206 for(i=0; i<nIn; i++){ 207 unsigned char c = zIn[i]; 208 if( i+1<nIn ){ 209 if( c=='w' && zIn[i+1]=='r' ) continue; 210 if( c=='d' && (zIn[i+1]=='j' || zIn[i+1]=='g') ) continue; 211 if( i+2<nIn ){ 212 if( c=='t' && zIn[i+1]=='c' && zIn[i+2]=='h' ) continue; 213 } 214 } 215 c = aClass[c&0x7f]; 216 if( c==CCLASS_SPACE ) continue; 217 if( c==CCLASS_OTHER && cPrev!=CCLASS_DIGIT ) continue; 218 aClass = midClass; 219 if( c==CCLASS_VOWEL && (cPrevX==CCLASS_R || cPrevX==CCLASS_L) ){ 220 continue; /* No vowels beside L or R */ 221 } 222 if( (c==CCLASS_R || c==CCLASS_L) && cPrevX==CCLASS_VOWEL ){ 223 nOut--; /* No vowels beside L or R */ 224 } 225 cPrev = c; 226 if( c==CCLASS_SILENT ) continue; 227 cPrevX = c; 228 c = className[c]; 229 assert( nOut>=0 ); 230 if( nOut==0 || c!=zOut[nOut-1] ) zOut[nOut++] = c; 231 } 232 zOut[nOut] = 0; 233 return zOut; 234 } 235 236 /* 237 ** This is an SQL function wrapper around phoneticHash(). See 238 ** the description of phoneticHash() for additional information. 239 */ 240 static void phoneticHashSqlFunc( 241 sqlite3_context *context, 242 int argc, 243 sqlite3_value **argv 244 ){ 245 const unsigned char *zIn; 246 unsigned char *zOut; 247 248 zIn = sqlite3_value_text(argv[0]); 249 if( zIn==0 ) return; 250 zOut = phoneticHash(zIn, sqlite3_value_bytes(argv[0])); 251 if( zOut==0 ){ 252 sqlite3_result_error_nomem(context); 253 }else{ 254 sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); 255 } 256 } 257 258 /* 259 ** Return the character class number for a character given its 260 ** context. 261 */ 262 static char characterClass(char cPrev, char c){ 263 return cPrev==0 ? initClass[c&0x7f] : midClass[c&0x7f]; 264 } 265 266 /* 267 ** Return the cost of inserting or deleting character c immediately 268 ** following character cPrev. If cPrev==0, that means c is the first 269 ** character of the word. 270 */ 271 static int insertOrDeleteCost(char cPrev, char c, char cNext){ 272 char classC = characterClass(cPrev, c); 273 char classCprev; 274 275 if( classC==CCLASS_SILENT ){ 276 /* Insert or delete "silent" characters such as H or W */ 277 return 1; 278 } 279 if( cPrev==c ){ 280 /* Repeated characters, or miss a repeat */ 281 return 10; 282 } 283 if( classC==CCLASS_VOWEL && (cPrev=='r' || cNext=='r') ){ 284 return 20; /* Insert a vowel before or after 'r' */ 285 } 286 classCprev = characterClass(cPrev, cPrev); 287 if( classC==classCprev ){ 288 if( classC==CCLASS_VOWEL ){ 289 /* Remove or add a new vowel to a vowel cluster */ 290 return 15; 291 }else{ 292 /* Remove or add a consonant not in the same class */ 293 return 50; 294 } 295 } 296 297 /* any other character insertion or deletion */ 298 return 100; 299 } 300 301 /* 302 ** Divide the insertion cost by this factor when appending to the 303 ** end of the word. 304 */ 305 #define FINAL_INS_COST_DIV 4 306 307 /* 308 ** Return the cost of substituting cTo in place of cFrom assuming 309 ** the previous character is cPrev. If cPrev==0 then cTo is the first 310 ** character of the word. 311 */ 312 static int substituteCost(char cPrev, char cFrom, char cTo){ 313 char classFrom, classTo; 314 if( cFrom==cTo ){ 315 /* Exact match */ 316 return 0; 317 } 318 if( cFrom==(cTo^0x20) && ((cTo>='A' && cTo<='Z') || (cTo>='a' && cTo<='z')) ){ 319 /* differ only in case */ 320 return 0; 321 } 322 classFrom = characterClass(cPrev, cFrom); 323 classTo = characterClass(cPrev, cTo); 324 if( classFrom==classTo ){ 325 /* Same character class */ 326 return 40; 327 } 328 if( classFrom>=CCLASS_B && classFrom<=CCLASS_Y 329 && classTo>=CCLASS_B && classTo<=CCLASS_Y ){ 330 /* Convert from one consonant to another, but in a different class */ 331 return 75; 332 } 333 /* Any other subsitution */ 334 return 100; 335 } 336 337 /* 338 ** Given two strings zA and zB which are pure ASCII, return the cost 339 ** of transforming zA into zB. If zA ends with '*' assume that it is 340 ** a prefix of zB and give only minimal penalty for extra characters 341 ** on the end of zB. 342 ** 343 ** Smaller numbers mean a closer match. 344 ** 345 ** Negative values indicate an error: 346 ** -1 One of the inputs is NULL 347 ** -2 Non-ASCII characters on input 348 ** -3 Unable to allocate memory 349 ** 350 ** If pnMatch is not NULL, then *pnMatch is set to the number of bytes 351 ** of zB that matched the pattern in zA. If zA does not end with a '*', 352 ** then this value is always the number of bytes in zB (i.e. strlen(zB)). 353 ** If zA does end in a '*', then it is the number of bytes in the prefix 354 ** of zB that was deemed to match zA. 355 */ 356 static int editdist1(const char *zA, const char *zB, int *pnMatch){ 357 int nA, nB; /* Number of characters in zA[] and zB[] */ 358 int xA, xB; /* Loop counters for zA[] and zB[] */ 359 char cA = 0, cB; /* Current character of zA and zB */ 360 char cAprev, cBprev; /* Previous character of zA and zB */ 361 char cAnext, cBnext; /* Next character in zA and zB */ 362 int d; /* North-west cost value */ 363 int dc = 0; /* North-west character value */ 364 int res; /* Final result */ 365 int *m; /* The cost matrix */ 366 char *cx; /* Corresponding character values */ 367 int *toFree = 0; /* Malloced space */ 368 int nMatch = 0; 369 int mStack[60+15]; /* Stack space to use if not too much is needed */ 370 371 /* Early out if either input is NULL */ 372 if( zA==0 || zB==0 ) return -1; 373 374 /* Skip any common prefix */ 375 while( zA[0] && zA[0]==zB[0] ){ dc = zA[0]; zA++; zB++; nMatch++; } 376 if( pnMatch ) *pnMatch = nMatch; 377 if( zA[0]==0 && zB[0]==0 ) return 0; 378 379 #if 0 380 printf("A=\"%s\" B=\"%s\" dc=%c\n", zA, zB, dc?dc:' '); 381 #endif 382 383 /* Verify input strings and measure their lengths */ 384 for(nA=0; zA[nA]; nA++){ 385 if( zA[nA]&0x80 ) return -2; 386 } 387 for(nB=0; zB[nB]; nB++){ 388 if( zB[nB]&0x80 ) return -2; 389 } 390 391 /* Special processing if either string is empty */ 392 if( nA==0 ){ 393 cBprev = (char)dc; 394 for(xB=res=0; (cB = zB[xB])!=0; xB++){ 395 res += insertOrDeleteCost(cBprev, cB, zB[xB+1])/FINAL_INS_COST_DIV; 396 cBprev = cB; 397 } 398 return res; 399 } 400 if( nB==0 ){ 401 cAprev = (char)dc; 402 for(xA=res=0; (cA = zA[xA])!=0; xA++){ 403 res += insertOrDeleteCost(cAprev, cA, zA[xA+1]); 404 cAprev = cA; 405 } 406 return res; 407 } 408 409 /* A is a prefix of B */ 410 if( zA[0]=='*' && zA[1]==0 ) return 0; 411 412 /* Allocate and initialize the Wagner matrix */ 413 if( nB<(sizeof(mStack)*4)/(sizeof(mStack[0])*5) ){ 414 m = mStack; 415 }else{ 416 m = toFree = sqlite3_malloc64( (nB+1)*5*sizeof(m[0])/4 ); 417 if( m==0 ) return -3; 418 } 419 cx = (char*)&m[nB+1]; 420 421 /* Compute the Wagner edit distance */ 422 m[0] = 0; 423 cx[0] = (char)dc; 424 cBprev = (char)dc; 425 for(xB=1; xB<=nB; xB++){ 426 cBnext = zB[xB]; 427 cB = zB[xB-1]; 428 cx[xB] = cB; 429 m[xB] = m[xB-1] + insertOrDeleteCost(cBprev, cB, cBnext); 430 cBprev = cB; 431 } 432 cAprev = (char)dc; 433 for(xA=1; xA<=nA; xA++){ 434 int lastA = (xA==nA); 435 cA = zA[xA-1]; 436 cAnext = zA[xA]; 437 if( cA=='*' && lastA ) break; 438 d = m[0]; 439 dc = cx[0]; 440 m[0] = d + insertOrDeleteCost(cAprev, cA, cAnext); 441 cBprev = 0; 442 for(xB=1; xB<=nB; xB++){ 443 int totalCost, insCost, delCost, subCost, ncx; 444 cB = zB[xB-1]; 445 cBnext = zB[xB]; 446 447 /* Cost to insert cB */ 448 insCost = insertOrDeleteCost(cx[xB-1], cB, cBnext); 449 if( lastA ) insCost /= FINAL_INS_COST_DIV; 450 451 /* Cost to delete cA */ 452 delCost = insertOrDeleteCost(cx[xB], cA, cBnext); 453 454 /* Cost to substitute cA->cB */ 455 subCost = substituteCost(cx[xB-1], cA, cB); 456 457 /* Best cost */ 458 totalCost = insCost + m[xB-1]; 459 ncx = cB; 460 if( (delCost + m[xB])<totalCost ){ 461 totalCost = delCost + m[xB]; 462 ncx = cA; 463 } 464 if( (subCost + d)<totalCost ){ 465 totalCost = subCost + d; 466 } 467 468 #if 0 469 printf("%d,%d d=%4d u=%4d r=%4d dc=%c cA=%c cB=%c" 470 " ins=%4d del=%4d sub=%4d t=%4d ncx=%c\n", 471 xA, xB, d, m[xB], m[xB-1], dc?dc:' ', cA, cB, 472 insCost, delCost, subCost, totalCost, ncx?ncx:' '); 473 #endif 474 475 /* Update the matrix */ 476 d = m[xB]; 477 dc = cx[xB]; 478 m[xB] = totalCost; 479 cx[xB] = (char)ncx; 480 cBprev = cB; 481 } 482 cAprev = cA; 483 } 484 485 /* Free the wagner matrix and return the result */ 486 if( cA=='*' ){ 487 res = m[1]; 488 for(xB=1; xB<=nB; xB++){ 489 if( m[xB]<res ){ 490 res = m[xB]; 491 if( pnMatch ) *pnMatch = xB+nMatch; 492 } 493 } 494 }else{ 495 res = m[nB]; 496 /* In the current implementation, pnMatch is always NULL if zA does 497 ** not end in "*" */ 498 assert( pnMatch==0 ); 499 } 500 sqlite3_free(toFree); 501 return res; 502 } 503 504 /* 505 ** Function: editdist(A,B) 506 ** 507 ** Return the cost of transforming string A into string B. Both strings 508 ** must be pure ASCII text. If A ends with '*' then it is assumed to be 509 ** a prefix of B and extra characters on the end of B have minimal additional 510 ** cost. 511 */ 512 static void editdistSqlFunc( 513 sqlite3_context *context, 514 int argc, 515 sqlite3_value **argv 516 ){ 517 int res = editdist1( 518 (const char*)sqlite3_value_text(argv[0]), 519 (const char*)sqlite3_value_text(argv[1]), 520 0); 521 if( res<0 ){ 522 if( res==(-3) ){ 523 sqlite3_result_error_nomem(context); 524 }else if( res==(-2) ){ 525 sqlite3_result_error(context, "non-ASCII input to editdist()", -1); 526 }else{ 527 sqlite3_result_error(context, "NULL input to editdist()", -1); 528 } 529 }else{ 530 sqlite3_result_int(context, res); 531 } 532 } 533 534 /* End of the fixed-cost edit distance implementation 535 ****************************************************************************** 536 ***************************************************************************** 537 ** Begin: Configurable cost unicode edit distance routines 538 */ 539 /* Forward declaration of structures */ 540 typedef struct EditDist3Cost EditDist3Cost; 541 typedef struct EditDist3Config EditDist3Config; 542 typedef struct EditDist3Point EditDist3Point; 543 typedef struct EditDist3From EditDist3From; 544 typedef struct EditDist3FromString EditDist3FromString; 545 typedef struct EditDist3To EditDist3To; 546 typedef struct EditDist3ToString EditDist3ToString; 547 typedef struct EditDist3Lang EditDist3Lang; 548 549 550 /* 551 ** An entry in the edit cost table 552 */ 553 struct EditDist3Cost { 554 EditDist3Cost *pNext; /* Next cost element */ 555 u8 nFrom; /* Number of bytes in aFrom */ 556 u8 nTo; /* Number of bytes in aTo */ 557 u16 iCost; /* Cost of this transformation */ 558 char a[4] ; /* FROM string followed by TO string */ 559 /* Additional TO and FROM string bytes appended as necessary */ 560 }; 561 562 /* 563 ** Edit costs for a particular language ID 564 */ 565 struct EditDist3Lang { 566 int iLang; /* Language ID */ 567 int iInsCost; /* Default insertion cost */ 568 int iDelCost; /* Default deletion cost */ 569 int iSubCost; /* Default substitution cost */ 570 EditDist3Cost *pCost; /* Costs */ 571 }; 572 573 574 /* 575 ** The default EditDist3Lang object, with default costs. 576 */ 577 static const EditDist3Lang editDist3Lang = { 0, 100, 100, 150, 0 }; 578 579 /* 580 ** Complete configuration 581 */ 582 struct EditDist3Config { 583 int nLang; /* Number of language IDs. Size of a[] */ 584 EditDist3Lang *a; /* One for each distinct language ID */ 585 }; 586 587 /* 588 ** Extra information about each character in the FROM string. 589 */ 590 struct EditDist3From { 591 int nSubst; /* Number of substitution cost entries */ 592 int nDel; /* Number of deletion cost entries */ 593 int nByte; /* Number of bytes in this character */ 594 EditDist3Cost **apSubst; /* Array of substitution costs for this element */ 595 EditDist3Cost **apDel; /* Array of deletion cost entries */ 596 }; 597 598 /* 599 ** A precompiled FROM string. 600 * 601 ** In the common case we expect the FROM string to be reused multiple times. 602 ** In other words, the common case will be to measure the edit distance 603 ** from a single origin string to multiple target strings. 604 */ 605 struct EditDist3FromString { 606 char *z; /* The complete text of the FROM string */ 607 int n; /* Number of characters in the FROM string */ 608 int isPrefix; /* True if ends with '*' character */ 609 EditDist3From *a; /* Extra info about each char of the FROM string */ 610 }; 611 612 /* 613 ** Extra information about each character in the TO string. 614 */ 615 struct EditDist3To { 616 int nIns; /* Number of insertion cost entries */ 617 int nByte; /* Number of bytes in this character */ 618 EditDist3Cost **apIns; /* Array of deletion cost entries */ 619 }; 620 621 /* 622 ** A precompiled FROM string 623 */ 624 struct EditDist3ToString { 625 char *z; /* The complete text of the TO string */ 626 int n; /* Number of characters in the TO string */ 627 EditDist3To *a; /* Extra info about each char of the TO string */ 628 }; 629 630 /* 631 ** Clear or delete an instance of the object that records all edit-distance 632 ** weights. 633 */ 634 static void editDist3ConfigClear(EditDist3Config *p){ 635 int i; 636 if( p==0 ) return; 637 for(i=0; i<p->nLang; i++){ 638 EditDist3Cost *pCost, *pNext; 639 pCost = p->a[i].pCost; 640 while( pCost ){ 641 pNext = pCost->pNext; 642 sqlite3_free(pCost); 643 pCost = pNext; 644 } 645 } 646 sqlite3_free(p->a); 647 memset(p, 0, sizeof(*p)); 648 } 649 static void editDist3ConfigDelete(void *pIn){ 650 EditDist3Config *p = (EditDist3Config*)pIn; 651 editDist3ConfigClear(p); 652 sqlite3_free(p); 653 } 654 655 /* 656 ** Load all edit-distance weights from a table. 657 */ 658 static int editDist3ConfigLoad( 659 EditDist3Config *p, /* The edit distance configuration to load */ 660 sqlite3 *db, /* Load from this database */ 661 const char *zTable /* Name of the table from which to load */ 662 ){ 663 sqlite3_stmt *pStmt; 664 int rc, rc2; 665 char *zSql; 666 int iLangPrev = -9999; 667 EditDist3Lang *pLang = 0; 668 669 zSql = sqlite3_mprintf("SELECT iLang, cFrom, cTo, iCost" 670 " FROM \"%w\" WHERE iLang>=0 ORDER BY iLang", zTable); 671 if( zSql==0 ) return SQLITE_NOMEM; 672 rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0); 673 sqlite3_free(zSql); 674 if( rc ) return rc; 675 editDist3ConfigClear(p); 676 while( sqlite3_step(pStmt)==SQLITE_ROW ){ 677 int iLang = sqlite3_column_int(pStmt, 0); 678 const char *zFrom = (const char*)sqlite3_column_text(pStmt, 1); 679 int nFrom = zFrom ? sqlite3_column_bytes(pStmt, 1) : 0; 680 const char *zTo = (const char*)sqlite3_column_text(pStmt, 2); 681 int nTo = zTo ? sqlite3_column_bytes(pStmt, 2) : 0; 682 int iCost = sqlite3_column_int(pStmt, 3); 683 684 assert( zFrom!=0 || nFrom==0 ); 685 assert( zTo!=0 || nTo==0 ); 686 if( nFrom>100 || nTo>100 ) continue; 687 if( iCost<0 ) continue; 688 if( pLang==0 || iLang!=iLangPrev ){ 689 EditDist3Lang *pNew; 690 pNew = sqlite3_realloc64(p->a, (p->nLang+1)*sizeof(p->a[0])); 691 if( pNew==0 ){ rc = SQLITE_NOMEM; break; } 692 p->a = pNew; 693 pLang = &p->a[p->nLang]; 694 p->nLang++; 695 pLang->iLang = iLang; 696 pLang->iInsCost = 100; 697 pLang->iDelCost = 100; 698 pLang->iSubCost = 150; 699 pLang->pCost = 0; 700 iLangPrev = iLang; 701 } 702 if( nFrom==1 && zFrom[0]=='?' && nTo==0 ){ 703 pLang->iDelCost = iCost; 704 }else if( nFrom==0 && nTo==1 && zTo[0]=='?' ){ 705 pLang->iInsCost = iCost; 706 }else if( nFrom==1 && nTo==1 && zFrom[0]=='?' && zTo[0]=='?' ){ 707 pLang->iSubCost = iCost; 708 }else{ 709 EditDist3Cost *pCost; 710 int nExtra = nFrom + nTo - 4; 711 if( nExtra<0 ) nExtra = 0; 712 pCost = sqlite3_malloc64( sizeof(*pCost) + nExtra ); 713 if( pCost==0 ){ rc = SQLITE_NOMEM; break; } 714 pCost->nFrom = (u8)nFrom; 715 pCost->nTo = (u8)nTo; 716 pCost->iCost = (u16)iCost; 717 memcpy(pCost->a, zFrom, nFrom); 718 memcpy(pCost->a + nFrom, zTo, nTo); 719 pCost->pNext = pLang->pCost; 720 pLang->pCost = pCost; 721 } 722 } 723 rc2 = sqlite3_finalize(pStmt); 724 if( rc==SQLITE_OK ) rc = rc2; 725 return rc; 726 } 727 728 /* 729 ** Return the length (in bytes) of a utf-8 character. Or return a maximum 730 ** of N. 731 */ 732 static int utf8Len(unsigned char c, int N){ 733 int len = 1; 734 if( c>0x7f ){ 735 if( (c&0xe0)==0xc0 ){ 736 len = 2; 737 }else if( (c&0xf0)==0xe0 ){ 738 len = 3; 739 }else{ 740 len = 4; 741 } 742 } 743 if( len>N ) len = N; 744 return len; 745 } 746 747 /* 748 ** Return TRUE (non-zero) if the To side of the given cost matches 749 ** the given string. 750 */ 751 static int matchTo(EditDist3Cost *p, const char *z, int n){ 752 if( p->nTo>n ) return 0; 753 if( strncmp(p->a+p->nFrom, z, p->nTo)!=0 ) return 0; 754 return 1; 755 } 756 757 /* 758 ** Return TRUE (non-zero) if the From side of the given cost matches 759 ** the given string. 760 */ 761 static int matchFrom(EditDist3Cost *p, const char *z, int n){ 762 assert( p->nFrom<=n ); 763 if( strncmp(p->a, z, p->nFrom)!=0 ) return 0; 764 return 1; 765 } 766 767 /* 768 ** Return TRUE (non-zero) of the next FROM character and the next TO 769 ** character are the same. 770 */ 771 static int matchFromTo( 772 EditDist3FromString *pStr, /* Left hand string */ 773 int n1, /* Index of comparison character on the left */ 774 const char *z2, /* Right-handl comparison character */ 775 int n2 /* Bytes remaining in z2[] */ 776 ){ 777 int b1 = pStr->a[n1].nByte; 778 if( b1>n2 ) return 0; 779 if( memcmp(pStr->z+n1, z2, b1)!=0 ) return 0; 780 return 1; 781 } 782 783 /* 784 ** Delete an EditDist3FromString objecct 785 */ 786 static void editDist3FromStringDelete(EditDist3FromString *p){ 787 int i; 788 if( p ){ 789 for(i=0; i<p->n; i++){ 790 sqlite3_free(p->a[i].apDel); 791 sqlite3_free(p->a[i].apSubst); 792 } 793 sqlite3_free(p); 794 } 795 } 796 797 /* 798 ** Create a EditDist3FromString object. 799 */ 800 static EditDist3FromString *editDist3FromStringNew( 801 const EditDist3Lang *pLang, 802 const char *z, 803 int n 804 ){ 805 EditDist3FromString *pStr; 806 EditDist3Cost *p; 807 int i; 808 809 if( z==0 ) return 0; 810 if( n<0 ) n = (int)strlen(z); 811 pStr = sqlite3_malloc64( sizeof(*pStr) + sizeof(pStr->a[0])*n + n + 1 ); 812 if( pStr==0 ) return 0; 813 pStr->a = (EditDist3From*)&pStr[1]; 814 memset(pStr->a, 0, sizeof(pStr->a[0])*n); 815 pStr->n = n; 816 pStr->z = (char*)&pStr->a[n]; 817 memcpy(pStr->z, z, n+1); 818 if( n && z[n-1]=='*' ){ 819 pStr->isPrefix = 1; 820 n--; 821 pStr->n--; 822 pStr->z[n] = 0; 823 }else{ 824 pStr->isPrefix = 0; 825 } 826 827 for(i=0; i<n; i++){ 828 EditDist3From *pFrom = &pStr->a[i]; 829 memset(pFrom, 0, sizeof(*pFrom)); 830 pFrom->nByte = utf8Len((unsigned char)z[i], n-i); 831 for(p=pLang->pCost; p; p=p->pNext){ 832 EditDist3Cost **apNew; 833 if( i+p->nFrom>n ) continue; 834 if( matchFrom(p, z+i, n-i)==0 ) continue; 835 if( p->nTo==0 ){ 836 apNew = sqlite3_realloc64(pFrom->apDel, 837 sizeof(*apNew)*(pFrom->nDel+1)); 838 if( apNew==0 ) break; 839 pFrom->apDel = apNew; 840 apNew[pFrom->nDel++] = p; 841 }else{ 842 apNew = sqlite3_realloc64(pFrom->apSubst, 843 sizeof(*apNew)*(pFrom->nSubst+1)); 844 if( apNew==0 ) break; 845 pFrom->apSubst = apNew; 846 apNew[pFrom->nSubst++] = p; 847 } 848 } 849 if( p ){ 850 editDist3FromStringDelete(pStr); 851 pStr = 0; 852 break; 853 } 854 } 855 return pStr; 856 } 857 858 /* 859 ** Update entry m[i] such that it is the minimum of its current value 860 ** and m[j]+iCost. 861 ** 862 ** If the iCost is 1,000,000 or greater, then consider the cost to be 863 ** infinite and skip the update. 864 */ 865 static void updateCost( 866 unsigned int *m, 867 int i, 868 int j, 869 int iCost 870 ){ 871 assert( iCost>=0 ); 872 if( iCost<10000 ){ 873 unsigned int b = m[j] + iCost; 874 if( b<m[i] ) m[i] = b; 875 } 876 } 877 878 /* 879 ** How much stack space (int bytes) to use for Wagner matrix in 880 ** editDist3Core(). If more space than this is required, the entire 881 ** matrix is taken from the heap. To reduce the load on the memory 882 ** allocator, make this value as large as practical for the 883 ** architecture in use. 884 */ 885 #ifndef SQLITE_SPELLFIX_STACKALLOC_SZ 886 # define SQLITE_SPELLFIX_STACKALLOC_SZ (1024) 887 #endif 888 889 /* Compute the edit distance between two strings. 890 ** 891 ** If an error occurs, return a negative number which is the error code. 892 ** 893 ** If pnMatch is not NULL, then *pnMatch is set to the number of characters 894 ** (not bytes) in z2 that matched the search pattern in *pFrom. If pFrom does 895 ** not contain the pattern for a prefix-search, then this is always the number 896 ** of characters in z2. If pFrom does contain a prefix search pattern, then 897 ** it is the number of characters in the prefix of z2 that was deemed to 898 ** match pFrom. 899 */ 900 static int editDist3Core( 901 EditDist3FromString *pFrom, /* The FROM string */ 902 const char *z2, /* The TO string */ 903 int n2, /* Length of the TO string */ 904 const EditDist3Lang *pLang, /* Edit weights for a particular language ID */ 905 int *pnMatch /* OUT: Characters in matched prefix */ 906 ){ 907 int k, n; 908 int i1, b1; 909 int i2, b2; 910 EditDist3FromString f = *pFrom; 911 EditDist3To *a2; 912 unsigned int *m; 913 unsigned int *pToFree; 914 int szRow; 915 EditDist3Cost *p; 916 int res; 917 sqlite3_uint64 nByte; 918 unsigned int stackSpace[SQLITE_SPELLFIX_STACKALLOC_SZ/sizeof(unsigned int)]; 919 920 /* allocate the Wagner matrix and the aTo[] array for the TO string */ 921 n = (f.n+1)*(n2+1); 922 n = (n+1)&~1; 923 nByte = n*sizeof(m[0]) + sizeof(a2[0])*n2; 924 if( nByte<=sizeof(stackSpace) ){ 925 m = stackSpace; 926 pToFree = 0; 927 }else{ 928 m = pToFree = sqlite3_malloc64( nByte ); 929 if( m==0 ) return -1; /* Out of memory */ 930 } 931 a2 = (EditDist3To*)&m[n]; 932 memset(a2, 0, sizeof(a2[0])*n2); 933 934 /* Fill in the a1[] matrix for all characters of the TO string */ 935 for(i2=0; i2<n2; i2++){ 936 a2[i2].nByte = utf8Len((unsigned char)z2[i2], n2-i2); 937 for(p=pLang->pCost; p; p=p->pNext){ 938 EditDist3Cost **apNew; 939 if( p->nFrom>0 ) continue; 940 if( i2+p->nTo>n2 ) continue; 941 if( matchTo(p, z2+i2, n2-i2)==0 ) continue; 942 a2[i2].nIns++; 943 apNew = sqlite3_realloc64(a2[i2].apIns, sizeof(*apNew)*a2[i2].nIns); 944 if( apNew==0 ){ 945 res = -1; /* Out of memory */ 946 goto editDist3Abort; 947 } 948 a2[i2].apIns = apNew; 949 a2[i2].apIns[a2[i2].nIns-1] = p; 950 } 951 } 952 953 /* Prepare to compute the minimum edit distance */ 954 szRow = f.n+1; 955 memset(m, 0x01, (n2+1)*szRow*sizeof(m[0])); 956 m[0] = 0; 957 958 /* First fill in the top-row of the matrix with FROM deletion costs */ 959 for(i1=0; i1<f.n; i1 += b1){ 960 b1 = f.a[i1].nByte; 961 updateCost(m, i1+b1, i1, pLang->iDelCost); 962 for(k=0; k<f.a[i1].nDel; k++){ 963 p = f.a[i1].apDel[k]; 964 updateCost(m, i1+p->nFrom, i1, p->iCost); 965 } 966 } 967 968 /* Fill in all subsequent rows, top-to-bottom, left-to-right */ 969 for(i2=0; i2<n2; i2 += b2){ 970 int rx; /* Starting index for current row */ 971 int rxp; /* Starting index for previous row */ 972 b2 = a2[i2].nByte; 973 rx = szRow*(i2+b2); 974 rxp = szRow*i2; 975 updateCost(m, rx, rxp, pLang->iInsCost); 976 for(k=0; k<a2[i2].nIns; k++){ 977 p = a2[i2].apIns[k]; 978 updateCost(m, szRow*(i2+p->nTo), rxp, p->iCost); 979 } 980 for(i1=0; i1<f.n; i1+=b1){ 981 int cx; /* Index of current cell */ 982 int cxp; /* Index of cell immediately to the left */ 983 int cxd; /* Index of cell to the left and one row above */ 984 int cxu; /* Index of cell immediately above */ 985 b1 = f.a[i1].nByte; 986 cxp = rx + i1; 987 cx = cxp + b1; 988 cxd = rxp + i1; 989 cxu = cxd + b1; 990 updateCost(m, cx, cxp, pLang->iDelCost); 991 for(k=0; k<f.a[i1].nDel; k++){ 992 p = f.a[i1].apDel[k]; 993 updateCost(m, cxp+p->nFrom, cxp, p->iCost); 994 } 995 updateCost(m, cx, cxu, pLang->iInsCost); 996 if( matchFromTo(&f, i1, z2+i2, n2-i2) ){ 997 updateCost(m, cx, cxd, 0); 998 } 999 updateCost(m, cx, cxd, pLang->iSubCost); 1000 for(k=0; k<f.a[i1].nSubst; k++){ 1001 p = f.a[i1].apSubst[k]; 1002 if( matchTo(p, z2+i2, n2-i2) ){ 1003 updateCost(m, cxd+p->nFrom+szRow*p->nTo, cxd, p->iCost); 1004 } 1005 } 1006 } 1007 } 1008 1009 #if 0 /* Enable for debugging */ 1010 printf(" ^"); 1011 for(i1=0; i1<f.n; i1++) printf(" %c-%2x", f.z[i1], f.z[i1]&0xff); 1012 printf("\n ^:"); 1013 for(i1=0; i1<szRow; i1++){ 1014 int v = m[i1]; 1015 if( v>9999 ) printf(" ****"); 1016 else printf(" %4d", v); 1017 } 1018 printf("\n"); 1019 for(i2=0; i2<n2; i2++){ 1020 printf("%c-%02x:", z2[i2], z2[i2]&0xff); 1021 for(i1=0; i1<szRow; i1++){ 1022 int v = m[(i2+1)*szRow+i1]; 1023 if( v>9999 ) printf(" ****"); 1024 else printf(" %4d", v); 1025 } 1026 printf("\n"); 1027 } 1028 #endif 1029 1030 /* Free memory allocations and return the result */ 1031 res = (int)m[szRow*(n2+1)-1]; 1032 n = n2; 1033 if( f.isPrefix ){ 1034 for(i2=1; i2<=n2; i2++){ 1035 int b = m[szRow*i2-1]; 1036 if( b<=res ){ 1037 res = b; 1038 n = i2 - 1; 1039 } 1040 } 1041 } 1042 if( pnMatch ){ 1043 int nExtra = 0; 1044 for(k=0; k<n; k++){ 1045 if( (z2[k] & 0xc0)==0x80 ) nExtra++; 1046 } 1047 *pnMatch = n - nExtra; 1048 } 1049 1050 editDist3Abort: 1051 for(i2=0; i2<n2; i2++) sqlite3_free(a2[i2].apIns); 1052 sqlite3_free(pToFree); 1053 return res; 1054 } 1055 1056 /* 1057 ** Get an appropriate EditDist3Lang object. 1058 */ 1059 static const EditDist3Lang *editDist3FindLang( 1060 EditDist3Config *pConfig, 1061 int iLang 1062 ){ 1063 int i; 1064 for(i=0; i<pConfig->nLang; i++){ 1065 if( pConfig->a[i].iLang==iLang ) return &pConfig->a[i]; 1066 } 1067 return &editDist3Lang; 1068 } 1069 1070 /* 1071 ** Function: editdist3(A,B,iLang) 1072 ** editdist3(tablename) 1073 ** 1074 ** Return the cost of transforming string A into string B using edit 1075 ** weights for iLang. 1076 ** 1077 ** The second form loads edit weights into memory from a table. 1078 */ 1079 static void editDist3SqlFunc( 1080 sqlite3_context *context, 1081 int argc, 1082 sqlite3_value **argv 1083 ){ 1084 EditDist3Config *pConfig = (EditDist3Config*)sqlite3_user_data(context); 1085 sqlite3 *db = sqlite3_context_db_handle(context); 1086 int rc; 1087 if( argc==1 ){ 1088 const char *zTable = (const char*)sqlite3_value_text(argv[0]); 1089 rc = editDist3ConfigLoad(pConfig, db, zTable); 1090 if( rc ) sqlite3_result_error_code(context, rc); 1091 }else{ 1092 const char *zA = (const char*)sqlite3_value_text(argv[0]); 1093 const char *zB = (const char*)sqlite3_value_text(argv[1]); 1094 int nA = sqlite3_value_bytes(argv[0]); 1095 int nB = sqlite3_value_bytes(argv[1]); 1096 int iLang = argc==3 ? sqlite3_value_int(argv[2]) : 0; 1097 const EditDist3Lang *pLang = editDist3FindLang(pConfig, iLang); 1098 EditDist3FromString *pFrom; 1099 int dist; 1100 1101 pFrom = editDist3FromStringNew(pLang, zA, nA); 1102 if( pFrom==0 ){ 1103 sqlite3_result_error_nomem(context); 1104 return; 1105 } 1106 dist = editDist3Core(pFrom, zB, nB, pLang, 0); 1107 editDist3FromStringDelete(pFrom); 1108 if( dist==(-1) ){ 1109 sqlite3_result_error_nomem(context); 1110 }else{ 1111 sqlite3_result_int(context, dist); 1112 } 1113 } 1114 } 1115 1116 /* 1117 ** Register the editDist3 function with SQLite 1118 */ 1119 static int editDist3Install(sqlite3 *db){ 1120 int rc; 1121 EditDist3Config *pConfig = sqlite3_malloc64( sizeof(*pConfig) ); 1122 if( pConfig==0 ) return SQLITE_NOMEM; 1123 memset(pConfig, 0, sizeof(*pConfig)); 1124 rc = sqlite3_create_function_v2(db, "editdist3", 1125 2, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig, 1126 editDist3SqlFunc, 0, 0, 0); 1127 if( rc==SQLITE_OK ){ 1128 rc = sqlite3_create_function_v2(db, "editdist3", 1129 3, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig, 1130 editDist3SqlFunc, 0, 0, 0); 1131 } 1132 if( rc==SQLITE_OK ){ 1133 rc = sqlite3_create_function_v2(db, "editdist3", 1134 1, SQLITE_UTF8|SQLITE_DETERMINISTIC, pConfig, 1135 editDist3SqlFunc, 0, 0, editDist3ConfigDelete); 1136 }else{ 1137 sqlite3_free(pConfig); 1138 } 1139 return rc; 1140 } 1141 /* End configurable cost unicode edit distance routines 1142 ****************************************************************************** 1143 ****************************************************************************** 1144 ** Begin transliterate unicode-to-ascii implementation 1145 */ 1146 1147 #if !SQLITE_AMALGAMATION 1148 /* 1149 ** This lookup table is used to help decode the first byte of 1150 ** a multi-byte UTF8 character. 1151 */ 1152 static const unsigned char sqlite3Utf8Trans1[] = { 1153 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1154 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 1155 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 1156 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 1157 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1158 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 1159 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 1160 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00, 1161 }; 1162 #endif 1163 1164 /* 1165 ** Return the value of the first UTF-8 character in the string. 1166 */ 1167 static int utf8Read(const unsigned char *z, int n, int *pSize){ 1168 int c, i; 1169 1170 /* All callers to this routine (in the current implementation) 1171 ** always have n>0. */ 1172 if( NEVER(n==0) ){ 1173 c = i = 0; 1174 }else{ 1175 c = z[0]; 1176 i = 1; 1177 if( c>=0xc0 ){ 1178 c = sqlite3Utf8Trans1[c-0xc0]; 1179 while( i<n && (z[i] & 0xc0)==0x80 ){ 1180 c = (c<<6) + (0x3f & z[i++]); 1181 } 1182 } 1183 } 1184 *pSize = i; 1185 return c; 1186 } 1187 1188 /* 1189 ** Return the number of characters in the utf-8 string in the nIn byte 1190 ** buffer pointed to by zIn. 1191 */ 1192 static int utf8Charlen(const char *zIn, int nIn){ 1193 int i; 1194 int nChar = 0; 1195 for(i=0; i<nIn; nChar++){ 1196 int sz; 1197 utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); 1198 i += sz; 1199 } 1200 return nChar; 1201 } 1202 1203 typedef struct Transliteration Transliteration; 1204 struct Transliteration { 1205 unsigned short int cFrom; 1206 unsigned char cTo0, cTo1, cTo2, cTo3; 1207 }; 1208 1209 /* 1210 ** Table of translations from unicode characters into ASCII. 1211 */ 1212 static const Transliteration translit[] = { 1213 { 0x00A0, 0x20, 0x00, 0x00, 0x00 }, /* to */ 1214 { 0x00B5, 0x75, 0x00, 0x00, 0x00 }, /* µ to u */ 1215 { 0x00C0, 0x41, 0x00, 0x00, 0x00 }, /* À to A */ 1216 { 0x00C1, 0x41, 0x00, 0x00, 0x00 }, /* Á to A */ 1217 { 0x00C2, 0x41, 0x00, 0x00, 0x00 }, /*  to A */ 1218 { 0x00C3, 0x41, 0x00, 0x00, 0x00 }, /* à to A */ 1219 { 0x00C4, 0x41, 0x65, 0x00, 0x00 }, /* Ä to Ae */ 1220 { 0x00C5, 0x41, 0x61, 0x00, 0x00 }, /* Å to Aa */ 1221 { 0x00C6, 0x41, 0x45, 0x00, 0x00 }, /* Æ to AE */ 1222 { 0x00C7, 0x43, 0x00, 0x00, 0x00 }, /* Ç to C */ 1223 { 0x00C8, 0x45, 0x00, 0x00, 0x00 }, /* È to E */ 1224 { 0x00C9, 0x45, 0x00, 0x00, 0x00 }, /* É to E */ 1225 { 0x00CA, 0x45, 0x00, 0x00, 0x00 }, /* Ê to E */ 1226 { 0x00CB, 0x45, 0x00, 0x00, 0x00 }, /* Ë to E */ 1227 { 0x00CC, 0x49, 0x00, 0x00, 0x00 }, /* Ì to I */ 1228 { 0x00CD, 0x49, 0x00, 0x00, 0x00 }, /* Í to I */ 1229 { 0x00CE, 0x49, 0x00, 0x00, 0x00 }, /* Î to I */ 1230 { 0x00CF, 0x49, 0x00, 0x00, 0x00 }, /* Ï to I */ 1231 { 0x00D0, 0x44, 0x00, 0x00, 0x00 }, /* Ð to D */ 1232 { 0x00D1, 0x4E, 0x00, 0x00, 0x00 }, /* Ñ to N */ 1233 { 0x00D2, 0x4F, 0x00, 0x00, 0x00 }, /* Ò to O */ 1234 { 0x00D3, 0x4F, 0x00, 0x00, 0x00 }, /* Ó to O */ 1235 { 0x00D4, 0x4F, 0x00, 0x00, 0x00 }, /* Ô to O */ 1236 { 0x00D5, 0x4F, 0x00, 0x00, 0x00 }, /* Õ to O */ 1237 { 0x00D6, 0x4F, 0x65, 0x00, 0x00 }, /* Ö to Oe */ 1238 { 0x00D7, 0x78, 0x00, 0x00, 0x00 }, /* × to x */ 1239 { 0x00D8, 0x4F, 0x00, 0x00, 0x00 }, /* Ø to O */ 1240 { 0x00D9, 0x55, 0x00, 0x00, 0x00 }, /* Ù to U */ 1241 { 0x00DA, 0x55, 0x00, 0x00, 0x00 }, /* Ú to U */ 1242 { 0x00DB, 0x55, 0x00, 0x00, 0x00 }, /* Û to U */ 1243 { 0x00DC, 0x55, 0x65, 0x00, 0x00 }, /* Ü to Ue */ 1244 { 0x00DD, 0x59, 0x00, 0x00, 0x00 }, /* Ý to Y */ 1245 { 0x00DE, 0x54, 0x68, 0x00, 0x00 }, /* Þ to Th */ 1246 { 0x00DF, 0x73, 0x73, 0x00, 0x00 }, /* ß to ss */ 1247 { 0x00E0, 0x61, 0x00, 0x00, 0x00 }, /* à to a */ 1248 { 0x00E1, 0x61, 0x00, 0x00, 0x00 }, /* á to a */ 1249 { 0x00E2, 0x61, 0x00, 0x00, 0x00 }, /* â to a */ 1250 { 0x00E3, 0x61, 0x00, 0x00, 0x00 }, /* ã to a */ 1251 { 0x00E4, 0x61, 0x65, 0x00, 0x00 }, /* ä to ae */ 1252 { 0x00E5, 0x61, 0x61, 0x00, 0x00 }, /* å to aa */ 1253 { 0x00E6, 0x61, 0x65, 0x00, 0x00 }, /* æ to ae */ 1254 { 0x00E7, 0x63, 0x00, 0x00, 0x00 }, /* ç to c */ 1255 { 0x00E8, 0x65, 0x00, 0x00, 0x00 }, /* è to e */ 1256 { 0x00E9, 0x65, 0x00, 0x00, 0x00 }, /* é to e */ 1257 { 0x00EA, 0x65, 0x00, 0x00, 0x00 }, /* ê to e */ 1258 { 0x00EB, 0x65, 0x00, 0x00, 0x00 }, /* ë to e */ 1259 { 0x00EC, 0x69, 0x00, 0x00, 0x00 }, /* ì to i */ 1260 { 0x00ED, 0x69, 0x00, 0x00, 0x00 }, /* í to i */ 1261 { 0x00EE, 0x69, 0x00, 0x00, 0x00 }, /* î to i */ 1262 { 0x00EF, 0x69, 0x00, 0x00, 0x00 }, /* ï to i */ 1263 { 0x00F0, 0x64, 0x00, 0x00, 0x00 }, /* ð to d */ 1264 { 0x00F1, 0x6E, 0x00, 0x00, 0x00 }, /* ñ to n */ 1265 { 0x00F2, 0x6F, 0x00, 0x00, 0x00 }, /* ò to o */ 1266 { 0x00F3, 0x6F, 0x00, 0x00, 0x00 }, /* ó to o */ 1267 { 0x00F4, 0x6F, 0x00, 0x00, 0x00 }, /* ô to o */ 1268 { 0x00F5, 0x6F, 0x00, 0x00, 0x00 }, /* õ to o */ 1269 { 0x00F6, 0x6F, 0x65, 0x00, 0x00 }, /* ö to oe */ 1270 { 0x00F7, 0x3A, 0x00, 0x00, 0x00 }, /* ÷ to : */ 1271 { 0x00F8, 0x6F, 0x00, 0x00, 0x00 }, /* ø to o */ 1272 { 0x00F9, 0x75, 0x00, 0x00, 0x00 }, /* ù to u */ 1273 { 0x00FA, 0x75, 0x00, 0x00, 0x00 }, /* ú to u */ 1274 { 0x00FB, 0x75, 0x00, 0x00, 0x00 }, /* û to u */ 1275 { 0x00FC, 0x75, 0x65, 0x00, 0x00 }, /* ü to ue */ 1276 { 0x00FD, 0x79, 0x00, 0x00, 0x00 }, /* ý to y */ 1277 { 0x00FE, 0x74, 0x68, 0x00, 0x00 }, /* þ to th */ 1278 { 0x00FF, 0x79, 0x00, 0x00, 0x00 }, /* ÿ to y */ 1279 { 0x0100, 0x41, 0x00, 0x00, 0x00 }, /* Ā to A */ 1280 { 0x0101, 0x61, 0x00, 0x00, 0x00 }, /* ā to a */ 1281 { 0x0102, 0x41, 0x00, 0x00, 0x00 }, /* Ă to A */ 1282 { 0x0103, 0x61, 0x00, 0x00, 0x00 }, /* ă to a */ 1283 { 0x0104, 0x41, 0x00, 0x00, 0x00 }, /* Ą to A */ 1284 { 0x0105, 0x61, 0x00, 0x00, 0x00 }, /* ą to a */ 1285 { 0x0106, 0x43, 0x00, 0x00, 0x00 }, /* Ć to C */ 1286 { 0x0107, 0x63, 0x00, 0x00, 0x00 }, /* ć to c */ 1287 { 0x0108, 0x43, 0x68, 0x00, 0x00 }, /* Ĉ to Ch */ 1288 { 0x0109, 0x63, 0x68, 0x00, 0x00 }, /* ĉ to ch */ 1289 { 0x010A, 0x43, 0x00, 0x00, 0x00 }, /* Ċ to C */ 1290 { 0x010B, 0x63, 0x00, 0x00, 0x00 }, /* ċ to c */ 1291 { 0x010C, 0x43, 0x00, 0x00, 0x00 }, /* Č to C */ 1292 { 0x010D, 0x63, 0x00, 0x00, 0x00 }, /* č to c */ 1293 { 0x010E, 0x44, 0x00, 0x00, 0x00 }, /* Ď to D */ 1294 { 0x010F, 0x64, 0x00, 0x00, 0x00 }, /* ď to d */ 1295 { 0x0110, 0x44, 0x00, 0x00, 0x00 }, /* Đ to D */ 1296 { 0x0111, 0x64, 0x00, 0x00, 0x00 }, /* đ to d */ 1297 { 0x0112, 0x45, 0x00, 0x00, 0x00 }, /* Ē to E */ 1298 { 0x0113, 0x65, 0x00, 0x00, 0x00 }, /* ē to e */ 1299 { 0x0114, 0x45, 0x00, 0x00, 0x00 }, /* Ĕ to E */ 1300 { 0x0115, 0x65, 0x00, 0x00, 0x00 }, /* ĕ to e */ 1301 { 0x0116, 0x45, 0x00, 0x00, 0x00 }, /* Ė to E */ 1302 { 0x0117, 0x65, 0x00, 0x00, 0x00 }, /* ė to e */ 1303 { 0x0118, 0x45, 0x00, 0x00, 0x00 }, /* Ę to E */ 1304 { 0x0119, 0x65, 0x00, 0x00, 0x00 }, /* ę to e */ 1305 { 0x011A, 0x45, 0x00, 0x00, 0x00 }, /* Ě to E */ 1306 { 0x011B, 0x65, 0x00, 0x00, 0x00 }, /* ě to e */ 1307 { 0x011C, 0x47, 0x68, 0x00, 0x00 }, /* Ĝ to Gh */ 1308 { 0x011D, 0x67, 0x68, 0x00, 0x00 }, /* ĝ to gh */ 1309 { 0x011E, 0x47, 0x00, 0x00, 0x00 }, /* Ğ to G */ 1310 { 0x011F, 0x67, 0x00, 0x00, 0x00 }, /* ğ to g */ 1311 { 0x0120, 0x47, 0x00, 0x00, 0x00 }, /* Ġ to G */ 1312 { 0x0121, 0x67, 0x00, 0x00, 0x00 }, /* ġ to g */ 1313 { 0x0122, 0x47, 0x00, 0x00, 0x00 }, /* Ģ to G */ 1314 { 0x0123, 0x67, 0x00, 0x00, 0x00 }, /* ģ to g */ 1315 { 0x0124, 0x48, 0x68, 0x00, 0x00 }, /* Ĥ to Hh */ 1316 { 0x0125, 0x68, 0x68, 0x00, 0x00 }, /* ĥ to hh */ 1317 { 0x0126, 0x48, 0x00, 0x00, 0x00 }, /* Ħ to H */ 1318 { 0x0127, 0x68, 0x00, 0x00, 0x00 }, /* ħ to h */ 1319 { 0x0128, 0x49, 0x00, 0x00, 0x00 }, /* Ĩ to I */ 1320 { 0x0129, 0x69, 0x00, 0x00, 0x00 }, /* ĩ to i */ 1321 { 0x012A, 0x49, 0x00, 0x00, 0x00 }, /* Ī to I */ 1322 { 0x012B, 0x69, 0x00, 0x00, 0x00 }, /* ī to i */ 1323 { 0x012C, 0x49, 0x00, 0x00, 0x00 }, /* Ĭ to I */ 1324 { 0x012D, 0x69, 0x00, 0x00, 0x00 }, /* ĭ to i */ 1325 { 0x012E, 0x49, 0x00, 0x00, 0x00 }, /* Į to I */ 1326 { 0x012F, 0x69, 0x00, 0x00, 0x00 }, /* į to i */ 1327 { 0x0130, 0x49, 0x00, 0x00, 0x00 }, /* İ to I */ 1328 { 0x0131, 0x69, 0x00, 0x00, 0x00 }, /* ı to i */ 1329 { 0x0132, 0x49, 0x4A, 0x00, 0x00 }, /* IJ to IJ */ 1330 { 0x0133, 0x69, 0x6A, 0x00, 0x00 }, /* ij to ij */ 1331 { 0x0134, 0x4A, 0x68, 0x00, 0x00 }, /* Ĵ to Jh */ 1332 { 0x0135, 0x6A, 0x68, 0x00, 0x00 }, /* ĵ to jh */ 1333 { 0x0136, 0x4B, 0x00, 0x00, 0x00 }, /* Ķ to K */ 1334 { 0x0137, 0x6B, 0x00, 0x00, 0x00 }, /* ķ to k */ 1335 { 0x0138, 0x6B, 0x00, 0x00, 0x00 }, /* ĸ to k */ 1336 { 0x0139, 0x4C, 0x00, 0x00, 0x00 }, /* Ĺ to L */ 1337 { 0x013A, 0x6C, 0x00, 0x00, 0x00 }, /* ĺ to l */ 1338 { 0x013B, 0x4C, 0x00, 0x00, 0x00 }, /* Ļ to L */ 1339 { 0x013C, 0x6C, 0x00, 0x00, 0x00 }, /* ļ to l */ 1340 { 0x013D, 0x4C, 0x00, 0x00, 0x00 }, /* Ľ to L */ 1341 { 0x013E, 0x6C, 0x00, 0x00, 0x00 }, /* ľ to l */ 1342 { 0x013F, 0x4C, 0x2E, 0x00, 0x00 }, /* Ŀ to L. */ 1343 { 0x0140, 0x6C, 0x2E, 0x00, 0x00 }, /* ŀ to l. */ 1344 { 0x0141, 0x4C, 0x00, 0x00, 0x00 }, /* Ł to L */ 1345 { 0x0142, 0x6C, 0x00, 0x00, 0x00 }, /* ł to l */ 1346 { 0x0143, 0x4E, 0x00, 0x00, 0x00 }, /* Ń to N */ 1347 { 0x0144, 0x6E, 0x00, 0x00, 0x00 }, /* ń to n */ 1348 { 0x0145, 0x4E, 0x00, 0x00, 0x00 }, /* Ņ to N */ 1349 { 0x0146, 0x6E, 0x00, 0x00, 0x00 }, /* ņ to n */ 1350 { 0x0147, 0x4E, 0x00, 0x00, 0x00 }, /* Ň to N */ 1351 { 0x0148, 0x6E, 0x00, 0x00, 0x00 }, /* ň to n */ 1352 { 0x0149, 0x27, 0x6E, 0x00, 0x00 }, /* ʼn to 'n */ 1353 { 0x014A, 0x4E, 0x47, 0x00, 0x00 }, /* Ŋ to NG */ 1354 { 0x014B, 0x6E, 0x67, 0x00, 0x00 }, /* ŋ to ng */ 1355 { 0x014C, 0x4F, 0x00, 0x00, 0x00 }, /* Ō to O */ 1356 { 0x014D, 0x6F, 0x00, 0x00, 0x00 }, /* ō to o */ 1357 { 0x014E, 0x4F, 0x00, 0x00, 0x00 }, /* Ŏ to O */ 1358 { 0x014F, 0x6F, 0x00, 0x00, 0x00 }, /* ŏ to o */ 1359 { 0x0150, 0x4F, 0x00, 0x00, 0x00 }, /* Ő to O */ 1360 { 0x0151, 0x6F, 0x00, 0x00, 0x00 }, /* ő to o */ 1361 { 0x0152, 0x4F, 0x45, 0x00, 0x00 }, /* Œ to OE */ 1362 { 0x0153, 0x6F, 0x65, 0x00, 0x00 }, /* œ to oe */ 1363 { 0x0154, 0x52, 0x00, 0x00, 0x00 }, /* Ŕ to R */ 1364 { 0x0155, 0x72, 0x00, 0x00, 0x00 }, /* ŕ to r */ 1365 { 0x0156, 0x52, 0x00, 0x00, 0x00 }, /* Ŗ to R */ 1366 { 0x0157, 0x72, 0x00, 0x00, 0x00 }, /* ŗ to r */ 1367 { 0x0158, 0x52, 0x00, 0x00, 0x00 }, /* Ř to R */ 1368 { 0x0159, 0x72, 0x00, 0x00, 0x00 }, /* ř to r */ 1369 { 0x015A, 0x53, 0x00, 0x00, 0x00 }, /* Ś to S */ 1370 { 0x015B, 0x73, 0x00, 0x00, 0x00 }, /* ś to s */ 1371 { 0x015C, 0x53, 0x68, 0x00, 0x00 }, /* Ŝ to Sh */ 1372 { 0x015D, 0x73, 0x68, 0x00, 0x00 }, /* ŝ to sh */ 1373 { 0x015E, 0x53, 0x00, 0x00, 0x00 }, /* Ş to S */ 1374 { 0x015F, 0x73, 0x00, 0x00, 0x00 }, /* ş to s */ 1375 { 0x0160, 0x53, 0x00, 0x00, 0x00 }, /* Š to S */ 1376 { 0x0161, 0x73, 0x00, 0x00, 0x00 }, /* š to s */ 1377 { 0x0162, 0x54, 0x00, 0x00, 0x00 }, /* Ţ to T */ 1378 { 0x0163, 0x74, 0x00, 0x00, 0x00 }, /* ţ to t */ 1379 { 0x0164, 0x54, 0x00, 0x00, 0x00 }, /* Ť to T */ 1380 { 0x0165, 0x74, 0x00, 0x00, 0x00 }, /* ť to t */ 1381 { 0x0166, 0x54, 0x00, 0x00, 0x00 }, /* Ŧ to T */ 1382 { 0x0167, 0x74, 0x00, 0x00, 0x00 }, /* ŧ to t */ 1383 { 0x0168, 0x55, 0x00, 0x00, 0x00 }, /* Ũ to U */ 1384 { 0x0169, 0x75, 0x00, 0x00, 0x00 }, /* ũ to u */ 1385 { 0x016A, 0x55, 0x00, 0x00, 0x00 }, /* Ū to U */ 1386 { 0x016B, 0x75, 0x00, 0x00, 0x00 }, /* ū to u */ 1387 { 0x016C, 0x55, 0x00, 0x00, 0x00 }, /* Ŭ to U */ 1388 { 0x016D, 0x75, 0x00, 0x00, 0x00 }, /* ŭ to u */ 1389 { 0x016E, 0x55, 0x00, 0x00, 0x00 }, /* Ů to U */ 1390 { 0x016F, 0x75, 0x00, 0x00, 0x00 }, /* ů to u */ 1391 { 0x0170, 0x55, 0x00, 0x00, 0x00 }, /* Ű to U */ 1392 { 0x0171, 0x75, 0x00, 0x00, 0x00 }, /* ű to u */ 1393 { 0x0172, 0x55, 0x00, 0x00, 0x00 }, /* Ų to U */ 1394 { 0x0173, 0x75, 0x00, 0x00, 0x00 }, /* ų to u */ 1395 { 0x0174, 0x57, 0x00, 0x00, 0x00 }, /* Ŵ to W */ 1396 { 0x0175, 0x77, 0x00, 0x00, 0x00 }, /* ŵ to w */ 1397 { 0x0176, 0x59, 0x00, 0x00, 0x00 }, /* Ŷ to Y */ 1398 { 0x0177, 0x79, 0x00, 0x00, 0x00 }, /* ŷ to y */ 1399 { 0x0178, 0x59, 0x00, 0x00, 0x00 }, /* Ÿ to Y */ 1400 { 0x0179, 0x5A, 0x00, 0x00, 0x00 }, /* Ź to Z */ 1401 { 0x017A, 0x7A, 0x00, 0x00, 0x00 }, /* ź to z */ 1402 { 0x017B, 0x5A, 0x00, 0x00, 0x00 }, /* Ż to Z */ 1403 { 0x017C, 0x7A, 0x00, 0x00, 0x00 }, /* ż to z */ 1404 { 0x017D, 0x5A, 0x00, 0x00, 0x00 }, /* Ž to Z */ 1405 { 0x017E, 0x7A, 0x00, 0x00, 0x00 }, /* ž to z */ 1406 { 0x017F, 0x73, 0x00, 0x00, 0x00 }, /* ſ to s */ 1407 { 0x0192, 0x66, 0x00, 0x00, 0x00 }, /* ƒ to f */ 1408 { 0x0218, 0x53, 0x00, 0x00, 0x00 }, /* Ș to S */ 1409 { 0x0219, 0x73, 0x00, 0x00, 0x00 }, /* ș to s */ 1410 { 0x021A, 0x54, 0x00, 0x00, 0x00 }, /* Ț to T */ 1411 { 0x021B, 0x74, 0x00, 0x00, 0x00 }, /* ț to t */ 1412 { 0x0386, 0x41, 0x00, 0x00, 0x00 }, /* Ά to A */ 1413 { 0x0388, 0x45, 0x00, 0x00, 0x00 }, /* Έ to E */ 1414 { 0x0389, 0x49, 0x00, 0x00, 0x00 }, /* Ή to I */ 1415 { 0x038A, 0x49, 0x00, 0x00, 0x00 }, /* Ί to I */ 1416 { 0x038C, 0x4f, 0x00, 0x00, 0x00 }, /* Ό to O */ 1417 { 0x038E, 0x59, 0x00, 0x00, 0x00 }, /* Ύ to Y */ 1418 { 0x038F, 0x4f, 0x00, 0x00, 0x00 }, /* Ώ to O */ 1419 { 0x0390, 0x69, 0x00, 0x00, 0x00 }, /* ΐ to i */ 1420 { 0x0391, 0x41, 0x00, 0x00, 0x00 }, /* Α to A */ 1421 { 0x0392, 0x42, 0x00, 0x00, 0x00 }, /* Β to B */ 1422 { 0x0393, 0x47, 0x00, 0x00, 0x00 }, /* Γ to G */ 1423 { 0x0394, 0x44, 0x00, 0x00, 0x00 }, /* Δ to D */ 1424 { 0x0395, 0x45, 0x00, 0x00, 0x00 }, /* Ε to E */ 1425 { 0x0396, 0x5a, 0x00, 0x00, 0x00 }, /* Ζ to Z */ 1426 { 0x0397, 0x49, 0x00, 0x00, 0x00 }, /* Η to I */ 1427 { 0x0398, 0x54, 0x68, 0x00, 0x00 }, /* Θ to Th */ 1428 { 0x0399, 0x49, 0x00, 0x00, 0x00 }, /* Ι to I */ 1429 { 0x039A, 0x4b, 0x00, 0x00, 0x00 }, /* Κ to K */ 1430 { 0x039B, 0x4c, 0x00, 0x00, 0x00 }, /* Λ to L */ 1431 { 0x039C, 0x4d, 0x00, 0x00, 0x00 }, /* Μ to M */ 1432 { 0x039D, 0x4e, 0x00, 0x00, 0x00 }, /* Ν to N */ 1433 { 0x039E, 0x58, 0x00, 0x00, 0x00 }, /* Ξ to X */ 1434 { 0x039F, 0x4f, 0x00, 0x00, 0x00 }, /* Ο to O */ 1435 { 0x03A0, 0x50, 0x00, 0x00, 0x00 }, /* Π to P */ 1436 { 0x03A1, 0x52, 0x00, 0x00, 0x00 }, /* Ρ to R */ 1437 { 0x03A3, 0x53, 0x00, 0x00, 0x00 }, /* Σ to S */ 1438 { 0x03A4, 0x54, 0x00, 0x00, 0x00 }, /* Τ to T */ 1439 { 0x03A5, 0x59, 0x00, 0x00, 0x00 }, /* Υ to Y */ 1440 { 0x03A6, 0x46, 0x00, 0x00, 0x00 }, /* Φ to F */ 1441 { 0x03A7, 0x43, 0x68, 0x00, 0x00 }, /* Χ to Ch */ 1442 { 0x03A8, 0x50, 0x73, 0x00, 0x00 }, /* Ψ to Ps */ 1443 { 0x03A9, 0x4f, 0x00, 0x00, 0x00 }, /* Ω to O */ 1444 { 0x03AA, 0x49, 0x00, 0x00, 0x00 }, /* Ϊ to I */ 1445 { 0x03AB, 0x59, 0x00, 0x00, 0x00 }, /* Ϋ to Y */ 1446 { 0x03AC, 0x61, 0x00, 0x00, 0x00 }, /* ά to a */ 1447 { 0x03AD, 0x65, 0x00, 0x00, 0x00 }, /* έ to e */ 1448 { 0x03AE, 0x69, 0x00, 0x00, 0x00 }, /* ή to i */ 1449 { 0x03AF, 0x69, 0x00, 0x00, 0x00 }, /* ί to i */ 1450 { 0x03B1, 0x61, 0x00, 0x00, 0x00 }, /* α to a */ 1451 { 0x03B2, 0x62, 0x00, 0x00, 0x00 }, /* β to b */ 1452 { 0x03B3, 0x67, 0x00, 0x00, 0x00 }, /* γ to g */ 1453 { 0x03B4, 0x64, 0x00, 0x00, 0x00 }, /* δ to d */ 1454 { 0x03B5, 0x65, 0x00, 0x00, 0x00 }, /* ε to e */ 1455 { 0x03B6, 0x7a, 0x00, 0x00, 0x00 }, /* ζ to z */ 1456 { 0x03B7, 0x69, 0x00, 0x00, 0x00 }, /* η to i */ 1457 { 0x03B8, 0x74, 0x68, 0x00, 0x00 }, /* θ to th */ 1458 { 0x03B9, 0x69, 0x00, 0x00, 0x00 }, /* ι to i */ 1459 { 0x03BA, 0x6b, 0x00, 0x00, 0x00 }, /* κ to k */ 1460 { 0x03BB, 0x6c, 0x00, 0x00, 0x00 }, /* λ to l */ 1461 { 0x03BC, 0x6d, 0x00, 0x00, 0x00 }, /* μ to m */ 1462 { 0x03BD, 0x6e, 0x00, 0x00, 0x00 }, /* ν to n */ 1463 { 0x03BE, 0x78, 0x00, 0x00, 0x00 }, /* ξ to x */ 1464 { 0x03BF, 0x6f, 0x00, 0x00, 0x00 }, /* ο to o */ 1465 { 0x03C0, 0x70, 0x00, 0x00, 0x00 }, /* π to p */ 1466 { 0x03C1, 0x72, 0x00, 0x00, 0x00 }, /* ρ to r */ 1467 { 0x03C3, 0x73, 0x00, 0x00, 0x00 }, /* σ to s */ 1468 { 0x03C4, 0x74, 0x00, 0x00, 0x00 }, /* τ to t */ 1469 { 0x03C5, 0x79, 0x00, 0x00, 0x00 }, /* υ to y */ 1470 { 0x03C6, 0x66, 0x00, 0x00, 0x00 }, /* φ to f */ 1471 { 0x03C7, 0x63, 0x68, 0x00, 0x00 }, /* χ to ch */ 1472 { 0x03C8, 0x70, 0x73, 0x00, 0x00 }, /* ψ to ps */ 1473 { 0x03C9, 0x6f, 0x00, 0x00, 0x00 }, /* ω to o */ 1474 { 0x03CA, 0x69, 0x00, 0x00, 0x00 }, /* ϊ to i */ 1475 { 0x03CB, 0x79, 0x00, 0x00, 0x00 }, /* ϋ to y */ 1476 { 0x03CC, 0x6f, 0x00, 0x00, 0x00 }, /* ό to o */ 1477 { 0x03CD, 0x79, 0x00, 0x00, 0x00 }, /* ύ to y */ 1478 { 0x03CE, 0x69, 0x00, 0x00, 0x00 }, /* ώ to i */ 1479 { 0x0400, 0x45, 0x00, 0x00, 0x00 }, /* Ѐ to E */ 1480 { 0x0401, 0x45, 0x00, 0x00, 0x00 }, /* Ё to E */ 1481 { 0x0402, 0x44, 0x00, 0x00, 0x00 }, /* Ђ to D */ 1482 { 0x0403, 0x47, 0x00, 0x00, 0x00 }, /* Ѓ to G */ 1483 { 0x0404, 0x45, 0x00, 0x00, 0x00 }, /* Є to E */ 1484 { 0x0405, 0x5a, 0x00, 0x00, 0x00 }, /* Ѕ to Z */ 1485 { 0x0406, 0x49, 0x00, 0x00, 0x00 }, /* І to I */ 1486 { 0x0407, 0x49, 0x00, 0x00, 0x00 }, /* Ї to I */ 1487 { 0x0408, 0x4a, 0x00, 0x00, 0x00 }, /* Ј to J */ 1488 { 0x0409, 0x49, 0x00, 0x00, 0x00 }, /* Љ to I */ 1489 { 0x040A, 0x4e, 0x00, 0x00, 0x00 }, /* Њ to N */ 1490 { 0x040B, 0x44, 0x00, 0x00, 0x00 }, /* Ћ to D */ 1491 { 0x040C, 0x4b, 0x00, 0x00, 0x00 }, /* Ќ to K */ 1492 { 0x040D, 0x49, 0x00, 0x00, 0x00 }, /* Ѝ to I */ 1493 { 0x040E, 0x55, 0x00, 0x00, 0x00 }, /* Ў to U */ 1494 { 0x040F, 0x44, 0x00, 0x00, 0x00 }, /* Џ to D */ 1495 { 0x0410, 0x41, 0x00, 0x00, 0x00 }, /* А to A */ 1496 { 0x0411, 0x42, 0x00, 0x00, 0x00 }, /* Б to B */ 1497 { 0x0412, 0x56, 0x00, 0x00, 0x00 }, /* В to V */ 1498 { 0x0413, 0x47, 0x00, 0x00, 0x00 }, /* Г to G */ 1499 { 0x0414, 0x44, 0x00, 0x00, 0x00 }, /* Д to D */ 1500 { 0x0415, 0x45, 0x00, 0x00, 0x00 }, /* Е to E */ 1501 { 0x0416, 0x5a, 0x68, 0x00, 0x00 }, /* Ж to Zh */ 1502 { 0x0417, 0x5a, 0x00, 0x00, 0x00 }, /* З to Z */ 1503 { 0x0418, 0x49, 0x00, 0x00, 0x00 }, /* И to I */ 1504 { 0x0419, 0x49, 0x00, 0x00, 0x00 }, /* Й to I */ 1505 { 0x041A, 0x4b, 0x00, 0x00, 0x00 }, /* К to K */ 1506 { 0x041B, 0x4c, 0x00, 0x00, 0x00 }, /* Л to L */ 1507 { 0x041C, 0x4d, 0x00, 0x00, 0x00 }, /* М to M */ 1508 { 0x041D, 0x4e, 0x00, 0x00, 0x00 }, /* Н to N */ 1509 { 0x041E, 0x4f, 0x00, 0x00, 0x00 }, /* О to O */ 1510 { 0x041F, 0x50, 0x00, 0x00, 0x00 }, /* П to P */ 1511 { 0x0420, 0x52, 0x00, 0x00, 0x00 }, /* Р to R */ 1512 { 0x0421, 0x53, 0x00, 0x00, 0x00 }, /* С to S */ 1513 { 0x0422, 0x54, 0x00, 0x00, 0x00 }, /* Т to T */ 1514 { 0x0423, 0x55, 0x00, 0x00, 0x00 }, /* У to U */ 1515 { 0x0424, 0x46, 0x00, 0x00, 0x00 }, /* Ф to F */ 1516 { 0x0425, 0x4b, 0x68, 0x00, 0x00 }, /* Х to Kh */ 1517 { 0x0426, 0x54, 0x63, 0x00, 0x00 }, /* Ц to Tc */ 1518 { 0x0427, 0x43, 0x68, 0x00, 0x00 }, /* Ч to Ch */ 1519 { 0x0428, 0x53, 0x68, 0x00, 0x00 }, /* Ш to Sh */ 1520 { 0x0429, 0x53, 0x68, 0x63, 0x68 }, /* Щ to Shch */ 1521 { 0x042A, 0x61, 0x00, 0x00, 0x00 }, /* to A */ 1522 { 0x042B, 0x59, 0x00, 0x00, 0x00 }, /* Ы to Y */ 1523 { 0x042C, 0x59, 0x00, 0x00, 0x00 }, /* to Y */ 1524 { 0x042D, 0x45, 0x00, 0x00, 0x00 }, /* Э to E */ 1525 { 0x042E, 0x49, 0x75, 0x00, 0x00 }, /* Ю to Iu */ 1526 { 0x042F, 0x49, 0x61, 0x00, 0x00 }, /* Я to Ia */ 1527 { 0x0430, 0x61, 0x00, 0x00, 0x00 }, /* а to a */ 1528 { 0x0431, 0x62, 0x00, 0x00, 0x00 }, /* б to b */ 1529 { 0x0432, 0x76, 0x00, 0x00, 0x00 }, /* в to v */ 1530 { 0x0433, 0x67, 0x00, 0x00, 0x00 }, /* г to g */ 1531 { 0x0434, 0x64, 0x00, 0x00, 0x00 }, /* д to d */ 1532 { 0x0435, 0x65, 0x00, 0x00, 0x00 }, /* е to e */ 1533 { 0x0436, 0x7a, 0x68, 0x00, 0x00 }, /* ж to zh */ 1534 { 0x0437, 0x7a, 0x00, 0x00, 0x00 }, /* з to z */ 1535 { 0x0438, 0x69, 0x00, 0x00, 0x00 }, /* и to i */ 1536 { 0x0439, 0x69, 0x00, 0x00, 0x00 }, /* й to i */ 1537 { 0x043A, 0x6b, 0x00, 0x00, 0x00 }, /* к to k */ 1538 { 0x043B, 0x6c, 0x00, 0x00, 0x00 }, /* л to l */ 1539 { 0x043C, 0x6d, 0x00, 0x00, 0x00 }, /* м to m */ 1540 { 0x043D, 0x6e, 0x00, 0x00, 0x00 }, /* н to n */ 1541 { 0x043E, 0x6f, 0x00, 0x00, 0x00 }, /* о to o */ 1542 { 0x043F, 0x70, 0x00, 0x00, 0x00 }, /* п to p */ 1543 { 0x0440, 0x72, 0x00, 0x00, 0x00 }, /* р to r */ 1544 { 0x0441, 0x73, 0x00, 0x00, 0x00 }, /* с to s */ 1545 { 0x0442, 0x74, 0x00, 0x00, 0x00 }, /* т to t */ 1546 { 0x0443, 0x75, 0x00, 0x00, 0x00 }, /* у to u */ 1547 { 0x0444, 0x66, 0x00, 0x00, 0x00 }, /* ф to f */ 1548 { 0x0445, 0x6b, 0x68, 0x00, 0x00 }, /* х to kh */ 1549 { 0x0446, 0x74, 0x63, 0x00, 0x00 }, /* ц to tc */ 1550 { 0x0447, 0x63, 0x68, 0x00, 0x00 }, /* ч to ch */ 1551 { 0x0448, 0x73, 0x68, 0x00, 0x00 }, /* ш to sh */ 1552 { 0x0449, 0x73, 0x68, 0x63, 0x68 }, /* щ to shch */ 1553 { 0x044A, 0x61, 0x00, 0x00, 0x00 }, /* to a */ 1554 { 0x044B, 0x79, 0x00, 0x00, 0x00 }, /* ы to y */ 1555 { 0x044C, 0x79, 0x00, 0x00, 0x00 }, /* to y */ 1556 { 0x044D, 0x65, 0x00, 0x00, 0x00 }, /* э to e */ 1557 { 0x044E, 0x69, 0x75, 0x00, 0x00 }, /* ю to iu */ 1558 { 0x044F, 0x69, 0x61, 0x00, 0x00 }, /* я to ia */ 1559 { 0x0450, 0x65, 0x00, 0x00, 0x00 }, /* ѐ to e */ 1560 { 0x0451, 0x65, 0x00, 0x00, 0x00 }, /* ё to e */ 1561 { 0x0452, 0x64, 0x00, 0x00, 0x00 }, /* ђ to d */ 1562 { 0x0453, 0x67, 0x00, 0x00, 0x00 }, /* ѓ to g */ 1563 { 0x0454, 0x65, 0x00, 0x00, 0x00 }, /* є to e */ 1564 { 0x0455, 0x7a, 0x00, 0x00, 0x00 }, /* ѕ to z */ 1565 { 0x0456, 0x69, 0x00, 0x00, 0x00 }, /* і to i */ 1566 { 0x0457, 0x69, 0x00, 0x00, 0x00 }, /* ї to i */ 1567 { 0x0458, 0x6a, 0x00, 0x00, 0x00 }, /* ј to j */ 1568 { 0x0459, 0x69, 0x00, 0x00, 0x00 }, /* љ to i */ 1569 { 0x045A, 0x6e, 0x00, 0x00, 0x00 }, /* њ to n */ 1570 { 0x045B, 0x64, 0x00, 0x00, 0x00 }, /* ћ to d */ 1571 { 0x045C, 0x6b, 0x00, 0x00, 0x00 }, /* ќ to k */ 1572 { 0x045D, 0x69, 0x00, 0x00, 0x00 }, /* ѝ to i */ 1573 { 0x045E, 0x75, 0x00, 0x00, 0x00 }, /* ў to u */ 1574 { 0x045F, 0x64, 0x00, 0x00, 0x00 }, /* џ to d */ 1575 { 0x1E02, 0x42, 0x00, 0x00, 0x00 }, /* Ḃ to B */ 1576 { 0x1E03, 0x62, 0x00, 0x00, 0x00 }, /* ḃ to b */ 1577 { 0x1E0A, 0x44, 0x00, 0x00, 0x00 }, /* Ḋ to D */ 1578 { 0x1E0B, 0x64, 0x00, 0x00, 0x00 }, /* ḋ to d */ 1579 { 0x1E1E, 0x46, 0x00, 0x00, 0x00 }, /* Ḟ to F */ 1580 { 0x1E1F, 0x66, 0x00, 0x00, 0x00 }, /* ḟ to f */ 1581 { 0x1E40, 0x4D, 0x00, 0x00, 0x00 }, /* Ṁ to M */ 1582 { 0x1E41, 0x6D, 0x00, 0x00, 0x00 }, /* ṁ to m */ 1583 { 0x1E56, 0x50, 0x00, 0x00, 0x00 }, /* Ṗ to P */ 1584 { 0x1E57, 0x70, 0x00, 0x00, 0x00 }, /* ṗ to p */ 1585 { 0x1E60, 0x53, 0x00, 0x00, 0x00 }, /* Ṡ to S */ 1586 { 0x1E61, 0x73, 0x00, 0x00, 0x00 }, /* ṡ to s */ 1587 { 0x1E6A, 0x54, 0x00, 0x00, 0x00 }, /* Ṫ to T */ 1588 { 0x1E6B, 0x74, 0x00, 0x00, 0x00 }, /* ṫ to t */ 1589 { 0x1E80, 0x57, 0x00, 0x00, 0x00 }, /* Ẁ to W */ 1590 { 0x1E81, 0x77, 0x00, 0x00, 0x00 }, /* ẁ to w */ 1591 { 0x1E82, 0x57, 0x00, 0x00, 0x00 }, /* Ẃ to W */ 1592 { 0x1E83, 0x77, 0x00, 0x00, 0x00 }, /* ẃ to w */ 1593 { 0x1E84, 0x57, 0x00, 0x00, 0x00 }, /* Ẅ to W */ 1594 { 0x1E85, 0x77, 0x00, 0x00, 0x00 }, /* ẅ to w */ 1595 { 0x1EF2, 0x59, 0x00, 0x00, 0x00 }, /* Ỳ to Y */ 1596 { 0x1EF3, 0x79, 0x00, 0x00, 0x00 }, /* ỳ to y */ 1597 { 0xFB00, 0x66, 0x66, 0x00, 0x00 }, /* ff to ff */ 1598 { 0xFB01, 0x66, 0x69, 0x00, 0x00 }, /* fi to fi */ 1599 { 0xFB02, 0x66, 0x6C, 0x00, 0x00 }, /* fl to fl */ 1600 { 0xFB05, 0x73, 0x74, 0x00, 0x00 }, /* ſt to st */ 1601 { 0xFB06, 0x73, 0x74, 0x00, 0x00 }, /* st to st */ 1602 }; 1603 1604 static const Transliteration *spellfixFindTranslit(int c, int *pxTop){ 1605 *pxTop = (sizeof(translit)/sizeof(translit[0])) - 1; 1606 return translit; 1607 } 1608 1609 /* 1610 ** Convert the input string from UTF-8 into pure ASCII by converting 1611 ** all non-ASCII characters to some combination of characters in the 1612 ** ASCII subset. 1613 ** 1614 ** The returned string might contain more characters than the input. 1615 ** 1616 ** Space to hold the returned string comes from sqlite3_malloc() and 1617 ** should be freed by the caller. 1618 */ 1619 static unsigned char *transliterate(const unsigned char *zIn, int nIn){ 1620 unsigned char *zOut = sqlite3_malloc64( nIn*4 + 1 ); 1621 int c, sz, nOut; 1622 if( zOut==0 ) return 0; 1623 nOut = 0; 1624 while( nIn>0 ){ 1625 c = utf8Read(zIn, nIn, &sz); 1626 zIn += sz; 1627 nIn -= sz; 1628 if( c<=127 ){ 1629 zOut[nOut++] = (unsigned char)c; 1630 }else{ 1631 int xTop, xBtm, x; 1632 const Transliteration *tbl = spellfixFindTranslit(c, &xTop); 1633 xBtm = 0; 1634 while( xTop>=xBtm ){ 1635 x = (xTop + xBtm)/2; 1636 if( tbl[x].cFrom==c ){ 1637 zOut[nOut++] = tbl[x].cTo0; 1638 if( tbl[x].cTo1 ){ 1639 zOut[nOut++] = tbl[x].cTo1; 1640 if( tbl[x].cTo2 ){ 1641 zOut[nOut++] = tbl[x].cTo2; 1642 if( tbl[x].cTo3 ){ 1643 zOut[nOut++] = tbl[x].cTo3; 1644 } 1645 } 1646 } 1647 c = 0; 1648 break; 1649 }else if( tbl[x].cFrom>c ){ 1650 xTop = x-1; 1651 }else{ 1652 xBtm = x+1; 1653 } 1654 } 1655 if( c ) zOut[nOut++] = '?'; 1656 } 1657 } 1658 zOut[nOut] = 0; 1659 return zOut; 1660 } 1661 1662 /* 1663 ** Return the number of characters in the shortest prefix of the input 1664 ** string that transliterates to an ASCII string nTrans bytes or longer. 1665 ** Or, if the transliteration of the input string is less than nTrans 1666 ** bytes in size, return the number of characters in the input string. 1667 */ 1668 static int translen_to_charlen(const char *zIn, int nIn, int nTrans){ 1669 int i, c, sz, nOut; 1670 int nChar; 1671 1672 i = nOut = 0; 1673 for(nChar=0; i<nIn && nOut<nTrans; nChar++){ 1674 c = utf8Read((const unsigned char *)&zIn[i], nIn-i, &sz); 1675 i += sz; 1676 1677 nOut++; 1678 if( c>=128 ){ 1679 int xTop, xBtm, x; 1680 const Transliteration *tbl = spellfixFindTranslit(c, &xTop); 1681 xBtm = 0; 1682 while( xTop>=xBtm ){ 1683 x = (xTop + xBtm)/2; 1684 if( tbl[x].cFrom==c ){ 1685 if( tbl[x].cTo1 ){ 1686 nOut++; 1687 if( tbl[x].cTo2 ){ 1688 nOut++; 1689 if( tbl[x].cTo3 ){ 1690 nOut++; 1691 } 1692 } 1693 } 1694 break; 1695 }else if( tbl[x].cFrom>c ){ 1696 xTop = x-1; 1697 }else{ 1698 xBtm = x+1; 1699 } 1700 } 1701 } 1702 } 1703 1704 return nChar; 1705 } 1706 1707 1708 /* 1709 ** spellfix1_translit(X) 1710 ** 1711 ** Convert a string that contains non-ASCII Roman characters into 1712 ** pure ASCII. 1713 */ 1714 static void transliterateSqlFunc( 1715 sqlite3_context *context, 1716 int argc, 1717 sqlite3_value **argv 1718 ){ 1719 const unsigned char *zIn = sqlite3_value_text(argv[0]); 1720 int nIn = sqlite3_value_bytes(argv[0]); 1721 unsigned char *zOut = transliterate(zIn, nIn); 1722 if( zOut==0 ){ 1723 sqlite3_result_error_nomem(context); 1724 }else{ 1725 sqlite3_result_text(context, (char*)zOut, -1, sqlite3_free); 1726 } 1727 } 1728 1729 /* 1730 ** spellfix1_scriptcode(X) 1731 ** 1732 ** Try to determine the dominant script used by the word X and return 1733 ** its ISO 15924 numeric code. 1734 ** 1735 ** The current implementation only understands the following scripts: 1736 ** 1737 ** 215 (Latin) 1738 ** 220 (Cyrillic) 1739 ** 200 (Greek) 1740 ** 1741 ** This routine will return 998 if the input X contains characters from 1742 ** two or more of the above scripts or 999 if X contains no characters 1743 ** from any of the above scripts. 1744 */ 1745 static void scriptCodeSqlFunc( 1746 sqlite3_context *context, 1747 int argc, 1748 sqlite3_value **argv 1749 ){ 1750 const unsigned char *zIn = sqlite3_value_text(argv[0]); 1751 int nIn = sqlite3_value_bytes(argv[0]); 1752 int c, sz; 1753 int scriptMask = 0; 1754 int res; 1755 int seenDigit = 0; 1756 # define SCRIPT_LATIN 0x0001 1757 # define SCRIPT_CYRILLIC 0x0002 1758 # define SCRIPT_GREEK 0x0004 1759 # define SCRIPT_HEBREW 0x0008 1760 # define SCRIPT_ARABIC 0x0010 1761 1762 while( nIn>0 ){ 1763 c = utf8Read(zIn, nIn, &sz); 1764 zIn += sz; 1765 nIn -= sz; 1766 if( c<0x02af ){ 1767 if( c>=0x80 || midClass[c&0x7f]<CCLASS_DIGIT ){ 1768 scriptMask |= SCRIPT_LATIN; 1769 }else if( c>='0' && c<='9' ){ 1770 seenDigit = 1; 1771 } 1772 }else if( c>=0x0400 && c<=0x04ff ){ 1773 scriptMask |= SCRIPT_CYRILLIC; 1774 }else if( c>=0x0386 && c<=0x03ce ){ 1775 scriptMask |= SCRIPT_GREEK; 1776 }else if( c>=0x0590 && c<=0x05ff ){ 1777 scriptMask |= SCRIPT_HEBREW; 1778 }else if( c>=0x0600 && c<=0x06ff ){ 1779 scriptMask |= SCRIPT_ARABIC; 1780 } 1781 } 1782 if( scriptMask==0 && seenDigit ) scriptMask = SCRIPT_LATIN; 1783 switch( scriptMask ){ 1784 case 0: res = 999; break; 1785 case SCRIPT_LATIN: res = 215; break; 1786 case SCRIPT_CYRILLIC: res = 220; break; 1787 case SCRIPT_GREEK: res = 200; break; 1788 case SCRIPT_HEBREW: res = 125; break; 1789 case SCRIPT_ARABIC: res = 160; break; 1790 default: res = 998; break; 1791 } 1792 sqlite3_result_int(context, res); 1793 } 1794 1795 /* End transliterate 1796 ****************************************************************************** 1797 ****************************************************************************** 1798 ** Begin spellfix1 virtual table. 1799 */ 1800 1801 /* Maximum length of a phonehash used for querying the shadow table */ 1802 #define SPELLFIX_MX_HASH 32 1803 1804 /* Maximum number of hash strings to examine per query */ 1805 #define SPELLFIX_MX_RUN 1 1806 1807 typedef struct spellfix1_vtab spellfix1_vtab; 1808 typedef struct spellfix1_cursor spellfix1_cursor; 1809 1810 /* Fuzzy-search virtual table object */ 1811 struct spellfix1_vtab { 1812 sqlite3_vtab base; /* Base class - must be first */ 1813 sqlite3 *db; /* Database connection */ 1814 char *zDbName; /* Name of database holding this table */ 1815 char *zTableName; /* Name of the virtual table */ 1816 char *zCostTable; /* Table holding edit-distance cost numbers */ 1817 EditDist3Config *pConfig3; /* Parsed edit distance costs */ 1818 }; 1819 1820 /* Fuzzy-search cursor object */ 1821 struct spellfix1_cursor { 1822 sqlite3_vtab_cursor base; /* Base class - must be first */ 1823 spellfix1_vtab *pVTab; /* The table to which this cursor belongs */ 1824 char *zPattern; /* rhs of MATCH clause */ 1825 int idxNum; /* idxNum value passed to xFilter() */ 1826 int nRow; /* Number of rows of content */ 1827 int nAlloc; /* Number of allocated rows */ 1828 int iRow; /* Current row of content */ 1829 int iLang; /* Value of the langid= constraint */ 1830 int iTop; /* Value of the top= constraint */ 1831 int iScope; /* Value of the scope= constraint */ 1832 int nSearch; /* Number of vocabulary items checked */ 1833 sqlite3_stmt *pFullScan; /* Shadow query for a full table scan */ 1834 struct spellfix1_row { /* For each row of content */ 1835 sqlite3_int64 iRowid; /* Rowid for this row */ 1836 char *zWord; /* Text for this row */ 1837 int iRank; /* Rank for this row */ 1838 int iDistance; /* Distance from pattern for this row */ 1839 int iScore; /* Score for sorting */ 1840 int iMatchlen; /* Value of matchlen column (or -1) */ 1841 char zHash[SPELLFIX_MX_HASH]; /* the phonehash used for this match */ 1842 } *a; 1843 }; 1844 1845 /* 1846 ** Construct one or more SQL statements from the format string given 1847 ** and then evaluate those statements. The success code is written 1848 ** into *pRc. 1849 ** 1850 ** If *pRc is initially non-zero then this routine is a no-op. 1851 */ 1852 static void spellfix1DbExec( 1853 int *pRc, /* Success code */ 1854 sqlite3 *db, /* Database in which to run SQL */ 1855 const char *zFormat, /* Format string for SQL */ 1856 ... /* Arguments to the format string */ 1857 ){ 1858 va_list ap; 1859 char *zSql; 1860 if( *pRc ) return; 1861 va_start(ap, zFormat); 1862 zSql = sqlite3_vmprintf(zFormat, ap); 1863 va_end(ap); 1864 if( zSql==0 ){ 1865 *pRc = SQLITE_NOMEM; 1866 }else{ 1867 *pRc = sqlite3_exec(db, zSql, 0, 0, 0); 1868 sqlite3_free(zSql); 1869 } 1870 } 1871 1872 /* 1873 ** xDisconnect/xDestroy method for the fuzzy-search module. 1874 */ 1875 static int spellfix1Uninit(int isDestroy, sqlite3_vtab *pVTab){ 1876 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 1877 int rc = SQLITE_OK; 1878 if( isDestroy ){ 1879 sqlite3 *db = p->db; 1880 spellfix1DbExec(&rc, db, "DROP TABLE IF EXISTS \"%w\".\"%w_vocab\"", 1881 p->zDbName, p->zTableName); 1882 } 1883 if( rc==SQLITE_OK ){ 1884 sqlite3_free(p->zTableName); 1885 editDist3ConfigDelete(p->pConfig3); 1886 sqlite3_free(p->zCostTable); 1887 sqlite3_free(p); 1888 } 1889 return rc; 1890 } 1891 static int spellfix1Disconnect(sqlite3_vtab *pVTab){ 1892 return spellfix1Uninit(0, pVTab); 1893 } 1894 static int spellfix1Destroy(sqlite3_vtab *pVTab){ 1895 return spellfix1Uninit(1, pVTab); 1896 } 1897 1898 /* 1899 ** Make a copy of a string. Remove leading and trailing whitespace 1900 ** and dequote it. 1901 */ 1902 static char *spellfix1Dequote(const char *zIn){ 1903 char *zOut; 1904 int i, j; 1905 char c; 1906 while( isspace((unsigned char)zIn[0]) ) zIn++; 1907 zOut = sqlite3_mprintf("%s", zIn); 1908 if( zOut==0 ) return 0; 1909 i = (int)strlen(zOut); 1910 #if 0 /* The parser will never leave spaces at the end */ 1911 while( i>0 && isspace(zOut[i-1]) ){ i--; } 1912 #endif 1913 zOut[i] = 0; 1914 c = zOut[0]; 1915 if( c=='\'' || c=='"' ){ 1916 for(i=1, j=0; ALWAYS(zOut[i]); i++){ 1917 zOut[j++] = zOut[i]; 1918 if( zOut[i]==c ){ 1919 if( zOut[i+1]==c ){ 1920 i++; 1921 }else{ 1922 zOut[j-1] = 0; 1923 break; 1924 } 1925 } 1926 } 1927 } 1928 return zOut; 1929 } 1930 1931 1932 /* 1933 ** xConnect/xCreate method for the spellfix1 module. Arguments are: 1934 ** 1935 ** argv[0] -> module name ("spellfix1") 1936 ** argv[1] -> database name 1937 ** argv[2] -> table name 1938 ** argv[3].. -> optional arguments (i.e. "edit_cost_table" parameter) 1939 */ 1940 static int spellfix1Init( 1941 int isCreate, 1942 sqlite3 *db, 1943 void *pAux, 1944 int argc, const char *const*argv, 1945 sqlite3_vtab **ppVTab, 1946 char **pzErr 1947 ){ 1948 spellfix1_vtab *pNew = 0; 1949 /* const char *zModule = argv[0]; // not used */ 1950 const char *zDbName = argv[1]; 1951 const char *zTableName = argv[2]; 1952 int nDbName; 1953 int rc = SQLITE_OK; 1954 int i; 1955 1956 nDbName = (int)strlen(zDbName); 1957 pNew = sqlite3_malloc64( sizeof(*pNew) + nDbName + 1); 1958 if( pNew==0 ){ 1959 rc = SQLITE_NOMEM; 1960 }else{ 1961 memset(pNew, 0, sizeof(*pNew)); 1962 pNew->zDbName = (char*)&pNew[1]; 1963 memcpy(pNew->zDbName, zDbName, nDbName+1); 1964 pNew->zTableName = sqlite3_mprintf("%s", zTableName); 1965 pNew->db = db; 1966 if( pNew->zTableName==0 ){ 1967 rc = SQLITE_NOMEM; 1968 }else{ 1969 rc = sqlite3_declare_vtab(db, 1970 "CREATE TABLE x(word,rank,distance,langid, " 1971 "score, matchlen, phonehash HIDDEN, " 1972 "top HIDDEN, scope HIDDEN, srchcnt HIDDEN, " 1973 "soundslike HIDDEN, command HIDDEN)" 1974 ); 1975 #define SPELLFIX_COL_WORD 0 1976 #define SPELLFIX_COL_RANK 1 1977 #define SPELLFIX_COL_DISTANCE 2 1978 #define SPELLFIX_COL_LANGID 3 1979 #define SPELLFIX_COL_SCORE 4 1980 #define SPELLFIX_COL_MATCHLEN 5 1981 #define SPELLFIX_COL_PHONEHASH 6 1982 #define SPELLFIX_COL_TOP 7 1983 #define SPELLFIX_COL_SCOPE 8 1984 #define SPELLFIX_COL_SRCHCNT 9 1985 #define SPELLFIX_COL_SOUNDSLIKE 10 1986 #define SPELLFIX_COL_COMMAND 11 1987 } 1988 if( rc==SQLITE_OK && isCreate ){ 1989 spellfix1DbExec(&rc, db, 1990 "CREATE TABLE IF NOT EXISTS \"%w\".\"%w_vocab\"(\n" 1991 " id INTEGER PRIMARY KEY,\n" 1992 " rank INT,\n" 1993 " langid INT,\n" 1994 " word TEXT,\n" 1995 " k1 TEXT,\n" 1996 " k2 TEXT\n" 1997 ");\n", 1998 zDbName, zTableName 1999 ); 2000 spellfix1DbExec(&rc, db, 2001 "CREATE INDEX IF NOT EXISTS \"%w\".\"%w_vocab_index_langid_k2\" " 2002 "ON \"%w_vocab\"(langid,k2);", 2003 zDbName, zTableName, zTableName 2004 ); 2005 } 2006 for(i=3; rc==SQLITE_OK && i<argc; i++){ 2007 if( strncmp(argv[i],"edit_cost_table=",16)==0 && pNew->zCostTable==0 ){ 2008 pNew->zCostTable = spellfix1Dequote(&argv[i][16]); 2009 if( pNew->zCostTable==0 ) rc = SQLITE_NOMEM; 2010 continue; 2011 } 2012 *pzErr = sqlite3_mprintf("bad argument to spellfix1(): \"%s\"", argv[i]); 2013 rc = SQLITE_ERROR; 2014 } 2015 } 2016 2017 if( rc && pNew ){ 2018 *ppVTab = 0; 2019 spellfix1Uninit(0, &pNew->base); 2020 }else{ 2021 *ppVTab = (sqlite3_vtab *)pNew; 2022 } 2023 return rc; 2024 } 2025 2026 /* 2027 ** The xConnect and xCreate methods 2028 */ 2029 static int spellfix1Connect( 2030 sqlite3 *db, 2031 void *pAux, 2032 int argc, const char *const*argv, 2033 sqlite3_vtab **ppVTab, 2034 char **pzErr 2035 ){ 2036 return spellfix1Init(0, db, pAux, argc, argv, ppVTab, pzErr); 2037 } 2038 static int spellfix1Create( 2039 sqlite3 *db, 2040 void *pAux, 2041 int argc, const char *const*argv, 2042 sqlite3_vtab **ppVTab, 2043 char **pzErr 2044 ){ 2045 return spellfix1Init(1, db, pAux, argc, argv, ppVTab, pzErr); 2046 } 2047 2048 /* 2049 ** Clear all of the content from a cursor. 2050 */ 2051 static void spellfix1ResetCursor(spellfix1_cursor *pCur){ 2052 int i; 2053 for(i=0; i<pCur->nRow; i++){ 2054 sqlite3_free(pCur->a[i].zWord); 2055 } 2056 pCur->nRow = 0; 2057 pCur->iRow = 0; 2058 pCur->nSearch = 0; 2059 if( pCur->pFullScan ){ 2060 sqlite3_finalize(pCur->pFullScan); 2061 pCur->pFullScan = 0; 2062 } 2063 } 2064 2065 /* 2066 ** Resize the cursor to hold up to N rows of content 2067 */ 2068 static void spellfix1ResizeCursor(spellfix1_cursor *pCur, int N){ 2069 struct spellfix1_row *aNew; 2070 assert( N>=pCur->nRow ); 2071 aNew = sqlite3_realloc64(pCur->a, sizeof(pCur->a[0])*N); 2072 if( aNew==0 && N>0 ){ 2073 spellfix1ResetCursor(pCur); 2074 sqlite3_free(pCur->a); 2075 pCur->nAlloc = 0; 2076 pCur->a = 0; 2077 }else{ 2078 pCur->nAlloc = N; 2079 pCur->a = aNew; 2080 } 2081 } 2082 2083 2084 /* 2085 ** Close a fuzzy-search cursor. 2086 */ 2087 static int spellfix1Close(sqlite3_vtab_cursor *cur){ 2088 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2089 spellfix1ResetCursor(pCur); 2090 spellfix1ResizeCursor(pCur, 0); 2091 sqlite3_free(pCur->zPattern); 2092 sqlite3_free(pCur); 2093 return SQLITE_OK; 2094 } 2095 2096 #define SPELLFIX_IDXNUM_MATCH 0x01 /* word MATCH $str */ 2097 #define SPELLFIX_IDXNUM_LANGID 0x02 /* langid == $langid */ 2098 #define SPELLFIX_IDXNUM_TOP 0x04 /* top = $top */ 2099 #define SPELLFIX_IDXNUM_SCOPE 0x08 /* scope = $scope */ 2100 #define SPELLFIX_IDXNUM_DISTLT 0x10 /* distance < $distance */ 2101 #define SPELLFIX_IDXNUM_DISTLE 0x20 /* distance <= $distance */ 2102 #define SPELLFIX_IDXNUM_ROWID 0x40 /* rowid = $rowid */ 2103 #define SPELLFIX_IDXNUM_DIST (0x10|0x20) /* DISTLT and DISTLE */ 2104 2105 /* 2106 ** 2107 ** The plan number is a bitmask of the SPELLFIX_IDXNUM_* values defined 2108 ** above. 2109 ** 2110 ** filter.argv[*] values contains $str, $langid, $top, $scope and $rowid 2111 ** if specified and in that order. 2112 */ 2113 static int spellfix1BestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){ 2114 int iPlan = 0; 2115 int iLangTerm = -1; 2116 int iTopTerm = -1; 2117 int iScopeTerm = -1; 2118 int iDistTerm = -1; 2119 int iRowidTerm = -1; 2120 int i; 2121 const struct sqlite3_index_constraint *pConstraint; 2122 pConstraint = pIdxInfo->aConstraint; 2123 for(i=0; i<pIdxInfo->nConstraint; i++, pConstraint++){ 2124 if( pConstraint->usable==0 ) continue; 2125 2126 /* Terms of the form: word MATCH $str */ 2127 if( (iPlan & SPELLFIX_IDXNUM_MATCH)==0 2128 && pConstraint->iColumn==SPELLFIX_COL_WORD 2129 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_MATCH 2130 ){ 2131 iPlan |= SPELLFIX_IDXNUM_MATCH; 2132 pIdxInfo->aConstraintUsage[i].argvIndex = 1; 2133 pIdxInfo->aConstraintUsage[i].omit = 1; 2134 } 2135 2136 /* Terms of the form: langid = $langid */ 2137 if( (iPlan & SPELLFIX_IDXNUM_LANGID)==0 2138 && pConstraint->iColumn==SPELLFIX_COL_LANGID 2139 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2140 ){ 2141 iPlan |= SPELLFIX_IDXNUM_LANGID; 2142 iLangTerm = i; 2143 } 2144 2145 /* Terms of the form: top = $top */ 2146 if( (iPlan & SPELLFIX_IDXNUM_TOP)==0 2147 && pConstraint->iColumn==SPELLFIX_COL_TOP 2148 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2149 ){ 2150 iPlan |= SPELLFIX_IDXNUM_TOP; 2151 iTopTerm = i; 2152 } 2153 2154 /* Terms of the form: scope = $scope */ 2155 if( (iPlan & SPELLFIX_IDXNUM_SCOPE)==0 2156 && pConstraint->iColumn==SPELLFIX_COL_SCOPE 2157 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2158 ){ 2159 iPlan |= SPELLFIX_IDXNUM_SCOPE; 2160 iScopeTerm = i; 2161 } 2162 2163 /* Terms of the form: distance < $dist or distance <= $dist */ 2164 if( (iPlan & SPELLFIX_IDXNUM_DIST)==0 2165 && pConstraint->iColumn==SPELLFIX_COL_DISTANCE 2166 && (pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT 2167 || pConstraint->op==SQLITE_INDEX_CONSTRAINT_LE) 2168 ){ 2169 if( pConstraint->op==SQLITE_INDEX_CONSTRAINT_LT ){ 2170 iPlan |= SPELLFIX_IDXNUM_DISTLT; 2171 }else{ 2172 iPlan |= SPELLFIX_IDXNUM_DISTLE; 2173 } 2174 iDistTerm = i; 2175 } 2176 2177 /* Terms of the form: distance < $dist or distance <= $dist */ 2178 if( (iPlan & SPELLFIX_IDXNUM_ROWID)==0 2179 && pConstraint->iColumn<0 2180 && pConstraint->op==SQLITE_INDEX_CONSTRAINT_EQ 2181 ){ 2182 iPlan |= SPELLFIX_IDXNUM_ROWID; 2183 iRowidTerm = i; 2184 } 2185 } 2186 if( iPlan&SPELLFIX_IDXNUM_MATCH ){ 2187 int idx = 2; 2188 pIdxInfo->idxNum = iPlan; 2189 if( pIdxInfo->nOrderBy==1 2190 && pIdxInfo->aOrderBy[0].iColumn==SPELLFIX_COL_SCORE 2191 && pIdxInfo->aOrderBy[0].desc==0 2192 ){ 2193 pIdxInfo->orderByConsumed = 1; /* Default order by iScore */ 2194 } 2195 if( iPlan&SPELLFIX_IDXNUM_LANGID ){ 2196 pIdxInfo->aConstraintUsage[iLangTerm].argvIndex = idx++; 2197 pIdxInfo->aConstraintUsage[iLangTerm].omit = 1; 2198 } 2199 if( iPlan&SPELLFIX_IDXNUM_TOP ){ 2200 pIdxInfo->aConstraintUsage[iTopTerm].argvIndex = idx++; 2201 pIdxInfo->aConstraintUsage[iTopTerm].omit = 1; 2202 } 2203 if( iPlan&SPELLFIX_IDXNUM_SCOPE ){ 2204 pIdxInfo->aConstraintUsage[iScopeTerm].argvIndex = idx++; 2205 pIdxInfo->aConstraintUsage[iScopeTerm].omit = 1; 2206 } 2207 if( iPlan&SPELLFIX_IDXNUM_DIST ){ 2208 pIdxInfo->aConstraintUsage[iDistTerm].argvIndex = idx++; 2209 pIdxInfo->aConstraintUsage[iDistTerm].omit = 1; 2210 } 2211 pIdxInfo->estimatedCost = 1e5; 2212 }else if( (iPlan & SPELLFIX_IDXNUM_ROWID) ){ 2213 pIdxInfo->idxNum = SPELLFIX_IDXNUM_ROWID; 2214 pIdxInfo->aConstraintUsage[iRowidTerm].argvIndex = 1; 2215 pIdxInfo->aConstraintUsage[iRowidTerm].omit = 1; 2216 pIdxInfo->estimatedCost = 5; 2217 }else{ 2218 pIdxInfo->idxNum = 0; 2219 pIdxInfo->estimatedCost = 1e50; 2220 } 2221 return SQLITE_OK; 2222 } 2223 2224 /* 2225 ** Open a new fuzzy-search cursor. 2226 */ 2227 static int spellfix1Open(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){ 2228 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 2229 spellfix1_cursor *pCur; 2230 pCur = sqlite3_malloc64( sizeof(*pCur) ); 2231 if( pCur==0 ) return SQLITE_NOMEM; 2232 memset(pCur, 0, sizeof(*pCur)); 2233 pCur->pVTab = p; 2234 *ppCursor = &pCur->base; 2235 return SQLITE_OK; 2236 } 2237 2238 /* 2239 ** Adjust a distance measurement by the words rank in order to show 2240 ** preference to common words. 2241 */ 2242 static int spellfix1Score(int iDistance, int iRank){ 2243 int iLog2; 2244 for(iLog2=0; iRank>0; iLog2++, iRank>>=1){} 2245 return iDistance + 32 - iLog2; 2246 } 2247 2248 /* 2249 ** Compare two spellfix1_row objects for sorting purposes in qsort() such 2250 ** that they sort in order of increasing distance. 2251 */ 2252 static int SQLITE_CDECL spellfix1RowCompare(const void *A, const void *B){ 2253 const struct spellfix1_row *a = (const struct spellfix1_row*)A; 2254 const struct spellfix1_row *b = (const struct spellfix1_row*)B; 2255 return a->iScore - b->iScore; 2256 } 2257 2258 /* 2259 ** A structure used to pass information from spellfix1FilterForMatch() 2260 ** into spellfix1RunQuery(). 2261 */ 2262 typedef struct MatchQuery { 2263 spellfix1_cursor *pCur; /* The cursor being queried */ 2264 sqlite3_stmt *pStmt; /* shadow table query statment */ 2265 char zHash[SPELLFIX_MX_HASH]; /* The current phonehash for zPattern */ 2266 const char *zPattern; /* Transliterated input string */ 2267 int nPattern; /* Length of zPattern */ 2268 EditDist3FromString *pMatchStr3; /* Original unicode string */ 2269 EditDist3Config *pConfig3; /* Edit-distance cost coefficients */ 2270 const EditDist3Lang *pLang; /* The selected language coefficients */ 2271 int iLang; /* The language id */ 2272 int iScope; /* Default scope */ 2273 int iMaxDist; /* Maximum allowed edit distance, or -1 */ 2274 int rc; /* Error code */ 2275 int nRun; /* Number of prior runs for the same zPattern */ 2276 char azPrior[SPELLFIX_MX_RUN][SPELLFIX_MX_HASH]; /* Prior hashes */ 2277 } MatchQuery; 2278 2279 /* 2280 ** Run a query looking for the best matches against zPattern using 2281 ** zHash as the character class seed hash. 2282 */ 2283 static void spellfix1RunQuery(MatchQuery *p, const char *zQuery, int nQuery){ 2284 const char *zK1; 2285 const char *zWord; 2286 int iDist; 2287 int iRank; 2288 int iScore; 2289 int iWorst = 0; 2290 int idx; 2291 int idxWorst = -1; 2292 int i; 2293 int iScope = p->iScope; 2294 spellfix1_cursor *pCur = p->pCur; 2295 sqlite3_stmt *pStmt = p->pStmt; 2296 char zHash1[SPELLFIX_MX_HASH]; 2297 char zHash2[SPELLFIX_MX_HASH]; 2298 char *zClass; 2299 int nClass; 2300 int rc; 2301 2302 if( pCur->a==0 || p->rc ) return; /* Prior memory allocation failure */ 2303 zClass = (char*)phoneticHash((unsigned char*)zQuery, nQuery); 2304 if( zClass==0 ){ 2305 p->rc = SQLITE_NOMEM; 2306 return; 2307 } 2308 nClass = (int)strlen(zClass); 2309 if( nClass>SPELLFIX_MX_HASH-2 ){ 2310 nClass = SPELLFIX_MX_HASH-2; 2311 zClass[nClass] = 0; 2312 } 2313 if( nClass<=iScope ){ 2314 if( nClass>2 ){ 2315 iScope = nClass-1; 2316 }else{ 2317 iScope = nClass; 2318 } 2319 } 2320 memcpy(zHash1, zClass, iScope); 2321 sqlite3_free(zClass); 2322 zHash1[iScope] = 0; 2323 memcpy(zHash2, zHash1, iScope); 2324 zHash2[iScope] = 'Z'; 2325 zHash2[iScope+1] = 0; 2326 #if SPELLFIX_MX_RUN>1 2327 for(i=0; i<p->nRun; i++){ 2328 if( strcmp(p->azPrior[i], zHash1)==0 ) return; 2329 } 2330 #endif 2331 assert( p->nRun<SPELLFIX_MX_RUN ); 2332 memcpy(p->azPrior[p->nRun++], zHash1, iScope+1); 2333 if( sqlite3_bind_text(pStmt, 1, zHash1, -1, SQLITE_STATIC)==SQLITE_NOMEM 2334 || sqlite3_bind_text(pStmt, 2, zHash2, -1, SQLITE_STATIC)==SQLITE_NOMEM 2335 ){ 2336 p->rc = SQLITE_NOMEM; 2337 return; 2338 } 2339 #if SPELLFIX_MX_RUN>1 2340 for(i=0; i<pCur->nRow; i++){ 2341 if( pCur->a[i].iScore>iWorst ){ 2342 iWorst = pCur->a[i].iScore; 2343 idxWorst = i; 2344 } 2345 } 2346 #endif 2347 while( sqlite3_step(pStmt)==SQLITE_ROW ){ 2348 int iMatchlen = -1; 2349 iRank = sqlite3_column_int(pStmt, 2); 2350 if( p->pMatchStr3 ){ 2351 int nWord = sqlite3_column_bytes(pStmt, 1); 2352 zWord = (const char*)sqlite3_column_text(pStmt, 1); 2353 iDist = editDist3Core(p->pMatchStr3, zWord, nWord, p->pLang, &iMatchlen); 2354 }else{ 2355 zK1 = (const char*)sqlite3_column_text(pStmt, 3); 2356 if( zK1==0 ) continue; 2357 iDist = editdist1(p->zPattern, zK1, 0); 2358 } 2359 if( iDist<0 ){ 2360 p->rc = SQLITE_NOMEM; 2361 break; 2362 } 2363 pCur->nSearch++; 2364 2365 /* If there is a "distance < $dist" or "distance <= $dist" constraint, 2366 ** check if this row meets it. If not, jump back up to the top of the 2367 ** loop to process the next row. Otherwise, if the row does match the 2368 ** distance constraint, check if the pCur->a[] array is already full. 2369 ** If it is and no explicit "top = ?" constraint was present in the 2370 ** query, grow the array to ensure there is room for the new entry. */ 2371 assert( (p->iMaxDist>=0)==((pCur->idxNum & SPELLFIX_IDXNUM_DIST) ? 1 : 0) ); 2372 if( p->iMaxDist>=0 ){ 2373 if( iDist>p->iMaxDist ) continue; 2374 if( pCur->nRow>=pCur->nAlloc && (pCur->idxNum & SPELLFIX_IDXNUM_TOP)==0 ){ 2375 spellfix1ResizeCursor(pCur, pCur->nAlloc*2 + 10); 2376 if( pCur->a==0 ) break; 2377 } 2378 } 2379 2380 iScore = spellfix1Score(iDist,iRank); 2381 if( pCur->nRow<pCur->nAlloc ){ 2382 idx = pCur->nRow; 2383 }else if( iScore<iWorst ){ 2384 idx = idxWorst; 2385 sqlite3_free(pCur->a[idx].zWord); 2386 }else{ 2387 continue; 2388 } 2389 2390 pCur->a[idx].zWord = sqlite3_mprintf("%s", sqlite3_column_text(pStmt, 1)); 2391 if( pCur->a[idx].zWord==0 ){ 2392 p->rc = SQLITE_NOMEM; 2393 break; 2394 } 2395 pCur->a[idx].iRowid = sqlite3_column_int64(pStmt, 0); 2396 pCur->a[idx].iRank = iRank; 2397 pCur->a[idx].iDistance = iDist; 2398 pCur->a[idx].iScore = iScore; 2399 pCur->a[idx].iMatchlen = iMatchlen; 2400 memcpy(pCur->a[idx].zHash, zHash1, iScope+1); 2401 if( pCur->nRow<pCur->nAlloc ) pCur->nRow++; 2402 if( pCur->nRow==pCur->nAlloc ){ 2403 iWorst = pCur->a[0].iScore; 2404 idxWorst = 0; 2405 for(i=1; i<pCur->nRow; i++){ 2406 iScore = pCur->a[i].iScore; 2407 if( iWorst<iScore ){ 2408 iWorst = iScore; 2409 idxWorst = i; 2410 } 2411 } 2412 } 2413 } 2414 rc = sqlite3_reset(pStmt); 2415 if( rc ) p->rc = rc; 2416 } 2417 2418 /* 2419 ** This version of the xFilter method work if the MATCH term is present 2420 ** and we are doing a scan. 2421 */ 2422 static int spellfix1FilterForMatch( 2423 spellfix1_cursor *pCur, 2424 int argc, 2425 sqlite3_value **argv 2426 ){ 2427 int idxNum = pCur->idxNum; 2428 const unsigned char *zMatchThis; /* RHS of the MATCH operator */ 2429 EditDist3FromString *pMatchStr3 = 0; /* zMatchThis as an editdist string */ 2430 char *zPattern; /* Transliteration of zMatchThis */ 2431 int nPattern; /* Length of zPattern */ 2432 int iLimit = 20; /* Max number of rows of output */ 2433 int iScope = 3; /* Use this many characters of zClass */ 2434 int iLang = 0; /* Language code */ 2435 char *zSql; /* SQL of shadow table query */ 2436 sqlite3_stmt *pStmt = 0; /* Shadow table query */ 2437 int rc; /* Result code */ 2438 int idx = 1; /* Next available filter parameter */ 2439 spellfix1_vtab *p = pCur->pVTab; /* The virtual table that owns pCur */ 2440 MatchQuery x; /* For passing info to RunQuery() */ 2441 2442 /* Load the cost table if we have not already done so */ 2443 if( p->zCostTable!=0 && p->pConfig3==0 ){ 2444 p->pConfig3 = sqlite3_malloc64( sizeof(p->pConfig3[0]) ); 2445 if( p->pConfig3==0 ) return SQLITE_NOMEM; 2446 memset(p->pConfig3, 0, sizeof(p->pConfig3[0])); 2447 rc = editDist3ConfigLoad(p->pConfig3, p->db, p->zCostTable); 2448 if( rc ) return rc; 2449 } 2450 memset(&x, 0, sizeof(x)); 2451 x.iScope = 3; /* Default scope if none specified by "WHERE scope=N" */ 2452 x.iMaxDist = -1; /* Maximum allowed edit distance */ 2453 2454 if( idxNum&2 ){ 2455 iLang = sqlite3_value_int(argv[idx++]); 2456 } 2457 if( idxNum&4 ){ 2458 iLimit = sqlite3_value_int(argv[idx++]); 2459 if( iLimit<1 ) iLimit = 1; 2460 } 2461 if( idxNum&8 ){ 2462 x.iScope = sqlite3_value_int(argv[idx++]); 2463 if( x.iScope<1 ) x.iScope = 1; 2464 if( x.iScope>SPELLFIX_MX_HASH-2 ) x.iScope = SPELLFIX_MX_HASH-2; 2465 } 2466 if( idxNum&(16|32) ){ 2467 x.iMaxDist = sqlite3_value_int(argv[idx++]); 2468 if( idxNum&16 ) x.iMaxDist--; 2469 if( x.iMaxDist<0 ) x.iMaxDist = 0; 2470 } 2471 spellfix1ResetCursor(pCur); 2472 spellfix1ResizeCursor(pCur, iLimit); 2473 zMatchThis = sqlite3_value_text(argv[0]); 2474 if( zMatchThis==0 ) return SQLITE_OK; 2475 if( p->pConfig3 ){ 2476 x.pLang = editDist3FindLang(p->pConfig3, iLang); 2477 pMatchStr3 = editDist3FromStringNew(x.pLang, (const char*)zMatchThis, -1); 2478 if( pMatchStr3==0 ){ 2479 x.rc = SQLITE_NOMEM; 2480 goto filter_exit; 2481 } 2482 }else{ 2483 x.pLang = 0; 2484 } 2485 zPattern = (char*)transliterate(zMatchThis, sqlite3_value_bytes(argv[0])); 2486 sqlite3_free(pCur->zPattern); 2487 pCur->zPattern = zPattern; 2488 if( zPattern==0 ){ 2489 x.rc = SQLITE_NOMEM; 2490 goto filter_exit; 2491 } 2492 nPattern = (int)strlen(zPattern); 2493 if( zPattern[nPattern-1]=='*' ) nPattern--; 2494 zSql = sqlite3_mprintf( 2495 "SELECT id, word, rank, coalesce(k1,word)" 2496 " FROM \"%w\".\"%w_vocab\"" 2497 " WHERE langid=%d AND k2>=?1 AND k2<?2", 2498 p->zDbName, p->zTableName, iLang 2499 ); 2500 if( zSql==0 ){ 2501 x.rc = SQLITE_NOMEM; 2502 pStmt = 0; 2503 goto filter_exit; 2504 } 2505 rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0); 2506 sqlite3_free(zSql); 2507 pCur->iLang = iLang; 2508 x.pCur = pCur; 2509 x.pStmt = pStmt; 2510 x.zPattern = zPattern; 2511 x.nPattern = nPattern; 2512 x.pMatchStr3 = pMatchStr3; 2513 x.iLang = iLang; 2514 x.rc = rc; 2515 x.pConfig3 = p->pConfig3; 2516 if( x.rc==SQLITE_OK ){ 2517 spellfix1RunQuery(&x, zPattern, nPattern); 2518 } 2519 2520 if( pCur->a ){ 2521 qsort(pCur->a, pCur->nRow, sizeof(pCur->a[0]), spellfix1RowCompare); 2522 pCur->iTop = iLimit; 2523 pCur->iScope = iScope; 2524 }else{ 2525 x.rc = SQLITE_NOMEM; 2526 } 2527 2528 filter_exit: 2529 sqlite3_finalize(pStmt); 2530 editDist3FromStringDelete(pMatchStr3); 2531 return x.rc; 2532 } 2533 2534 /* 2535 ** This version of xFilter handles a full-table scan case 2536 */ 2537 static int spellfix1FilterForFullScan( 2538 spellfix1_cursor *pCur, 2539 int argc, 2540 sqlite3_value **argv 2541 ){ 2542 int rc = SQLITE_OK; 2543 int idxNum = pCur->idxNum; 2544 char *zSql; 2545 spellfix1_vtab *pVTab = pCur->pVTab; 2546 spellfix1ResetCursor(pCur); 2547 assert( idxNum==0 || idxNum==64 ); 2548 zSql = sqlite3_mprintf( 2549 "SELECT word, rank, NULL, langid, id FROM \"%w\".\"%w_vocab\"%s", 2550 pVTab->zDbName, pVTab->zTableName, 2551 ((idxNum & 64) ? " WHERE rowid=?" : "") 2552 ); 2553 if( zSql==0 ) return SQLITE_NOMEM; 2554 rc = sqlite3_prepare_v2(pVTab->db, zSql, -1, &pCur->pFullScan, 0); 2555 sqlite3_free(zSql); 2556 if( rc==SQLITE_OK && (idxNum & 64) ){ 2557 assert( argc==1 ); 2558 rc = sqlite3_bind_value(pCur->pFullScan, 1, argv[0]); 2559 } 2560 pCur->nRow = pCur->iRow = 0; 2561 if( rc==SQLITE_OK ){ 2562 rc = sqlite3_step(pCur->pFullScan); 2563 if( rc==SQLITE_ROW ){ pCur->iRow = -1; rc = SQLITE_OK; } 2564 if( rc==SQLITE_DONE ){ rc = SQLITE_OK; } 2565 }else{ 2566 pCur->iRow = 0; 2567 } 2568 return rc; 2569 } 2570 2571 2572 /* 2573 ** Called to "rewind" a cursor back to the beginning so that 2574 ** it starts its output over again. Always called at least once 2575 ** prior to any spellfix1Column, spellfix1Rowid, or spellfix1Eof call. 2576 */ 2577 static int spellfix1Filter( 2578 sqlite3_vtab_cursor *cur, 2579 int idxNum, const char *idxStr, 2580 int argc, sqlite3_value **argv 2581 ){ 2582 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2583 int rc; 2584 pCur->idxNum = idxNum; 2585 if( idxNum & 1 ){ 2586 rc = spellfix1FilterForMatch(pCur, argc, argv); 2587 }else{ 2588 rc = spellfix1FilterForFullScan(pCur, argc, argv); 2589 } 2590 return rc; 2591 } 2592 2593 2594 /* 2595 ** Advance a cursor to its next row of output 2596 */ 2597 static int spellfix1Next(sqlite3_vtab_cursor *cur){ 2598 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2599 int rc = SQLITE_OK; 2600 if( pCur->iRow < pCur->nRow ){ 2601 if( pCur->pFullScan ){ 2602 rc = sqlite3_step(pCur->pFullScan); 2603 if( rc!=SQLITE_ROW ) pCur->iRow = pCur->nRow; 2604 if( rc==SQLITE_ROW || rc==SQLITE_DONE ) rc = SQLITE_OK; 2605 }else{ 2606 pCur->iRow++; 2607 } 2608 } 2609 return rc; 2610 } 2611 2612 /* 2613 ** Return TRUE if we are at the end-of-file 2614 */ 2615 static int spellfix1Eof(sqlite3_vtab_cursor *cur){ 2616 spellfix1_cursor *pCur = (spellfix1_cursor *)cur; 2617 return pCur->iRow>=pCur->nRow; 2618 } 2619 2620 /* 2621 ** Return columns from the current row. 2622 */ 2623 static int spellfix1Column( 2624 sqlite3_vtab_cursor *cur, 2625 sqlite3_context *ctx, 2626 int i 2627 ){ 2628 spellfix1_cursor *pCur = (spellfix1_cursor*)cur; 2629 if( pCur->pFullScan ){ 2630 if( i<=SPELLFIX_COL_LANGID ){ 2631 sqlite3_result_value(ctx, sqlite3_column_value(pCur->pFullScan, i)); 2632 }else{ 2633 sqlite3_result_null(ctx); 2634 } 2635 return SQLITE_OK; 2636 } 2637 switch( i ){ 2638 case SPELLFIX_COL_WORD: { 2639 sqlite3_result_text(ctx, pCur->a[pCur->iRow].zWord, -1, SQLITE_STATIC); 2640 break; 2641 } 2642 case SPELLFIX_COL_RANK: { 2643 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iRank); 2644 break; 2645 } 2646 case SPELLFIX_COL_DISTANCE: { 2647 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iDistance); 2648 break; 2649 } 2650 case SPELLFIX_COL_LANGID: { 2651 sqlite3_result_int(ctx, pCur->iLang); 2652 break; 2653 } 2654 case SPELLFIX_COL_SCORE: { 2655 sqlite3_result_int(ctx, pCur->a[pCur->iRow].iScore); 2656 break; 2657 } 2658 case SPELLFIX_COL_MATCHLEN: { 2659 int iMatchlen = pCur->a[pCur->iRow].iMatchlen; 2660 if( iMatchlen<0 ){ 2661 int nPattern = (int)strlen(pCur->zPattern); 2662 char *zWord = pCur->a[pCur->iRow].zWord; 2663 int nWord = (int)strlen(zWord); 2664 2665 if( nPattern>0 && pCur->zPattern[nPattern-1]=='*' ){ 2666 char *zTranslit; 2667 int res; 2668 zTranslit = (char *)transliterate((unsigned char *)zWord, nWord); 2669 if( !zTranslit ) return SQLITE_NOMEM; 2670 res = editdist1(pCur->zPattern, zTranslit, &iMatchlen); 2671 sqlite3_free(zTranslit); 2672 if( res<0 ) return SQLITE_NOMEM; 2673 iMatchlen = translen_to_charlen(zWord, nWord, iMatchlen); 2674 }else{ 2675 iMatchlen = utf8Charlen(zWord, nWord); 2676 } 2677 } 2678 2679 sqlite3_result_int(ctx, iMatchlen); 2680 break; 2681 } 2682 case SPELLFIX_COL_PHONEHASH: { 2683 sqlite3_result_text(ctx, pCur->a[pCur->iRow].zHash, -1, SQLITE_STATIC); 2684 break; 2685 } 2686 case SPELLFIX_COL_TOP: { 2687 sqlite3_result_int(ctx, pCur->iTop); 2688 break; 2689 } 2690 case SPELLFIX_COL_SCOPE: { 2691 sqlite3_result_int(ctx, pCur->iScope); 2692 break; 2693 } 2694 case SPELLFIX_COL_SRCHCNT: { 2695 sqlite3_result_int(ctx, pCur->nSearch); 2696 break; 2697 } 2698 default: { 2699 sqlite3_result_null(ctx); 2700 break; 2701 } 2702 } 2703 return SQLITE_OK; 2704 } 2705 2706 /* 2707 ** The rowid. 2708 */ 2709 static int spellfix1Rowid(sqlite3_vtab_cursor *cur, sqlite_int64 *pRowid){ 2710 spellfix1_cursor *pCur = (spellfix1_cursor*)cur; 2711 if( pCur->pFullScan ){ 2712 *pRowid = sqlite3_column_int64(pCur->pFullScan, 4); 2713 }else{ 2714 *pRowid = pCur->a[pCur->iRow].iRowid; 2715 } 2716 return SQLITE_OK; 2717 } 2718 2719 /* 2720 ** This function is called by the xUpdate() method. It returns a string 2721 ** containing the conflict mode that xUpdate() should use for the current 2722 ** operation. One of: "ROLLBACK", "IGNORE", "ABORT" or "REPLACE". 2723 */ 2724 static const char *spellfix1GetConflict(sqlite3 *db){ 2725 static const char *azConflict[] = { 2726 /* Note: Instead of "FAIL" - "ABORT". */ 2727 "ROLLBACK", "IGNORE", "ABORT", "ABORT", "REPLACE" 2728 }; 2729 int eConflict = sqlite3_vtab_on_conflict(db); 2730 2731 assert( eConflict==SQLITE_ROLLBACK || eConflict==SQLITE_IGNORE 2732 || eConflict==SQLITE_FAIL || eConflict==SQLITE_ABORT 2733 || eConflict==SQLITE_REPLACE 2734 ); 2735 assert( SQLITE_ROLLBACK==1 ); 2736 assert( SQLITE_IGNORE==2 ); 2737 assert( SQLITE_FAIL==3 ); 2738 assert( SQLITE_ABORT==4 ); 2739 assert( SQLITE_REPLACE==5 ); 2740 2741 return azConflict[eConflict-1]; 2742 } 2743 2744 /* 2745 ** The xUpdate() method. 2746 */ 2747 static int spellfix1Update( 2748 sqlite3_vtab *pVTab, 2749 int argc, 2750 sqlite3_value **argv, 2751 sqlite_int64 *pRowid 2752 ){ 2753 int rc = SQLITE_OK; 2754 sqlite3_int64 rowid, newRowid; 2755 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 2756 sqlite3 *db = p->db; 2757 2758 if( argc==1 ){ 2759 /* A delete operation on the rowid given by argv[0] */ 2760 rowid = *pRowid = sqlite3_value_int64(argv[0]); 2761 spellfix1DbExec(&rc, db, "DELETE FROM \"%w\".\"%w_vocab\" " 2762 " WHERE id=%lld", 2763 p->zDbName, p->zTableName, rowid); 2764 }else{ 2765 const unsigned char *zWord = sqlite3_value_text(argv[SPELLFIX_COL_WORD+2]); 2766 int nWord = sqlite3_value_bytes(argv[SPELLFIX_COL_WORD+2]); 2767 int iLang = sqlite3_value_int(argv[SPELLFIX_COL_LANGID+2]); 2768 int iRank = sqlite3_value_int(argv[SPELLFIX_COL_RANK+2]); 2769 const unsigned char *zSoundslike = 2770 sqlite3_value_text(argv[SPELLFIX_COL_SOUNDSLIKE+2]); 2771 int nSoundslike = sqlite3_value_bytes(argv[SPELLFIX_COL_SOUNDSLIKE+2]); 2772 char *zK1, *zK2; 2773 int i; 2774 char c; 2775 const char *zConflict = spellfix1GetConflict(db); 2776 2777 if( zWord==0 ){ 2778 /* Inserts of the form: INSERT INTO table(command) VALUES('xyzzy'); 2779 ** cause zWord to be NULL, so we look at the "command" column to see 2780 ** what special actions to take */ 2781 const char *zCmd = 2782 (const char*)sqlite3_value_text(argv[SPELLFIX_COL_COMMAND+2]); 2783 if( zCmd==0 ){ 2784 pVTab->zErrMsg = sqlite3_mprintf("NOT NULL constraint failed: %s.word", 2785 p->zTableName); 2786 return SQLITE_CONSTRAINT_NOTNULL; 2787 } 2788 if( strcmp(zCmd,"reset")==0 ){ 2789 /* Reset the edit cost table (if there is one). */ 2790 editDist3ConfigDelete(p->pConfig3); 2791 p->pConfig3 = 0; 2792 return SQLITE_OK; 2793 } 2794 if( strncmp(zCmd,"edit_cost_table=",16)==0 ){ 2795 editDist3ConfigDelete(p->pConfig3); 2796 p->pConfig3 = 0; 2797 sqlite3_free(p->zCostTable); 2798 p->zCostTable = spellfix1Dequote(zCmd+16); 2799 if( p->zCostTable==0 ) return SQLITE_NOMEM; 2800 if( p->zCostTable[0]==0 || sqlite3_stricmp(p->zCostTable,"null")==0 ){ 2801 sqlite3_free(p->zCostTable); 2802 p->zCostTable = 0; 2803 } 2804 return SQLITE_OK; 2805 } 2806 pVTab->zErrMsg = sqlite3_mprintf("unknown value for %s.command: \"%w\"", 2807 p->zTableName, zCmd); 2808 return SQLITE_ERROR; 2809 } 2810 if( iRank<1 ) iRank = 1; 2811 if( zSoundslike ){ 2812 zK1 = (char*)transliterate(zSoundslike, nSoundslike); 2813 }else{ 2814 zK1 = (char*)transliterate(zWord, nWord); 2815 } 2816 if( zK1==0 ) return SQLITE_NOMEM; 2817 for(i=0; (c = zK1[i])!=0; i++){ 2818 if( c>='A' && c<='Z' ) zK1[i] += 'a' - 'A'; 2819 } 2820 zK2 = (char*)phoneticHash((const unsigned char*)zK1, i); 2821 if( zK2==0 ){ 2822 sqlite3_free(zK1); 2823 return SQLITE_NOMEM; 2824 } 2825 if( sqlite3_value_type(argv[0])==SQLITE_NULL ){ 2826 if( sqlite3_value_type(argv[1])==SQLITE_NULL ){ 2827 spellfix1DbExec(&rc, db, 2828 "INSERT INTO \"%w\".\"%w_vocab\"(rank,langid,word,k1,k2) " 2829 "VALUES(%d,%d,%Q,nullif(%Q,%Q),%Q)", 2830 p->zDbName, p->zTableName, 2831 iRank, iLang, zWord, zK1, zWord, zK2 2832 ); 2833 }else{ 2834 newRowid = sqlite3_value_int64(argv[1]); 2835 spellfix1DbExec(&rc, db, 2836 "INSERT OR %s INTO \"%w\".\"%w_vocab\"(id,rank,langid,word,k1,k2) " 2837 "VALUES(%lld,%d,%d,%Q,nullif(%Q,%Q),%Q)", 2838 zConflict, p->zDbName, p->zTableName, 2839 newRowid, iRank, iLang, zWord, zK1, zWord, zK2 2840 ); 2841 } 2842 *pRowid = sqlite3_last_insert_rowid(db); 2843 }else{ 2844 rowid = sqlite3_value_int64(argv[0]); 2845 newRowid = *pRowid = sqlite3_value_int64(argv[1]); 2846 spellfix1DbExec(&rc, db, 2847 "UPDATE OR %s \"%w\".\"%w_vocab\" SET id=%lld, rank=%d, langid=%d," 2848 " word=%Q, k1=nullif(%Q,%Q), k2=%Q WHERE id=%lld", 2849 zConflict, p->zDbName, p->zTableName, newRowid, iRank, iLang, 2850 zWord, zK1, zWord, zK2, rowid 2851 ); 2852 } 2853 sqlite3_free(zK1); 2854 sqlite3_free(zK2); 2855 } 2856 return rc; 2857 } 2858 2859 /* 2860 ** Rename the spellfix1 table. 2861 */ 2862 static int spellfix1Rename(sqlite3_vtab *pVTab, const char *zNew){ 2863 spellfix1_vtab *p = (spellfix1_vtab*)pVTab; 2864 sqlite3 *db = p->db; 2865 int rc = SQLITE_OK; 2866 char *zNewName = sqlite3_mprintf("%s", zNew); 2867 if( zNewName==0 ){ 2868 return SQLITE_NOMEM; 2869 } 2870 spellfix1DbExec(&rc, db, 2871 "ALTER TABLE \"%w\".\"%w_vocab\" RENAME TO \"%w_vocab\"", 2872 p->zDbName, p->zTableName, zNewName 2873 ); 2874 if( rc==SQLITE_OK ){ 2875 sqlite3_free(p->zTableName); 2876 p->zTableName = zNewName; 2877 }else{ 2878 sqlite3_free(zNewName); 2879 } 2880 return rc; 2881 } 2882 2883 2884 /* 2885 ** A virtual table module that provides fuzzy search. 2886 */ 2887 static sqlite3_module spellfix1Module = { 2888 0, /* iVersion */ 2889 spellfix1Create, /* xCreate - handle CREATE VIRTUAL TABLE */ 2890 spellfix1Connect, /* xConnect - reconnected to an existing table */ 2891 spellfix1BestIndex, /* xBestIndex - figure out how to do a query */ 2892 spellfix1Disconnect, /* xDisconnect - close a connection */ 2893 spellfix1Destroy, /* xDestroy - handle DROP TABLE */ 2894 spellfix1Open, /* xOpen - open a cursor */ 2895 spellfix1Close, /* xClose - close a cursor */ 2896 spellfix1Filter, /* xFilter - configure scan constraints */ 2897 spellfix1Next, /* xNext - advance a cursor */ 2898 spellfix1Eof, /* xEof - check for end of scan */ 2899 spellfix1Column, /* xColumn - read data */ 2900 spellfix1Rowid, /* xRowid - read data */ 2901 spellfix1Update, /* xUpdate */ 2902 0, /* xBegin */ 2903 0, /* xSync */ 2904 0, /* xCommit */ 2905 0, /* xRollback */ 2906 0, /* xFindMethod */ 2907 spellfix1Rename, /* xRename */ 2908 }; 2909 2910 /* 2911 ** Register the various functions and the virtual table. 2912 */ 2913 static int spellfix1Register(sqlite3 *db){ 2914 int rc = SQLITE_OK; 2915 int i; 2916 rc = sqlite3_create_function(db, "spellfix1_translit", 1, 2917 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 2918 transliterateSqlFunc, 0, 0); 2919 if( rc==SQLITE_OK ){ 2920 rc = sqlite3_create_function(db, "spellfix1_editdist", 2, 2921 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 2922 editdistSqlFunc, 0, 0); 2923 } 2924 if( rc==SQLITE_OK ){ 2925 rc = sqlite3_create_function(db, "spellfix1_phonehash", 1, 2926 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 2927 phoneticHashSqlFunc, 0, 0); 2928 } 2929 if( rc==SQLITE_OK ){ 2930 rc = sqlite3_create_function(db, "spellfix1_scriptcode", 1, 2931 SQLITE_UTF8|SQLITE_DETERMINISTIC, 0, 2932 scriptCodeSqlFunc, 0, 0); 2933 } 2934 if( rc==SQLITE_OK ){ 2935 rc = sqlite3_create_module(db, "spellfix1", &spellfix1Module, 0); 2936 } 2937 if( rc==SQLITE_OK ){ 2938 rc = editDist3Install(db); 2939 } 2940 2941 /* Verify sanity of the translit[] table */ 2942 for(i=0; i<sizeof(translit)/sizeof(translit[0])-1; i++){ 2943 assert( translit[i].cFrom<translit[i+1].cFrom ); 2944 } 2945 2946 return rc; 2947 } 2948 2949 #endif /* SQLITE_OMIT_VIRTUALTABLE */ 2950 2951 /* 2952 ** Extension load function. 2953 */ 2954 #ifdef _WIN32 2955 __declspec(dllexport) 2956 #endif 2957 int sqlite3_spellfix_init( 2958 sqlite3 *db, 2959 char **pzErrMsg, 2960 const sqlite3_api_routines *pApi 2961 ){ 2962 SQLITE_EXTENSION_INIT2(pApi); 2963 #ifndef SQLITE_OMIT_VIRTUALTABLE 2964 return spellfix1Register(db); 2965 #endif 2966 return SQLITE_OK; 2967 } 2968