xref: /sqlite-3.40.0/ext/misc/nextchar.c (revision 2b1c2aad)
1ea41dc44Sdrh /*
2ea41dc44Sdrh ** 2013-02-28
3ea41dc44Sdrh **
4ea41dc44Sdrh ** The author disclaims copyright to this source code.  In place of
5ea41dc44Sdrh ** a legal notice, here is a blessing:
6ea41dc44Sdrh **
7ea41dc44Sdrh **    May you do good and not evil.
8ea41dc44Sdrh **    May you find forgiveness for yourself and forgive others.
9ea41dc44Sdrh **    May you share freely, never taking more than you give.
10ea41dc44Sdrh **
11ea41dc44Sdrh ******************************************************************************
12ea41dc44Sdrh **
13d4b473b2Sdrh ** This file contains code to implement the next_char(A,T,F,W,C) SQL function.
14ea41dc44Sdrh **
15d4b473b2Sdrh ** The next_char(A,T,F,W,C) function finds all valid "next" characters for
16d4b473b2Sdrh ** string A given the vocabulary in T.F.  If the W value exists and is a
17d4b473b2Sdrh ** non-empty string, then it is an SQL expression that limits the entries
18d4b473b2Sdrh ** in T.F that will be considered.  If C exists and is a non-empty string,
19d4b473b2Sdrh ** then it is the name of the collating sequence to use for comparison.  If
20d4b473b2Sdrh **
21d4b473b2Sdrh ** Only the first three arguments are required.  If the C parameter is
22d4b473b2Sdrh ** omitted or is NULL or is an empty string, then the default collating
23d4b473b2Sdrh ** sequence of T.F is used for comparision.  If the W parameter is omitted
24d4b473b2Sdrh ** or is NULL or is an empty string, then no filtering of the output is
25d4b473b2Sdrh ** done.
26d4b473b2Sdrh **
27d4b473b2Sdrh ** The T.F column should be indexed using collation C or else this routine
28d4b473b2Sdrh ** will be quite slow.
29ea41dc44Sdrh **
30ea41dc44Sdrh ** For example, suppose an application has a dictionary like this:
31ea41dc44Sdrh **
32ea41dc44Sdrh **   CREATE TABLE dictionary(word TEXT UNIQUE);
33ea41dc44Sdrh **
34ea41dc44Sdrh ** Further suppose that for user keypad entry, it is desired to disable
35ea41dc44Sdrh ** (gray out) keys that are not valid as the next character.  If the
36ea41dc44Sdrh ** the user has previously entered (say) 'cha' then to find all allowed
37ea41dc44Sdrh ** next characters (and thereby determine when keys should not be grayed
38ea41dc44Sdrh ** out) run the following query:
39ea41dc44Sdrh **
40ea41dc44Sdrh **   SELECT next_char('cha','dictionary','word');
41f4274724Sdrh **
42f4274724Sdrh ** IMPLEMENTATION NOTES:
43f4274724Sdrh **
44f4274724Sdrh ** The next_char function is implemented using recursive SQL that makes
45f4274724Sdrh ** use of the table name and column name as part of a query.  If either
46f4274724Sdrh ** the table name or column name are keywords or contain special characters,
47f4274724Sdrh ** then they should be escaped.  For example:
48f4274724Sdrh **
49f4274724Sdrh **   SELECT next_char('cha','[dictionary]','[word]');
50f4274724Sdrh **
51f4274724Sdrh ** This also means that the table name can be a subquery:
52f4274724Sdrh **
53915fe4d7Smistachkin **   SELECT next_char('cha','(SELECT word AS w FROM dictionary)','w');
54ea41dc44Sdrh */
55ea41dc44Sdrh #include "sqlite3ext.h"
56ea41dc44Sdrh SQLITE_EXTENSION_INIT1
57ea41dc44Sdrh #include <string.h>
58ea41dc44Sdrh 
59ea41dc44Sdrh /*
60ea41dc44Sdrh ** A structure to hold context of the next_char() computation across
61ea41dc44Sdrh ** nested function calls.
62ea41dc44Sdrh */
63ea41dc44Sdrh typedef struct nextCharContext nextCharContext;
64ea41dc44Sdrh struct nextCharContext {
65ea41dc44Sdrh   sqlite3 *db;                      /* Database connection */
66ea41dc44Sdrh   sqlite3_stmt *pStmt;              /* Prepared statement used to query */
67ea41dc44Sdrh   const unsigned char *zPrefix;     /* Prefix to scan */
68ea41dc44Sdrh   int nPrefix;                      /* Size of zPrefix in bytes */
69ea41dc44Sdrh   int nAlloc;                       /* Space allocated to aResult */
70ea41dc44Sdrh   int nUsed;                        /* Space used in aResult */
71ea41dc44Sdrh   unsigned int *aResult;            /* Array of next characters */
72ea41dc44Sdrh   int mallocFailed;                 /* True if malloc fails */
73ea41dc44Sdrh   int otherError;                   /* True for any other failure */
74ea41dc44Sdrh };
75ea41dc44Sdrh 
76ea41dc44Sdrh /*
77ea41dc44Sdrh ** Append a result character if the character is not already in the
78ea41dc44Sdrh ** result.
79ea41dc44Sdrh */
nextCharAppend(nextCharContext * p,unsigned c)80ea41dc44Sdrh static void nextCharAppend(nextCharContext *p, unsigned c){
81ea41dc44Sdrh   int i;
82ea41dc44Sdrh   for(i=0; i<p->nUsed; i++){
83ea41dc44Sdrh     if( p->aResult[i]==c ) return;
84ea41dc44Sdrh   }
85ea41dc44Sdrh   if( p->nUsed+1 > p->nAlloc ){
86ea41dc44Sdrh     unsigned int *aNew;
87ea41dc44Sdrh     int n = p->nAlloc*2 + 30;
882d77d80aSdrh     aNew = sqlite3_realloc64(p->aResult, n*sizeof(unsigned int));
89ea41dc44Sdrh     if( aNew==0 ){
90ea41dc44Sdrh       p->mallocFailed = 1;
91ea41dc44Sdrh       return;
92ea41dc44Sdrh     }else{
93ea41dc44Sdrh       p->aResult = aNew;
94ea41dc44Sdrh       p->nAlloc = n;
95ea41dc44Sdrh     }
96ea41dc44Sdrh   }
97ea41dc44Sdrh   p->aResult[p->nUsed++] = c;
98ea41dc44Sdrh }
99ea41dc44Sdrh 
100ea41dc44Sdrh /*
101ea41dc44Sdrh ** Write a character into z[] as UTF8.  Return the number of bytes needed
102ea41dc44Sdrh ** to hold the character
103ea41dc44Sdrh */
writeUtf8(unsigned char * z,unsigned c)104ea41dc44Sdrh static int writeUtf8(unsigned char *z, unsigned c){
105ea41dc44Sdrh   if( c<0x00080 ){
106ea41dc44Sdrh     z[0] = (unsigned char)(c&0xff);
107ea41dc44Sdrh     return 1;
108ea41dc44Sdrh   }
109ea41dc44Sdrh   if( c<0x00800 ){
110ea41dc44Sdrh     z[0] = 0xC0 + (unsigned char)((c>>6)&0x1F);
111ea41dc44Sdrh     z[1] = 0x80 + (unsigned char)(c & 0x3F);
112ea41dc44Sdrh     return 2;
113ea41dc44Sdrh   }
114ea41dc44Sdrh   if( c<0x10000 ){
115ea41dc44Sdrh     z[0] = 0xE0 + (unsigned char)((c>>12)&0x0F);
116ea41dc44Sdrh     z[1] = 0x80 + (unsigned char)((c>>6) & 0x3F);
117ea41dc44Sdrh     z[2] = 0x80 + (unsigned char)(c & 0x3F);
118ea41dc44Sdrh     return 3;
119ea41dc44Sdrh   }
120ea41dc44Sdrh   z[0] = 0xF0 + (unsigned char)((c>>18) & 0x07);
121ea41dc44Sdrh   z[1] = 0x80 + (unsigned char)((c>>12) & 0x3F);
122ea41dc44Sdrh   z[2] = 0x80 + (unsigned char)((c>>6) & 0x3F);
123ea41dc44Sdrh   z[3] = 0x80 + (unsigned char)(c & 0x3F);
124ea41dc44Sdrh   return 4;
125ea41dc44Sdrh }
126ea41dc44Sdrh 
127ea41dc44Sdrh /*
128ea41dc44Sdrh ** Read a UTF8 character out of z[] and write it into *pOut.  Return
129ea41dc44Sdrh ** the number of bytes in z[] that were used to construct the character.
130ea41dc44Sdrh */
readUtf8(const unsigned char * z,unsigned * pOut)131ea41dc44Sdrh static int readUtf8(const unsigned char *z, unsigned *pOut){
132ea41dc44Sdrh   static const unsigned char validBits[] = {
133ea41dc44Sdrh     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
134ea41dc44Sdrh     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
135ea41dc44Sdrh     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
136ea41dc44Sdrh     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
137ea41dc44Sdrh     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
138ea41dc44Sdrh     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
139ea41dc44Sdrh     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
140ea41dc44Sdrh     0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
141ea41dc44Sdrh   };
142ea41dc44Sdrh   unsigned c = z[0];
143ea41dc44Sdrh   if( c<0xc0 ){
144ea41dc44Sdrh     *pOut = c;
145ea41dc44Sdrh     return 1;
146ea41dc44Sdrh   }else{
147ea41dc44Sdrh     int n = 1;
148ea41dc44Sdrh     c = validBits[c-0xc0];
149ea41dc44Sdrh     while( (z[n] & 0xc0)==0x80 ){
150ea41dc44Sdrh       c = (c<<6) + (0x3f & z[n++]);
151ea41dc44Sdrh     }
152ea41dc44Sdrh     if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){
153ea41dc44Sdrh       c = 0xFFFD;
154ea41dc44Sdrh     }
155ea41dc44Sdrh     *pOut = c;
156ea41dc44Sdrh     return n;
157ea41dc44Sdrh   }
158ea41dc44Sdrh }
159ea41dc44Sdrh 
160ea41dc44Sdrh /*
161ea41dc44Sdrh ** The nextCharContext structure has been set up.  Add all "next" characters
162ea41dc44Sdrh ** to the result set.
163ea41dc44Sdrh */
findNextChars(nextCharContext * p)164ea41dc44Sdrh static void findNextChars(nextCharContext *p){
165ea41dc44Sdrh   unsigned cPrev = 0;
166ea41dc44Sdrh   unsigned char zPrev[8];
167ea41dc44Sdrh   int n, rc;
168ea41dc44Sdrh 
169ea41dc44Sdrh   for(;;){
170ea41dc44Sdrh     sqlite3_bind_text(p->pStmt, 1, (char*)p->zPrefix, p->nPrefix,
171ea41dc44Sdrh                       SQLITE_STATIC);
172ea41dc44Sdrh     n = writeUtf8(zPrev, cPrev+1);
173ea41dc44Sdrh     sqlite3_bind_text(p->pStmt, 2, (char*)zPrev, n, SQLITE_STATIC);
174ea41dc44Sdrh     rc = sqlite3_step(p->pStmt);
175ea41dc44Sdrh     if( rc==SQLITE_DONE ){
176ea41dc44Sdrh       sqlite3_reset(p->pStmt);
177ea41dc44Sdrh       return;
178ea41dc44Sdrh     }else if( rc!=SQLITE_ROW ){
179ea41dc44Sdrh       p->otherError = rc;
180ea41dc44Sdrh       return;
181ea41dc44Sdrh     }else{
182ea41dc44Sdrh       const unsigned char *zOut = sqlite3_column_text(p->pStmt, 0);
183ea41dc44Sdrh       unsigned cNext;
184ea41dc44Sdrh       n = readUtf8(zOut+p->nPrefix, &cNext);
185ea41dc44Sdrh       sqlite3_reset(p->pStmt);
186ea41dc44Sdrh       nextCharAppend(p, cNext);
187ea41dc44Sdrh       cPrev = cNext;
188ea41dc44Sdrh       if( p->mallocFailed ) return;
189ea41dc44Sdrh     }
190ea41dc44Sdrh   }
191ea41dc44Sdrh }
192ea41dc44Sdrh 
193ea41dc44Sdrh 
194ea41dc44Sdrh /*
195ea41dc44Sdrh ** next_character(A,T,F,W)
196ea41dc44Sdrh **
197ea41dc44Sdrh ** Return a string composted of all next possible characters after
198ea41dc44Sdrh ** A for elements of T.F.  If W is supplied, then it is an SQL expression
199ea41dc44Sdrh ** that limits the elements in T.F that are considered.
200ea41dc44Sdrh */
nextCharFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)201ea41dc44Sdrh static void nextCharFunc(
202ea41dc44Sdrh   sqlite3_context *context,
203ea41dc44Sdrh   int argc,
204ea41dc44Sdrh   sqlite3_value **argv
205ea41dc44Sdrh ){
206ea41dc44Sdrh   nextCharContext c;
207ea41dc44Sdrh   const unsigned char *zTable = sqlite3_value_text(argv[1]);
208ea41dc44Sdrh   const unsigned char *zField = sqlite3_value_text(argv[2]);
209ea41dc44Sdrh   const unsigned char *zWhere;
210d4b473b2Sdrh   const unsigned char *zCollName;
211d4b473b2Sdrh   char *zWhereClause = 0;
212d4b473b2Sdrh   char *zColl = 0;
213ea41dc44Sdrh   char *zSql;
214ea41dc44Sdrh   int rc;
215ea41dc44Sdrh 
216ea41dc44Sdrh   memset(&c, 0, sizeof(c));
217ea41dc44Sdrh   c.db = sqlite3_context_db_handle(context);
218ea41dc44Sdrh   c.zPrefix = sqlite3_value_text(argv[0]);
219ea41dc44Sdrh   c.nPrefix = sqlite3_value_bytes(argv[0]);
220ea41dc44Sdrh   if( zTable==0 || zField==0 || c.zPrefix==0 ) return;
221d4b473b2Sdrh   if( argc>=4
222d4b473b2Sdrh    && (zWhere = sqlite3_value_text(argv[3]))!=0
223d4b473b2Sdrh    && zWhere[0]!=0
224ea41dc44Sdrh   ){
225d4b473b2Sdrh     zWhereClause = sqlite3_mprintf("AND (%s)", zWhere);
226d4b473b2Sdrh     if( zWhereClause==0 ){
227d4b473b2Sdrh       sqlite3_result_error_nomem(context);
228d4b473b2Sdrh       return;
229ea41dc44Sdrh     }
230d4b473b2Sdrh   }else{
231d4b473b2Sdrh     zWhereClause = "";
232d4b473b2Sdrh   }
233d4b473b2Sdrh   if( argc>=5
234d4b473b2Sdrh    && (zCollName = sqlite3_value_text(argv[4]))!=0
235d4b473b2Sdrh    && zCollName[0]!=0
236d4b473b2Sdrh   ){
237d4b473b2Sdrh     zColl = sqlite3_mprintf("collate \"%w\"", zCollName);
238d4b473b2Sdrh     if( zColl==0 ){
239d4b473b2Sdrh       sqlite3_result_error_nomem(context);
240d4b473b2Sdrh       if( zWhereClause[0] ) sqlite3_free(zWhereClause);
241d4b473b2Sdrh       return;
242d4b473b2Sdrh     }
243d4b473b2Sdrh   }else{
244d4b473b2Sdrh     zColl = "";
245d4b473b2Sdrh   }
246d4b473b2Sdrh   zSql = sqlite3_mprintf(
247f4274724Sdrh     "SELECT %s FROM %s"
248f4274724Sdrh     " WHERE %s>=(?1 || ?2) %s"
249f4274724Sdrh     "   AND %s<=(?1 || char(1114111)) %s" /* 1114111 == 0x10ffff */
250d4b473b2Sdrh     "   %s"
251d4b473b2Sdrh     " ORDER BY 1 %s ASC LIMIT 1",
252d4b473b2Sdrh     zField, zTable, zField, zColl, zField, zColl, zWhereClause, zColl
253d4b473b2Sdrh   );
254d4b473b2Sdrh   if( zWhereClause[0] ) sqlite3_free(zWhereClause);
255d4b473b2Sdrh   if( zColl[0] ) sqlite3_free(zColl);
256ea41dc44Sdrh   if( zSql==0 ){
257ea41dc44Sdrh     sqlite3_result_error_nomem(context);
258ea41dc44Sdrh     return;
259ea41dc44Sdrh   }
260ea41dc44Sdrh 
261ea41dc44Sdrh   rc = sqlite3_prepare_v2(c.db, zSql, -1, &c.pStmt, 0);
262ea41dc44Sdrh   sqlite3_free(zSql);
263ea41dc44Sdrh   if( rc ){
264ea41dc44Sdrh     sqlite3_result_error(context, sqlite3_errmsg(c.db), -1);
265ea41dc44Sdrh     return;
266ea41dc44Sdrh   }
267ea41dc44Sdrh   findNextChars(&c);
268ea41dc44Sdrh   if( c.mallocFailed ){
269ea41dc44Sdrh     sqlite3_result_error_nomem(context);
270ea41dc44Sdrh   }else{
271ea41dc44Sdrh     unsigned char *pRes;
2722d77d80aSdrh     pRes = sqlite3_malloc64( c.nUsed*4 + 1 );
273ea41dc44Sdrh     if( pRes==0 ){
274ea41dc44Sdrh       sqlite3_result_error_nomem(context);
275ea41dc44Sdrh     }else{
276ea41dc44Sdrh       int i;
277ea41dc44Sdrh       int n = 0;
278ea41dc44Sdrh       for(i=0; i<c.nUsed; i++){
279ea41dc44Sdrh         n += writeUtf8(pRes+n, c.aResult[i]);
280ea41dc44Sdrh       }
281ea41dc44Sdrh       pRes[n] = 0;
282ea41dc44Sdrh       sqlite3_result_text(context, (const char*)pRes, n, sqlite3_free);
283ea41dc44Sdrh     }
284ea41dc44Sdrh   }
285ea41dc44Sdrh   sqlite3_finalize(c.pStmt);
286ea41dc44Sdrh   sqlite3_free(c.aResult);
287ea41dc44Sdrh }
288ea41dc44Sdrh 
289ea41dc44Sdrh #ifdef _WIN32
290ea41dc44Sdrh __declspec(dllexport)
291ea41dc44Sdrh #endif
sqlite3_nextchar_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)292ea41dc44Sdrh int sqlite3_nextchar_init(
293ea41dc44Sdrh   sqlite3 *db,
294ea41dc44Sdrh   char **pzErrMsg,
295ea41dc44Sdrh   const sqlite3_api_routines *pApi
296ea41dc44Sdrh ){
297ea41dc44Sdrh   int rc = SQLITE_OK;
298ea41dc44Sdrh   SQLITE_EXTENSION_INIT2(pApi);
299ea41dc44Sdrh   (void)pzErrMsg;  /* Unused parameter */
300*2b1c2aadSdrh   rc = sqlite3_create_function(db, "next_char", 3,
301*2b1c2aadSdrh                                SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
302ea41dc44Sdrh                                nextCharFunc, 0, 0);
303ea41dc44Sdrh   if( rc==SQLITE_OK ){
304*2b1c2aadSdrh     rc = sqlite3_create_function(db, "next_char", 4,
305*2b1c2aadSdrh                                  SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
306ea41dc44Sdrh                                  nextCharFunc, 0, 0);
307ea41dc44Sdrh   }
308d4b473b2Sdrh   if( rc==SQLITE_OK ){
309*2b1c2aadSdrh     rc = sqlite3_create_function(db, "next_char", 5,
310*2b1c2aadSdrh                                  SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
311d4b473b2Sdrh                                  nextCharFunc, 0, 0);
312d4b473b2Sdrh   }
313ea41dc44Sdrh   return rc;
314ea41dc44Sdrh }
315