1ea41dc44Sdrh /*
2ea41dc44Sdrh ** 2013-02-28
3ea41dc44Sdrh **
4ea41dc44Sdrh ** The author disclaims copyright to this source code. In place of
5ea41dc44Sdrh ** a legal notice, here is a blessing:
6ea41dc44Sdrh **
7ea41dc44Sdrh ** May you do good and not evil.
8ea41dc44Sdrh ** May you find forgiveness for yourself and forgive others.
9ea41dc44Sdrh ** May you share freely, never taking more than you give.
10ea41dc44Sdrh **
11ea41dc44Sdrh ******************************************************************************
12ea41dc44Sdrh **
13d4b473b2Sdrh ** This file contains code to implement the next_char(A,T,F,W,C) SQL function.
14ea41dc44Sdrh **
15d4b473b2Sdrh ** The next_char(A,T,F,W,C) function finds all valid "next" characters for
16d4b473b2Sdrh ** string A given the vocabulary in T.F. If the W value exists and is a
17d4b473b2Sdrh ** non-empty string, then it is an SQL expression that limits the entries
18d4b473b2Sdrh ** in T.F that will be considered. If C exists and is a non-empty string,
19d4b473b2Sdrh ** then it is the name of the collating sequence to use for comparison. If
20d4b473b2Sdrh **
21d4b473b2Sdrh ** Only the first three arguments are required. If the C parameter is
22d4b473b2Sdrh ** omitted or is NULL or is an empty string, then the default collating
23d4b473b2Sdrh ** sequence of T.F is used for comparision. If the W parameter is omitted
24d4b473b2Sdrh ** or is NULL or is an empty string, then no filtering of the output is
25d4b473b2Sdrh ** done.
26d4b473b2Sdrh **
27d4b473b2Sdrh ** The T.F column should be indexed using collation C or else this routine
28d4b473b2Sdrh ** will be quite slow.
29ea41dc44Sdrh **
30ea41dc44Sdrh ** For example, suppose an application has a dictionary like this:
31ea41dc44Sdrh **
32ea41dc44Sdrh ** CREATE TABLE dictionary(word TEXT UNIQUE);
33ea41dc44Sdrh **
34ea41dc44Sdrh ** Further suppose that for user keypad entry, it is desired to disable
35ea41dc44Sdrh ** (gray out) keys that are not valid as the next character. If the
36ea41dc44Sdrh ** the user has previously entered (say) 'cha' then to find all allowed
37ea41dc44Sdrh ** next characters (and thereby determine when keys should not be grayed
38ea41dc44Sdrh ** out) run the following query:
39ea41dc44Sdrh **
40ea41dc44Sdrh ** SELECT next_char('cha','dictionary','word');
41f4274724Sdrh **
42f4274724Sdrh ** IMPLEMENTATION NOTES:
43f4274724Sdrh **
44f4274724Sdrh ** The next_char function is implemented using recursive SQL that makes
45f4274724Sdrh ** use of the table name and column name as part of a query. If either
46f4274724Sdrh ** the table name or column name are keywords or contain special characters,
47f4274724Sdrh ** then they should be escaped. For example:
48f4274724Sdrh **
49f4274724Sdrh ** SELECT next_char('cha','[dictionary]','[word]');
50f4274724Sdrh **
51f4274724Sdrh ** This also means that the table name can be a subquery:
52f4274724Sdrh **
53915fe4d7Smistachkin ** SELECT next_char('cha','(SELECT word AS w FROM dictionary)','w');
54ea41dc44Sdrh */
55ea41dc44Sdrh #include "sqlite3ext.h"
56ea41dc44Sdrh SQLITE_EXTENSION_INIT1
57ea41dc44Sdrh #include <string.h>
58ea41dc44Sdrh
59ea41dc44Sdrh /*
60ea41dc44Sdrh ** A structure to hold context of the next_char() computation across
61ea41dc44Sdrh ** nested function calls.
62ea41dc44Sdrh */
63ea41dc44Sdrh typedef struct nextCharContext nextCharContext;
64ea41dc44Sdrh struct nextCharContext {
65ea41dc44Sdrh sqlite3 *db; /* Database connection */
66ea41dc44Sdrh sqlite3_stmt *pStmt; /* Prepared statement used to query */
67ea41dc44Sdrh const unsigned char *zPrefix; /* Prefix to scan */
68ea41dc44Sdrh int nPrefix; /* Size of zPrefix in bytes */
69ea41dc44Sdrh int nAlloc; /* Space allocated to aResult */
70ea41dc44Sdrh int nUsed; /* Space used in aResult */
71ea41dc44Sdrh unsigned int *aResult; /* Array of next characters */
72ea41dc44Sdrh int mallocFailed; /* True if malloc fails */
73ea41dc44Sdrh int otherError; /* True for any other failure */
74ea41dc44Sdrh };
75ea41dc44Sdrh
76ea41dc44Sdrh /*
77ea41dc44Sdrh ** Append a result character if the character is not already in the
78ea41dc44Sdrh ** result.
79ea41dc44Sdrh */
nextCharAppend(nextCharContext * p,unsigned c)80ea41dc44Sdrh static void nextCharAppend(nextCharContext *p, unsigned c){
81ea41dc44Sdrh int i;
82ea41dc44Sdrh for(i=0; i<p->nUsed; i++){
83ea41dc44Sdrh if( p->aResult[i]==c ) return;
84ea41dc44Sdrh }
85ea41dc44Sdrh if( p->nUsed+1 > p->nAlloc ){
86ea41dc44Sdrh unsigned int *aNew;
87ea41dc44Sdrh int n = p->nAlloc*2 + 30;
882d77d80aSdrh aNew = sqlite3_realloc64(p->aResult, n*sizeof(unsigned int));
89ea41dc44Sdrh if( aNew==0 ){
90ea41dc44Sdrh p->mallocFailed = 1;
91ea41dc44Sdrh return;
92ea41dc44Sdrh }else{
93ea41dc44Sdrh p->aResult = aNew;
94ea41dc44Sdrh p->nAlloc = n;
95ea41dc44Sdrh }
96ea41dc44Sdrh }
97ea41dc44Sdrh p->aResult[p->nUsed++] = c;
98ea41dc44Sdrh }
99ea41dc44Sdrh
100ea41dc44Sdrh /*
101ea41dc44Sdrh ** Write a character into z[] as UTF8. Return the number of bytes needed
102ea41dc44Sdrh ** to hold the character
103ea41dc44Sdrh */
writeUtf8(unsigned char * z,unsigned c)104ea41dc44Sdrh static int writeUtf8(unsigned char *z, unsigned c){
105ea41dc44Sdrh if( c<0x00080 ){
106ea41dc44Sdrh z[0] = (unsigned char)(c&0xff);
107ea41dc44Sdrh return 1;
108ea41dc44Sdrh }
109ea41dc44Sdrh if( c<0x00800 ){
110ea41dc44Sdrh z[0] = 0xC0 + (unsigned char)((c>>6)&0x1F);
111ea41dc44Sdrh z[1] = 0x80 + (unsigned char)(c & 0x3F);
112ea41dc44Sdrh return 2;
113ea41dc44Sdrh }
114ea41dc44Sdrh if( c<0x10000 ){
115ea41dc44Sdrh z[0] = 0xE0 + (unsigned char)((c>>12)&0x0F);
116ea41dc44Sdrh z[1] = 0x80 + (unsigned char)((c>>6) & 0x3F);
117ea41dc44Sdrh z[2] = 0x80 + (unsigned char)(c & 0x3F);
118ea41dc44Sdrh return 3;
119ea41dc44Sdrh }
120ea41dc44Sdrh z[0] = 0xF0 + (unsigned char)((c>>18) & 0x07);
121ea41dc44Sdrh z[1] = 0x80 + (unsigned char)((c>>12) & 0x3F);
122ea41dc44Sdrh z[2] = 0x80 + (unsigned char)((c>>6) & 0x3F);
123ea41dc44Sdrh z[3] = 0x80 + (unsigned char)(c & 0x3F);
124ea41dc44Sdrh return 4;
125ea41dc44Sdrh }
126ea41dc44Sdrh
127ea41dc44Sdrh /*
128ea41dc44Sdrh ** Read a UTF8 character out of z[] and write it into *pOut. Return
129ea41dc44Sdrh ** the number of bytes in z[] that were used to construct the character.
130ea41dc44Sdrh */
readUtf8(const unsigned char * z,unsigned * pOut)131ea41dc44Sdrh static int readUtf8(const unsigned char *z, unsigned *pOut){
132ea41dc44Sdrh static const unsigned char validBits[] = {
133ea41dc44Sdrh 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
134ea41dc44Sdrh 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
135ea41dc44Sdrh 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
136ea41dc44Sdrh 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
137ea41dc44Sdrh 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
138ea41dc44Sdrh 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
139ea41dc44Sdrh 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
140ea41dc44Sdrh 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
141ea41dc44Sdrh };
142ea41dc44Sdrh unsigned c = z[0];
143ea41dc44Sdrh if( c<0xc0 ){
144ea41dc44Sdrh *pOut = c;
145ea41dc44Sdrh return 1;
146ea41dc44Sdrh }else{
147ea41dc44Sdrh int n = 1;
148ea41dc44Sdrh c = validBits[c-0xc0];
149ea41dc44Sdrh while( (z[n] & 0xc0)==0x80 ){
150ea41dc44Sdrh c = (c<<6) + (0x3f & z[n++]);
151ea41dc44Sdrh }
152ea41dc44Sdrh if( c<0x80 || (c&0xFFFFF800)==0xD800 || (c&0xFFFFFFFE)==0xFFFE ){
153ea41dc44Sdrh c = 0xFFFD;
154ea41dc44Sdrh }
155ea41dc44Sdrh *pOut = c;
156ea41dc44Sdrh return n;
157ea41dc44Sdrh }
158ea41dc44Sdrh }
159ea41dc44Sdrh
160ea41dc44Sdrh /*
161ea41dc44Sdrh ** The nextCharContext structure has been set up. Add all "next" characters
162ea41dc44Sdrh ** to the result set.
163ea41dc44Sdrh */
findNextChars(nextCharContext * p)164ea41dc44Sdrh static void findNextChars(nextCharContext *p){
165ea41dc44Sdrh unsigned cPrev = 0;
166ea41dc44Sdrh unsigned char zPrev[8];
167ea41dc44Sdrh int n, rc;
168ea41dc44Sdrh
169ea41dc44Sdrh for(;;){
170ea41dc44Sdrh sqlite3_bind_text(p->pStmt, 1, (char*)p->zPrefix, p->nPrefix,
171ea41dc44Sdrh SQLITE_STATIC);
172ea41dc44Sdrh n = writeUtf8(zPrev, cPrev+1);
173ea41dc44Sdrh sqlite3_bind_text(p->pStmt, 2, (char*)zPrev, n, SQLITE_STATIC);
174ea41dc44Sdrh rc = sqlite3_step(p->pStmt);
175ea41dc44Sdrh if( rc==SQLITE_DONE ){
176ea41dc44Sdrh sqlite3_reset(p->pStmt);
177ea41dc44Sdrh return;
178ea41dc44Sdrh }else if( rc!=SQLITE_ROW ){
179ea41dc44Sdrh p->otherError = rc;
180ea41dc44Sdrh return;
181ea41dc44Sdrh }else{
182ea41dc44Sdrh const unsigned char *zOut = sqlite3_column_text(p->pStmt, 0);
183ea41dc44Sdrh unsigned cNext;
184ea41dc44Sdrh n = readUtf8(zOut+p->nPrefix, &cNext);
185ea41dc44Sdrh sqlite3_reset(p->pStmt);
186ea41dc44Sdrh nextCharAppend(p, cNext);
187ea41dc44Sdrh cPrev = cNext;
188ea41dc44Sdrh if( p->mallocFailed ) return;
189ea41dc44Sdrh }
190ea41dc44Sdrh }
191ea41dc44Sdrh }
192ea41dc44Sdrh
193ea41dc44Sdrh
194ea41dc44Sdrh /*
195ea41dc44Sdrh ** next_character(A,T,F,W)
196ea41dc44Sdrh **
197ea41dc44Sdrh ** Return a string composted of all next possible characters after
198ea41dc44Sdrh ** A for elements of T.F. If W is supplied, then it is an SQL expression
199ea41dc44Sdrh ** that limits the elements in T.F that are considered.
200ea41dc44Sdrh */
nextCharFunc(sqlite3_context * context,int argc,sqlite3_value ** argv)201ea41dc44Sdrh static void nextCharFunc(
202ea41dc44Sdrh sqlite3_context *context,
203ea41dc44Sdrh int argc,
204ea41dc44Sdrh sqlite3_value **argv
205ea41dc44Sdrh ){
206ea41dc44Sdrh nextCharContext c;
207ea41dc44Sdrh const unsigned char *zTable = sqlite3_value_text(argv[1]);
208ea41dc44Sdrh const unsigned char *zField = sqlite3_value_text(argv[2]);
209ea41dc44Sdrh const unsigned char *zWhere;
210d4b473b2Sdrh const unsigned char *zCollName;
211d4b473b2Sdrh char *zWhereClause = 0;
212d4b473b2Sdrh char *zColl = 0;
213ea41dc44Sdrh char *zSql;
214ea41dc44Sdrh int rc;
215ea41dc44Sdrh
216ea41dc44Sdrh memset(&c, 0, sizeof(c));
217ea41dc44Sdrh c.db = sqlite3_context_db_handle(context);
218ea41dc44Sdrh c.zPrefix = sqlite3_value_text(argv[0]);
219ea41dc44Sdrh c.nPrefix = sqlite3_value_bytes(argv[0]);
220ea41dc44Sdrh if( zTable==0 || zField==0 || c.zPrefix==0 ) return;
221d4b473b2Sdrh if( argc>=4
222d4b473b2Sdrh && (zWhere = sqlite3_value_text(argv[3]))!=0
223d4b473b2Sdrh && zWhere[0]!=0
224ea41dc44Sdrh ){
225d4b473b2Sdrh zWhereClause = sqlite3_mprintf("AND (%s)", zWhere);
226d4b473b2Sdrh if( zWhereClause==0 ){
227d4b473b2Sdrh sqlite3_result_error_nomem(context);
228d4b473b2Sdrh return;
229ea41dc44Sdrh }
230d4b473b2Sdrh }else{
231d4b473b2Sdrh zWhereClause = "";
232d4b473b2Sdrh }
233d4b473b2Sdrh if( argc>=5
234d4b473b2Sdrh && (zCollName = sqlite3_value_text(argv[4]))!=0
235d4b473b2Sdrh && zCollName[0]!=0
236d4b473b2Sdrh ){
237d4b473b2Sdrh zColl = sqlite3_mprintf("collate \"%w\"", zCollName);
238d4b473b2Sdrh if( zColl==0 ){
239d4b473b2Sdrh sqlite3_result_error_nomem(context);
240d4b473b2Sdrh if( zWhereClause[0] ) sqlite3_free(zWhereClause);
241d4b473b2Sdrh return;
242d4b473b2Sdrh }
243d4b473b2Sdrh }else{
244d4b473b2Sdrh zColl = "";
245d4b473b2Sdrh }
246d4b473b2Sdrh zSql = sqlite3_mprintf(
247f4274724Sdrh "SELECT %s FROM %s"
248f4274724Sdrh " WHERE %s>=(?1 || ?2) %s"
249f4274724Sdrh " AND %s<=(?1 || char(1114111)) %s" /* 1114111 == 0x10ffff */
250d4b473b2Sdrh " %s"
251d4b473b2Sdrh " ORDER BY 1 %s ASC LIMIT 1",
252d4b473b2Sdrh zField, zTable, zField, zColl, zField, zColl, zWhereClause, zColl
253d4b473b2Sdrh );
254d4b473b2Sdrh if( zWhereClause[0] ) sqlite3_free(zWhereClause);
255d4b473b2Sdrh if( zColl[0] ) sqlite3_free(zColl);
256ea41dc44Sdrh if( zSql==0 ){
257ea41dc44Sdrh sqlite3_result_error_nomem(context);
258ea41dc44Sdrh return;
259ea41dc44Sdrh }
260ea41dc44Sdrh
261ea41dc44Sdrh rc = sqlite3_prepare_v2(c.db, zSql, -1, &c.pStmt, 0);
262ea41dc44Sdrh sqlite3_free(zSql);
263ea41dc44Sdrh if( rc ){
264ea41dc44Sdrh sqlite3_result_error(context, sqlite3_errmsg(c.db), -1);
265ea41dc44Sdrh return;
266ea41dc44Sdrh }
267ea41dc44Sdrh findNextChars(&c);
268ea41dc44Sdrh if( c.mallocFailed ){
269ea41dc44Sdrh sqlite3_result_error_nomem(context);
270ea41dc44Sdrh }else{
271ea41dc44Sdrh unsigned char *pRes;
2722d77d80aSdrh pRes = sqlite3_malloc64( c.nUsed*4 + 1 );
273ea41dc44Sdrh if( pRes==0 ){
274ea41dc44Sdrh sqlite3_result_error_nomem(context);
275ea41dc44Sdrh }else{
276ea41dc44Sdrh int i;
277ea41dc44Sdrh int n = 0;
278ea41dc44Sdrh for(i=0; i<c.nUsed; i++){
279ea41dc44Sdrh n += writeUtf8(pRes+n, c.aResult[i]);
280ea41dc44Sdrh }
281ea41dc44Sdrh pRes[n] = 0;
282ea41dc44Sdrh sqlite3_result_text(context, (const char*)pRes, n, sqlite3_free);
283ea41dc44Sdrh }
284ea41dc44Sdrh }
285ea41dc44Sdrh sqlite3_finalize(c.pStmt);
286ea41dc44Sdrh sqlite3_free(c.aResult);
287ea41dc44Sdrh }
288ea41dc44Sdrh
289ea41dc44Sdrh #ifdef _WIN32
290ea41dc44Sdrh __declspec(dllexport)
291ea41dc44Sdrh #endif
sqlite3_nextchar_init(sqlite3 * db,char ** pzErrMsg,const sqlite3_api_routines * pApi)292ea41dc44Sdrh int sqlite3_nextchar_init(
293ea41dc44Sdrh sqlite3 *db,
294ea41dc44Sdrh char **pzErrMsg,
295ea41dc44Sdrh const sqlite3_api_routines *pApi
296ea41dc44Sdrh ){
297ea41dc44Sdrh int rc = SQLITE_OK;
298ea41dc44Sdrh SQLITE_EXTENSION_INIT2(pApi);
299ea41dc44Sdrh (void)pzErrMsg; /* Unused parameter */
300*2b1c2aadSdrh rc = sqlite3_create_function(db, "next_char", 3,
301*2b1c2aadSdrh SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
302ea41dc44Sdrh nextCharFunc, 0, 0);
303ea41dc44Sdrh if( rc==SQLITE_OK ){
304*2b1c2aadSdrh rc = sqlite3_create_function(db, "next_char", 4,
305*2b1c2aadSdrh SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
306ea41dc44Sdrh nextCharFunc, 0, 0);
307ea41dc44Sdrh }
308d4b473b2Sdrh if( rc==SQLITE_OK ){
309*2b1c2aadSdrh rc = sqlite3_create_function(db, "next_char", 5,
310*2b1c2aadSdrh SQLITE_UTF8|SQLITE_INNOCUOUS, 0,
311d4b473b2Sdrh nextCharFunc, 0, 0);
312d4b473b2Sdrh }
313ea41dc44Sdrh return rc;
314ea41dc44Sdrh }
315