xref: /sqlite-3.40.0/src/whereInt.h (revision cc285c5a)
1 /*
2 ** 2013-11-12
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 *************************************************************************
12 **
13 ** This file contains structure and macro definitions for the query
14 ** planner logic in "where.c".  These definitions are broken out into
15 ** a separate source file for easier editing.
16 */
17 
18 /*
19 ** Trace output macros
20 */
21 #if defined(SQLITE_TEST) || defined(SQLITE_DEBUG)
22 /***/ int sqlite3WhereTrace = 0;  /* Bitmask tested by WHERETRACE(); 0 means no bit is enabled */
23 #endif  /* NOTE(review): the line above is a definition, not an extern declaration, so including this header from more than one translation unit yields duplicate definitions - confirm it has a single includer */
24 #if defined(SQLITE_DEBUG) \
25     && (defined(SQLITE_TEST) || defined(SQLITE_ENABLE_WHERETRACE))
26 # define WHERETRACE(K,X)  if(sqlite3WhereTrace&(K)) sqlite3DebugPrintf X  /* X is a parenthesized argument list; expands to a bare "if", so brace the call when an "else" follows */
27 # define WHERETRACE_ENABLED 1  /* Defined only when tracing is compiled in */
28 #else
29 # define WHERETRACE(K,X)  /* Tracing compiled out: expands to nothing */
30 #endif
31 
32 /* Forward references
33 */
34 typedef struct WhereClause WhereClause;            /* Container for the WhereTerms of one WHERE clause */
35 typedef struct WhereMaskSet WhereMaskSet;          /* Maps VDBE cursor numbers to Bitmask bits */
36 typedef struct WhereOrInfo WhereOrInfo;            /* Extra data for a term with eOperator==WO_OR */
37 typedef struct WhereAndInfo WhereAndInfo;          /* Extra data for a term with eOperator==WO_AND */
38 typedef struct WhereLevel WhereLevel;              /* Implementation of one nested loop */
39 typedef struct WhereLoop WhereLoop;                /* Candidate algorithm for one FROM-clause term */
40 typedef struct WherePath WherePath;                /* Sequence of WhereLoops forming (part of) a plan */
41 typedef struct WhereTerm WhereTerm;                /* One subexpression of the WHERE clause */
42 typedef struct WhereLoopBuilder WhereLoopBuilder;  /* State needed while constructing WhereLoops */
43 typedef struct WhereScan WhereScan;                /* Iterator over useful WHERE-clause terms */
44 typedef struct WhereOrCost WhereOrCost;            /* Prerequisites and cost of one OR operand */
45 typedef struct WhereOrSet WhereOrSet;              /* Set of the best WhereOrCost entries */
46 
47 /*
48 ** This object contains information needed to implement a single nested
49 ** loop in WHERE clause.
50 **
51 ** Contrast this object with WhereLoop.  This object describes the
52 ** implementation of the loop.  WhereLoop describes the algorithm.
53 ** This object contains a pointer to the WhereLoop algorithm as one of
54 ** its elements.
55 **
56 ** The WhereInfo object contains a single instance of this object for
57 ** each term in the FROM clause (which is to say, for each of the
58 ** nested loops as implemented).  The order of WhereLevel objects determines
59 ** the loop nesting order, with WhereInfo.a[0] being the outermost loop and
60 ** WhereInfo.a[WhereInfo.nLevel-1] being the innermost loop.
61 */
62 struct WhereLevel {
63   int iLeftJoin;        /* Memory cell used to implement LEFT OUTER JOIN */
64   int iTabCur;          /* The VDBE cursor used to access the table */
65   int iIdxCur;          /* The VDBE cursor used to access pIdx */
66   int addrBrk;          /* Jump here to break out of the loop */
67   int addrNxt;          /* Jump here to start the next IN combination */
68   int addrSkip;         /* Jump here for next iteration of skip-scan */
69   int addrCont;         /* Jump here to continue with the next loop cycle */
70   int addrFirst;        /* First instruction of interior of the loop */
71   int addrBody;         /* Beginning of the body of this loop */
72   int iLikeRepCntr;     /* LIKE range processing counter register */
73   int addrLikeRep;      /* LIKE range processing address */
74   u8 iFrom;             /* Which entry in the FROM clause */
75   u8 op, p3, p5;        /* Opcode, P3 & P5 of the opcode that ends the loop */
76   int p1, p2;           /* P1 & P2 operands of the opcode that ends the loop */
77   union {               /* Which member is meaningful depends on pWLoop->wsFlags */
78     struct {
79       int nIn;              /* Number of entries in aInLoop[] */
80       struct InLoop {
81         int iCur;              /* The VDBE cursor used by this IN operator */
82         int addrInTop;         /* Top of the IN loop */
83         u8 eEndLoopOp;         /* IN Loop terminator. OP_Next or OP_Prev */
84       } *aInLoop;           /* Array of nIn entries, one per nested IN operator */
85     } in;                 /* Used when (pWLoop->wsFlags & WHERE_IN_ABLE)!=0 */
86     Index *pCovidx;       /* Possible covering index for WHERE_MULTI_OR */
87   } u;
88   struct WhereLoop *pWLoop;  /* The selected WhereLoop object */
89   Bitmask notReady;          /* FROM entries not usable at this level */
90 #ifdef SQLITE_ENABLE_STMT_SCANSTATUS
91   int addrVisit;        /* Address at which row is visited (field exists only in SQLITE_ENABLE_STMT_SCANSTATUS builds) */
92 #endif
93 };
94 
95 /*
96 ** Each instance of this object represents an algorithm for evaluating one
97 ** term of a join.  Every term of the FROM clause will have at least
98 ** one corresponding WhereLoop object (unless INDEXED BY constraints
99 ** prevent a query solution - which is an error) and many terms of the
100 ** FROM clause will have multiple WhereLoop objects, each describing a
101 ** potential way of implementing that FROM-clause term, together with
102 ** dependencies and cost estimates for using the chosen algorithm.
103 **
104 ** Query planning consists of building up a collection of these WhereLoop
105 ** objects, then computing a particular sequence of WhereLoop objects, with
106 ** one WhereLoop object per FROM clause term, that satisfy all dependencies
107 ** and that minimize the overall cost.
108 */
109 struct WhereLoop {
110   Bitmask prereq;       /* Bitmask of other loops that must run first */
111   Bitmask maskSelf;     /* Bitmask identifying table iTab */
112 #ifdef SQLITE_DEBUG
113   char cId;             /* Symbolic ID of this loop for debugging use (SQLITE_DEBUG builds only) */
114 #endif
115   u8 iTab;              /* Position in FROM clause of table for this loop */
116   u8 iSortIdx;          /* Sorting index number.  0==None */
117   LogEst rSetup;        /* One-time setup cost (ex: create transient index) */
118   LogEst rRun;          /* Cost of running each loop */
119   LogEst nOut;          /* Estimated number of output rows */
120   union {               /* Two alternative layouts: btree or vtab */
121     struct {               /* Information for internal btree tables */
122       u16 nEq;               /* Number of equality constraints */
123       Index *pIndex;         /* Index used, or NULL.  Valid when wsFlags has WHERE_INDEXED */
124     } btree;
125     struct {               /* Information for virtual tables.  Valid when wsFlags has WHERE_VIRTUALTABLE */
126       int idxNum;            /* Index number */
127       u8 needFree;           /* True if sqlite3_free(idxStr) is needed */
128       i8 isOrdered;          /* True if satisfies ORDER BY */
129       u16 omitMask;          /* Terms that may be omitted */
130       char *idxStr;          /* Index identifier string */
131     } vtab;
132   } u;
133   u32 wsFlags;          /* WHERE_* flags describing the plan */
134   u16 nLTerm;           /* Number of entries in aLTerm[] */
135   u16 nSkip;            /* Number of NULL aLTerm[] entries */
136   /**** whereLoopXfer() copies fields above ***********************/
137 # define WHERE_LOOP_XFER_SZ offsetof(WhereLoop,nLSlot)  /* Byte offset of nLSlot, i.e. total size of the fields above; do not reorder fields across this line */
138   u16 nLSlot;           /* Number of slots allocated for aLTerm[] */
139   WhereTerm **aLTerm;   /* WhereTerms used */
140   WhereLoop *pNextLoop; /* Next WhereLoop object in the WhereClause */
141   WhereTerm *aLTermSpace[3];  /* Initial aLTerm[] space (3 slots embedded in the object) */
142 };
143 
144 /* This object holds the prerequisites and the cost of running a
145 ** subquery on one operand of an OR operator in the WHERE clause.
146 ** See WhereOrSet for additional information
147 */
148 struct WhereOrCost {
149   Bitmask prereq;     /* Prerequisite bitmask for this subquery */
150   LogEst rRun;        /* Cost of running this subquery */
151   LogEst nOut;        /* Number of output rows for this subquery */
152 };
153 
154 /* The WhereOrSet object holds a set of possible WhereOrCosts that
155 ** correspond to the subquery(s) of OR-clause processing.  Only the
156 ** best N_OR_COST elements are retained.
157 */
158 #define N_OR_COST 3  /* Capacity of WhereOrSet.a[] */
159 struct WhereOrSet {
160   u16 n;                      /* Number of valid a[] entries (at most N_OR_COST) */
161   WhereOrCost a[N_OR_COST];   /* Set of best costs; only a[0..n-1] are valid */
162 };
163 
164 
165 /* Forward declaration of methods */
166 static int whereLoopResize(sqlite3*, WhereLoop*, int);  /* NOTE(review): static linkage, so each file that includes this header and calls it must supply its own definition - confirm */
167 
168 /*
169 ** Each instance of this object holds a sequence of WhereLoop objects
170 ** that implement some or all of a query plan.
171 **
172 ** Think of each WhereLoop object as a node in a graph with arcs
173 ** showing dependencies and costs for travelling between nodes.  (That is
174 ** not a completely accurate description because WhereLoop costs are a
175 ** vector, not a scalar, and because dependencies are many-to-one, not
176 ** one-to-one as are graph nodes.  But it is a useful visualization aid.)
177 ** Then a WherePath object is a path through the graph that visits some
178 ** or all of the WhereLoop objects once.
179 **
180 ** The "solver" works by creating the N best WherePath objects of length
181 ** 1.  Then using those as a basis to compute the N best WherePath objects
182 ** of length 2.  And so forth until the length of WherePaths equals the
183 ** number of nodes in the FROM clause.  The best (lowest cost) WherePath
184 ** at the end is the chosen query plan.
185 */
186 struct WherePath {
187   Bitmask maskLoop;     /* Bitmask of all WhereLoop objects in this path */
188   Bitmask revLoop;      /* aLoop[]s that should be reversed for ORDER BY */
189   LogEst nRow;          /* Estimated number of rows generated by this path */
190   LogEst rCost;         /* Total cost of this path */
191   LogEst rUnsorted;     /* Total cost of this path ignoring sorting costs */
192   i8 isOrdered;         /* Number of ORDER BY terms satisfied, or -1 if unknown */
193   WhereLoop **aLoop;    /* Array of WhereLoop objects implementing this path */
194 };
195 
196 /*
197 ** The query generator uses an array of instances of this structure to
198 ** help it analyze the subexpressions of the WHERE clause.  The
199 ** subexpressions are usually separated from one another by AND operators,
200 ** though sometimes they are separated by OR.
201 **
202 ** All WhereTerms are collected into a single WhereClause structure.
203 ** The following identity holds:
204 **
205 **        WhereTerm.pWC->a[WhereTerm.idx] == WhereTerm
206 **
207 ** When a term is of the form:
208 **
209 **              X <op> <expr>
210 **
211 ** where X is a column name and <op> is one of certain operators,
212 ** then WhereTerm.leftCursor and WhereTerm.u.leftColumn record the
213 ** cursor number and column number for X.  WhereTerm.eOperator records
214 ** the <op> using a bitmask encoding defined by WO_xxx below.  The
215 ** use of a bitmask encoding for the operator allows us to search
216 ** quickly for terms that match any of several different operators.
217 **
218 ** A WhereTerm might also be two or more subterms connected by OR:
219 **
220 **         (t1.X <op> <expr>) OR (t1.Y <op> <expr>) OR ....
221 **
222 ** In this second case, wtFlags has the TERM_ORINFO bit set and eOperator==WO_OR
223 ** and the WhereTerm.u.pOrInfo field points to auxiliary information that
224 ** is collected about the OR clause.
225 **
226 ** If a term in the WHERE clause does not match either of the two previous
227 ** categories, then eOperator==0.  The WhereTerm.pExpr field is still set
228 ** to the original subexpression content and wtFlags is set up appropriately
229 ** but no other fields in the WhereTerm object are meaningful.
230 **
231 ** When eOperator!=0, prereqRight and prereqAll record sets of cursor numbers,
232 ** but they do so indirectly.  A single WhereMaskSet structure translates
233 ** cursor number into bits and the translated bit is stored in the prereq
234 ** fields.  The translation is used in order to maximize the number of
235 ** bits that will fit in a Bitmask.  The VDBE cursor numbers might be
236 ** spread out over the non-negative integers.  For example, the cursor
237 ** numbers might be 3, 8, 9, 10, 20, 23, 41, and 45.  The WhereMaskSet
238 ** translates these sparse cursor numbers into consecutive integers
239 ** beginning with 0 in order to make the best possible use of the available
240 ** bits in the Bitmask.  So, in the example above, the cursor numbers
241 ** would be mapped into integers 0 through 7.
242 **
243 ** The number of terms in a join is limited by the number of bits
244 ** in prereqRight and prereqAll.  The default is 64 bits, hence SQLite
245 ** is only able to process joins with 64 or fewer tables.
246 */
247 struct WhereTerm {
248   Expr *pExpr;            /* Pointer to the subexpression that is this term */
249   int iParent;            /* Disable pWC->a[iParent] when this term is disabled */
250   int leftCursor;         /* Cursor number of X in "X <op> <expr>" */
251   union {                 /* Which member is meaningful depends on eOperator */
252     int leftColumn;         /* Column number of X in "X <op> <expr>" */
253     WhereOrInfo *pOrInfo;   /* Extra information if (eOperator & WO_OR)!=0 */
254     WhereAndInfo *pAndInfo; /* Extra information if (eOperator & WO_AND)!=0 */
255   } u;
256   LogEst truthProb;       /* Probability of truth for this expression */
257   u16 eOperator;          /* A WO_xxx bitmask value describing <op>; 0 if none applies */
258   u16 wtFlags;            /* TERM_xxx bit flags.  See below */
259   u8 nChild;              /* Number of children that must disable us */
260   WhereClause *pWC;       /* The clause this term is part of */
261   Bitmask prereqRight;    /* Bitmask of tables used by pExpr->pRight */
262   Bitmask prereqAll;      /* Bitmask of tables referenced by pExpr */
263 };
264 
265 /*
266 ** Allowed values of WhereTerm.wtFlags
267 */
268 #define TERM_DYNAMIC    0x01   /* Need to call sqlite3ExprDelete(db, pExpr) */
269 #define TERM_VIRTUAL    0x02   /* Added by the optimizer.  Do not code */
270 #define TERM_CODED      0x04   /* This term is already coded */
271 #define TERM_COPIED     0x08   /* Has a child */
272 #define TERM_ORINFO     0x10   /* Need to free the WhereTerm.u.pOrInfo object */
273 #define TERM_ANDINFO    0x20   /* Need to free the WhereTerm.u.pAndInfo obj */
274 #define TERM_OR_OK      0x40   /* Used during OR-clause processing */
275 #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
276 #  define TERM_VNULL    0x80   /* Manufactured x>NULL or x<=NULL term */
277 #else
278 #  define TERM_VNULL    0x00   /* No bits: tests against it are always false without SQLITE_ENABLE_STAT3_OR_STAT4 */
279 #endif
280 #define TERM_LIKEOPT    0x100  /* Virtual terms from the LIKE optimization */
281 #define TERM_LIKECOND   0x200  /* Conditionally this LIKE operator term */
282 #define TERM_LIKE       0x400  /* The original LIKE operator */
283 
284 /*
285 ** An instance of the WhereScan object is used as an iterator for locating
286 ** terms in the WHERE clause that are useful to the query planner.
287 */
288 struct WhereScan {
289   WhereClause *pOrigWC;      /* Original, innermost WhereClause */
290   WhereClause *pWC;          /* WhereClause currently being scanned */
291   char *zCollName;           /* Required collating sequence, if not NULL */
292   char idxaff;               /* Must match this affinity, if zCollName!=NULL */
293   unsigned char nEquiv;      /* Number of entries in aEquiv[] */
294   unsigned char iEquiv;      /* Next unused slot in aEquiv[] */
295   u32 opMask;                /* Acceptable operators */
296   int k;                     /* Resume scanning at this->pWC->a[this->k] */
297   int aEquiv[22];            /* Cursor,Column pairs for equivalence classes (22 ints; presumably room for 11 pairs - TODO confirm) */
298 };
299 
300 /*
301 ** An instance of the following structure holds all information about a
302 ** WHERE clause.  Mostly this is a container for one or more WhereTerms.
303 **
304 ** Explanation of pOuter:  For a WHERE clause of the form
305 **
306 **           a AND ((b AND c) OR (d AND e)) AND f
307 **
308 ** There are separate WhereClause objects for the whole clause and for
309 ** the subclauses "(b AND c)" and "(d AND e)".  The pOuter field of the
310 ** subclauses points to the WhereClause object for the whole clause.
311 */
312 struct WhereClause {
313   WhereInfo *pWInfo;       /* WHERE clause processing context */
314   WhereClause *pOuter;     /* Outer conjunction */
315   u8 op;                   /* Split operator.  TK_AND or TK_OR */
316   int nTerm;               /* Number of terms */
317   int nSlot;               /* Number of entries in a[] */
318   WhereTerm *a;            /* Each a[] describes a term of the WHERE clause */
319 #if defined(SQLITE_SMALL_STACK)
320   WhereTerm aStatic[1];    /* Initial static space for a[] (1 slot to keep the object small) */
321 #else
322   WhereTerm aStatic[8];    /* Initial static space for a[] (8 slots) */
323 #endif
324 };
325 
326 /*
327 ** A WhereTerm with eOperator==WO_OR has its u.pOrInfo pointer set to
328 ** a dynamically allocated instance of the following structure.
329 */
330 struct WhereOrInfo {
331   WhereClause wc;          /* Decomposition of the OR clause into subterms */
332   Bitmask indexable;       /* Bitmask of all indexable tables in the clause */
333 };
334 
335 /*
336 ** A WhereTerm with eOperator==WO_AND has its u.pAndInfo pointer set to
337 ** a dynamically allocated instance of the following structure.
338 */
339 struct WhereAndInfo {
340   WhereClause wc;          /* The AND subexpression broken out into its own clause */
341 };
342 
343 /*
344 ** An instance of the following structure keeps track of a mapping
345 ** between VDBE cursor numbers and bits of the bitmasks in WhereTerm.
346 **
347 ** The VDBE cursor numbers are small integers contained in
348 ** SrcList_item.iCursor and Expr.iTable fields.  For any given WHERE
349 ** clause, the cursor numbers might not begin with 0 and they might
350 ** contain gaps in the numbering sequence.  But we want to make maximum
351 ** use of the bits in our bitmasks.  This structure provides a mapping
352 ** from the sparse cursor numbers into consecutive integers beginning
353 ** with 0.
354 **
355 ** If WhereMaskSet.ix[A]==B it means that the A-th bit of a Bitmask
356 ** corresponds to VDBE cursor number B.  The A-th bit of a bitmask is 1<<A.
357 **
358 ** For example, if the WHERE clause expression used these VDBE
359 ** cursors:  4, 5, 8, 29, 57, 73.  Then the  WhereMaskSet structure
360 ** would map those cursor numbers into bits 0 through 5.
361 **
362 ** Note that the mapping is not necessarily ordered.  In the example
363 ** above, the mapping might go like this:  4->3, 5->1, 8->2, 29->0,
364 ** 57->5, 73->4.  Or one of 719 other combinations might be used. It
365 ** does not really matter.  What is important is that sparse cursor
366 ** numbers all get mapped into bit numbers that begin with 0 and contain
367 ** no gaps.
368 */
369 struct WhereMaskSet {
370   int n;                        /* Number of assigned cursor values */
371   int ix[BMS];                  /* ix[A] is the cursor number assigned to bit A */
372 };
373 
374 /*
375 ** This object is a convenience wrapper holding all information needed
376 ** to construct WhereLoop objects for a particular query.
377 */
378 struct WhereLoopBuilder {
379   WhereInfo *pWInfo;        /* Information about this WHERE */
380   WhereClause *pWC;         /* WHERE clause terms */
381   ExprList *pOrderBy;       /* ORDER BY clause */
382   WhereLoop *pNew;          /* Template WhereLoop */
383   WhereOrSet *pOrSet;       /* Record best loops here, if not NULL */
384 #ifdef SQLITE_ENABLE_STAT3_OR_STAT4
385   UnpackedRecord *pRec;     /* Probe for stat4 (if required) */
386   int nRecValid;            /* Number of valid fields currently in pRec */
387 #endif  /* The two fields above exist only when SQLITE_ENABLE_STAT3_OR_STAT4 is defined */
388 };
389 
390 /*
391 ** The WHERE clause processing routine has two halves.  The
392 ** first part does the start of the WHERE loop and the second
393 ** half does the tail of the WHERE loop.  An instance of
394 ** this structure is returned by the first half and passed
395 ** into the second half to give some continuity.
396 **
397 ** An instance of this object holds the complete state of the query
398 ** planner.
399 */
400 struct WhereInfo {
401   Parse *pParse;            /* Parsing and code generating context */
402   SrcList *pTabList;        /* List of tables in the join */
403   ExprList *pOrderBy;       /* The ORDER BY clause or NULL */
404   ExprList *pResultSet;     /* Result set. DISTINCT operates on these */
405   WhereLoop *pLoops;        /* List of all WhereLoop objects */
406   Bitmask revMask;          /* Mask of ORDER BY terms that need reversing */
407   LogEst nRowOut;           /* Estimated number of output rows */
408   u16 wctrlFlags;           /* Flags originally passed to sqlite3WhereBegin() */
409   i8 nOBSat;                /* Number of ORDER BY terms satisfied by indices */
410   u8 sorted;                /* True if really sorted (not just grouped) */
411   u8 okOnePass;             /* Ok to use one-pass algorithm for UPDATE/DELETE */
412   u8 untestedTerms;         /* Not all WHERE terms resolved by outer loop */
413   u8 eDistinct;             /* One of the WHERE_DISTINCT_* values below */
414   u8 nLevel;                /* Number of nested loops */
415   int iTop;                 /* The very beginning of the WHERE loop */
416   int iContinue;            /* Jump here to continue with next record */
417   int iBreak;               /* Jump here to break out of the loop */
418   int savedNQueryLoop;      /* pParse->nQueryLoop outside the WHERE loop */
419   int aiCurOnePass[2];      /* OP_OpenWrite cursors for the ONEPASS opt */
420   WhereMaskSet sMaskSet;    /* Map cursor numbers to bitmasks */
421   WhereClause sWC;          /* Decomposition of the WHERE clause */
422   WhereLevel a[1];          /* Information about each nested loop: a[0] is outermost, a[nLevel-1] innermost.  Must remain the last member */
423 };
424 
425 /*
426 ** Bitmasks for the operators on WhereTerm objects.  These are all
427 ** operators that are of interest to the query planner.  An
428 ** OR-ed combination of these values can be used when searching for
429 ** particular WhereTerms within a WhereClause.
430 */
431 #define WO_IN     0x001
432 #define WO_EQ     0x002
433 #define WO_LT     (WO_EQ<<(TK_LT-TK_EQ))  /* WO_EQ shifted by the token-code distance from TK_EQ */
434 #define WO_LE     (WO_EQ<<(TK_LE-TK_EQ))  /* assumes TK_LT, TK_LE, TK_GT, TK_GE are the four codes just above TK_EQ - TODO confirm */
435 #define WO_GT     (WO_EQ<<(TK_GT-TK_EQ))  /* so that these four masks occupy 0x004..0x020 without colliding */
436 #define WO_GE     (WO_EQ<<(TK_GE-TK_EQ))
437 #define WO_MATCH  0x040
438 #define WO_ISNULL 0x080
439 #define WO_OR     0x100       /* Two or more OR-connected terms */
440 #define WO_AND    0x200       /* Two or more AND-connected terms */
441 #define WO_EQUIV  0x400       /* Of the form A==B, both columns */
442 #define WO_NOOP   0x800       /* This term does not restrict search space */
443 
444 #define WO_ALL    0xfff       /* Mask of all possible WO_* values */
445 #define WO_SINGLE 0x0ff       /* Mask of all non-compound WO_* values (the low eight bits) */
446 
447 /*
448 ** These are definitions of bits in the WhereLoop.wsFlags field.
449 ** The particular combination of bits in each WhereLoop help to
450 ** determine the algorithm that WhereLoop represents.
451 */
452 #define WHERE_COLUMN_EQ    0x00000001  /* x=EXPR */
453 #define WHERE_COLUMN_RANGE 0x00000002  /* x<EXPR and/or x>EXPR */
454 #define WHERE_COLUMN_IN    0x00000004  /* x IN (...) */
455 #define WHERE_COLUMN_NULL  0x00000008  /* x IS NULL */
456 #define WHERE_CONSTRAINT   0x0000000f  /* Mask: any of the four WHERE_COLUMN_xxx values */
457 #define WHERE_TOP_LIMIT    0x00000010  /* x<EXPR or x<=EXPR constraint */
458 #define WHERE_BTM_LIMIT    0x00000020  /* x>EXPR or x>=EXPR constraint */
459 #define WHERE_BOTH_LIMIT   0x00000030  /* Both x>EXPR and x<EXPR (TOP_LIMIT|BTM_LIMIT) */
460 #define WHERE_IDX_ONLY     0x00000040  /* Use index only - omit table */
461 #define WHERE_IPK          0x00000100  /* x is the INTEGER PRIMARY KEY */
462 #define WHERE_INDEXED      0x00000200  /* WhereLoop.u.btree.pIndex is valid */
463 #define WHERE_VIRTUALTABLE 0x00000400  /* WhereLoop.u.vtab is valid */
464 #define WHERE_IN_ABLE      0x00000800  /* Able to support an IN operator */
465 #define WHERE_ONEROW       0x00001000  /* Selects no more than one row */
466 #define WHERE_MULTI_OR     0x00002000  /* OR using multiple indices */
467 #define WHERE_AUTO_INDEX   0x00004000  /* Uses an ephemeral index */
468 #define WHERE_SKIPSCAN     0x00008000  /* Uses the skip-scan algorithm */
469 #define WHERE_UNQ_WANTED   0x00010000  /* WHERE_ONEROW would have been helpful */
470 #define WHERE_PARTIALIDX   0x00020000  /* The automatic index is partial */
471