xref: /sqlite-3.40.0/src/dbstat.c (revision 6bcaba70)
1 /*
2 ** 2010 July 12
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains an implementation of the "dbstat" virtual table.
14 **
15 ** The dbstat virtual table is used to extract low-level storage
16 ** information from an SQLite database in order to implement the
17 ** "sqlite3_analyzer" utility.  See the ../tool/spaceanal.tcl script
18 ** for an example implementation.
19 **
20 ** Additional information is available on the "dbstat.html" page of the
21 ** official SQLite documentation.
22 */
23 
24 #include "sqliteInt.h"   /* Requires access to internal data structures */
25 #if (defined(SQLITE_ENABLE_DBSTAT_VTAB) || defined(SQLITE_TEST)) \
26     && !defined(SQLITE_OMIT_VIRTUALTABLE)
27 
28 /*
29 ** Page paths:
30 **
31 **   The value of the 'path' column describes the path taken from the
32 **   root-node of the b-tree structure to each page. The value of the
33 **   root-node path is '/'.
34 **
**   The value of the path for the left-most child page of the root of
**   a b-tree is '/000/'. (Btrees store content ordered from left to right
**   so the pages to the left have smaller keys than the pages to the right.)
**   The path of the next to left-most child of the root page is
**   '/001/', and so on, each sibling page identified by a 3-digit hex
**   value. The children of the 451st left-most sibling have paths such
**   as '/1c2/000/', '/1c2/001/' etc.
42 **
**   Overflow pages are specified by appending a '+' character and a
**   six-digit hexadecimal value to the path to the cell they are linked
**   from. For example, the three overflow pages in a chain linked from
**   the left-most cell of the 451st child of the root page are identified
**   by the paths:
48 **
49 **      '/1c2/000+000000'         // First page in overflow chain
50 **      '/1c2/000+000001'         // Second page in overflow chain
51 **      '/1c2/000+000002'         // Third page in overflow chain
52 **
53 **   If the paths are sorted using the BINARY collation sequence, then
54 **   the overflow pages associated with a cell will appear earlier in the
55 **   sort-order than its child page:
56 **
57 **      '/1c2/000/'               // Left-most child of 451st child of root
58 */
/*
** Schema of the dbstat virtual table.  The number at the start of each
** comment is the column index, as used by statBestIndex() and
** statColumn().  The last two columns are HIDDEN.
*/
static const char zDbstatSchema[] =
  "CREATE TABLE x("
  " name       TEXT,"          /* 0:  table or index that owns the page */
  " path       TEXT,"          /* 1:  path from root page (NULL for agg) */
  " pageno     INTEGER,"       /* 2:  page number (page count for agg) */
  " pagetype   TEXT,"          /* 3:  'internal', 'leaf', 'overflow' or
                               **     'corrupted' (NULL for agg) */
  " ncell      INTEGER,"       /* 4:  cells on page (0 for overflow) */
  " payload    INTEGER,"       /* 5:  payload bytes stored on this page */
  " unused     INTEGER,"       /* 6:  unused bytes on this page */
  " mx_payload INTEGER,"       /* 7:  largest payload of any cell */
  " pgoffset   INTEGER,"       /* 8:  file offset of page (NULL for agg) */
  " pgsize     INTEGER,"       /* 9:  page size (sum of sizes for agg) */
  " schema     TEXT HIDDEN,"   /* 10: database schema being analyzed */
  " aggregate  BOOLEAN HIDDEN" /* 11: true for one summary row per btree */
  ")"
;
75 
/* Forward declarations of the data structures used in this module */
typedef struct StatCell StatCell;
typedef struct StatPage StatPage;
typedef struct StatCursor StatCursor;
typedef struct StatTable StatTable;
81 
82 /* Size information for a single cell within a btree page */
83 struct StatCell {
84   int nLocal;                     /* Bytes of local payload */
85   u32 iChildPg;                   /* Child node (or 0 if this is a leaf) */
86   int nOvfl;                      /* Entries in aOvfl[] */
87   u32 *aOvfl;                     /* Array of overflow page numbers */
88   int nLastOvfl;                  /* Bytes of payload on final overflow page */
89   int iOvfl;                      /* Iterates through aOvfl[] */
90 };
91 
92 /* Size information for a single btree page */
93 struct StatPage {
94   u32 iPgno;                      /* Page number */
95   u8 *aPg;                        /* Page buffer from sqlite3_malloc() */
96   int iCell;                      /* Current cell */
97   char *zPath;                    /* Path to this page */
98 
99   /* Variables populated by statDecodePage(): */
100   u8 flags;                       /* Copy of flags byte */
101   int nCell;                      /* Number of cells on page */
102   int nUnused;                    /* Number of unused bytes on page */
103   StatCell *aCell;                /* Array of parsed cells */
104   u32 iRightChildPg;              /* Right-child page number (or 0) */
105   int nMxPayload;                 /* Largest payload of any cell on the page */
106 };
107 
108 /* The cursor for scanning the dbstat virtual table */
109 struct StatCursor {
110   sqlite3_vtab_cursor base;       /* base class.  MUST BE FIRST! */
111   sqlite3_stmt *pStmt;            /* Iterates through set of root pages */
112   u8 isEof;                       /* After pStmt has returned SQLITE_DONE */
113   u8 isAgg;                       /* Aggregate results for each table */
114   int iDb;                        /* Schema used for this query */
115 
116   StatPage aPage[32];             /* Pages in path to current page */
117   int iPage;                      /* Current entry in aPage[] */
118 
119   /* Values to return. */
120   u32 iPageno;                    /* Value of 'pageno' column */
121   char *zName;                    /* Value of 'name' column */
122   char *zPath;                    /* Value of 'path' column */
123   char *zPagetype;                /* Value of 'pagetype' column */
124   int nPage;                      /* Number of pages in current btree */
125   int nCell;                      /* Value of 'ncell' column */
126   int nMxPayload;                 /* Value of 'mx_payload' column */
127   i64 nUnused;                    /* Value of 'unused' column */
128   i64 nPayload;                   /* Value of 'payload' column */
129   i64 iOffset;                    /* Value of 'pgOffset' column */
130   i64 szPage;                     /* Value of 'pgSize' column */
131 };
132 
133 /* An instance of the DBSTAT virtual table */
134 struct StatTable {
135   sqlite3_vtab base;              /* base class.  MUST BE FIRST! */
136   sqlite3 *db;                    /* Database connection that owns this vtab */
137   int iDb;                        /* Index of database to analyze */
138 };
139 
/* Decode the 2-byte big-endian unsigned integer found at x[0..1].  Only
** defined here if no definition of get2byte() is already in scope. */
#if !defined(get2byte)
# define get2byte(x)   (((x)[0]<<8) | (x)[1])
#endif
143 
144 /*
145 ** Connect to or create a new DBSTAT virtual table.
146 */
147 static int statConnect(
148   sqlite3 *db,
149   void *pAux,
150   int argc, const char *const*argv,
151   sqlite3_vtab **ppVtab,
152   char **pzErr
153 ){
154   StatTable *pTab = 0;
155   int rc = SQLITE_OK;
156   int iDb;
157 
158   if( argc>=4 ){
159     Token nm;
160     sqlite3TokenInit(&nm, (char*)argv[3]);
161     iDb = sqlite3FindDb(db, &nm);
162     if( iDb<0 ){
163       *pzErr = sqlite3_mprintf("no such database: %s", argv[3]);
164       return SQLITE_ERROR;
165     }
166   }else{
167     iDb = 0;
168   }
169   sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
170   rc = sqlite3_declare_vtab(db, zDbstatSchema);
171   if( rc==SQLITE_OK ){
172     pTab = (StatTable *)sqlite3_malloc64(sizeof(StatTable));
173     if( pTab==0 ) rc = SQLITE_NOMEM_BKPT;
174   }
175 
176   assert( rc==SQLITE_OK || pTab==0 );
177   if( rc==SQLITE_OK ){
178     memset(pTab, 0, sizeof(StatTable));
179     pTab->db = db;
180     pTab->iDb = iDb;
181   }
182 
183   *ppVtab = (sqlite3_vtab*)pTab;
184   return rc;
185 }
186 
187 /*
188 ** Disconnect from or destroy the DBSTAT virtual table.
189 */
190 static int statDisconnect(sqlite3_vtab *pVtab){
191   sqlite3_free(pVtab);
192   return SQLITE_OK;
193 }
194 
195 /*
196 ** Compute the best query strategy and return the result in idxNum.
197 **
198 **   idxNum-Bit        Meaning
199 **   ----------        ----------------------------------------------
200 **      0x01           There is a schema=? term in the WHERE clause
201 **      0x02           There is a name=? term in the WHERE clause
202 **      0x04           There is an aggregate=? term in the WHERE clause
203 **      0x08           Output should be ordered by name and path
204 */
205 static int statBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
206   int i;
207   int iSchema = -1;
208   int iName = -1;
209   int iAgg = -1;
210 
211   /* Look for a valid schema=? constraint.  If found, change the idxNum to
212   ** 1 and request the value of that constraint be sent to xFilter.  And
213   ** lower the cost estimate to encourage the constrained version to be
214   ** used.
215   */
216   for(i=0; i<pIdxInfo->nConstraint; i++){
217     if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
218     if( pIdxInfo->aConstraint[i].usable==0 ){
219       /* Force DBSTAT table should always be the right-most table in a join */
220       return SQLITE_CONSTRAINT;
221     }
222     switch( pIdxInfo->aConstraint[i].iColumn ){
223       case 0: {    /* name */
224         iName = i;
225         break;
226       }
227       case 10: {   /* schema */
228         iSchema = i;
229         break;
230       }
231       case 11: {   /* aggregate */
232         iAgg = i;
233         break;
234       }
235     }
236   }
237   i = 0;
238   if( iSchema>=0 ){
239     pIdxInfo->aConstraintUsage[iSchema].argvIndex = ++i;
240     pIdxInfo->aConstraintUsage[iSchema].omit = 1;
241     pIdxInfo->idxNum |= 0x01;
242   }
243   if( iName>=0 ){
244     pIdxInfo->aConstraintUsage[iName].argvIndex = ++i;
245     pIdxInfo->idxNum |= 0x02;
246   }
247   if( iAgg>=0 ){
248     pIdxInfo->aConstraintUsage[iAgg].argvIndex = ++i;
249     pIdxInfo->idxNum |= 0x04;
250   }
251   pIdxInfo->estimatedCost = 1.0;
252 
253   /* Records are always returned in ascending order of (name, path).
254   ** If this will satisfy the client, set the orderByConsumed flag so that
255   ** SQLite does not do an external sort.
256   */
257   if( ( pIdxInfo->nOrderBy==1
258      && pIdxInfo->aOrderBy[0].iColumn==0
259      && pIdxInfo->aOrderBy[0].desc==0
260      ) ||
261       ( pIdxInfo->nOrderBy==2
262      && pIdxInfo->aOrderBy[0].iColumn==0
263      && pIdxInfo->aOrderBy[0].desc==0
264      && pIdxInfo->aOrderBy[1].iColumn==1
265      && pIdxInfo->aOrderBy[1].desc==0
266      )
267   ){
268     pIdxInfo->orderByConsumed = 1;
269     pIdxInfo->idxNum |= 0x08;
270   }
271 
272   return SQLITE_OK;
273 }
274 
275 /*
276 ** Open a new DBSTAT cursor.
277 */
278 static int statOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
279   StatTable *pTab = (StatTable *)pVTab;
280   StatCursor *pCsr;
281 
282   pCsr = (StatCursor *)sqlite3_malloc64(sizeof(StatCursor));
283   if( pCsr==0 ){
284     return SQLITE_NOMEM_BKPT;
285   }else{
286     memset(pCsr, 0, sizeof(StatCursor));
287     pCsr->base.pVtab = pVTab;
288     pCsr->iDb = pTab->iDb;
289   }
290 
291   *ppCursor = (sqlite3_vtab_cursor *)pCsr;
292   return SQLITE_OK;
293 }
294 
295 static void statClearCells(StatPage *p){
296   int i;
297   if( p->aCell ){
298     for(i=0; i<p->nCell; i++){
299       sqlite3_free(p->aCell[i].aOvfl);
300     }
301     sqlite3_free(p->aCell);
302   }
303   p->nCell = 0;
304   p->aCell = 0;
305 }
306 
307 static void statClearPage(StatPage *p){
308   u8 *aPg = p->aPg;
309   statClearCells(p);
310   sqlite3_free(p->zPath);
311   memset(p, 0, sizeof(StatPage));
312   p->aPg = aPg;
313 }
314 
315 static void statResetCsr(StatCursor *pCsr){
316   int i;
317   /* In some circumstances, specifically if an OOM has occurred, the call
318   ** to sqlite3_reset() may cause the pager to be reset (emptied). It is
319   ** important that statClearPage() is called to free any page refs before
320   ** this happens. dbsqlfuzz 9ed3e4e3816219d3509d711636c38542bf3f40b1. */
321   for(i=0; i<ArraySize(pCsr->aPage); i++){
322     statClearPage(&pCsr->aPage[i]);
323     sqlite3_free(pCsr->aPage[i].aPg);
324     pCsr->aPage[i].aPg = 0;
325   }
326   sqlite3_reset(pCsr->pStmt);
327   pCsr->iPage = 0;
328   sqlite3_free(pCsr->zPath);
329   pCsr->zPath = 0;
330   pCsr->isEof = 0;
331 }
332 
333 /* Resize the space-used counters inside of the cursor */
334 static void statResetCounts(StatCursor *pCsr){
335   pCsr->nCell = 0;
336   pCsr->nMxPayload = 0;
337   pCsr->nUnused = 0;
338   pCsr->nPayload = 0;
339   pCsr->szPage = 0;
340   pCsr->nPage = 0;
341 }
342 
343 /*
344 ** Close a DBSTAT cursor.
345 */
346 static int statClose(sqlite3_vtab_cursor *pCursor){
347   StatCursor *pCsr = (StatCursor *)pCursor;
348   statResetCsr(pCsr);
349   sqlite3_finalize(pCsr->pStmt);
350   sqlite3_free(pCsr);
351   return SQLITE_OK;
352 }
353 
/*
** For a single cell on a btree page, compute the number of bytes of
** content (payload) stored on that page.  That is to say, compute the
** number of bytes of content not found on overflow pages.
**
** A payload of nTotal bytes that does not exceed the maximum local size
** for the page type is returned unchanged.  The result may be negative
** if nTotal is negative.
*/
static int getLocalPayload(
  int nUsable,                    /* Usable bytes per page */
  u8 flags,                       /* Page flags */
  int nTotal                      /* Total record (payload) size */
){
  /* The lower bound is the same for every page type.  The upper bound is
  ** larger on table leaf pages (flags==0x0D) than on index pages. */
  const int nMinLocal = (nUsable - 12) * 32 / 255 - 23;
  const int nMaxLocal = (flags==0x0D)
                          ? (nUsable - 35)
                          : ((nUsable - 12) * 64 / 255 - 23);
  const int nLocal = nMinLocal + (nTotal - nMinLocal) % (nUsable - 4);

  return nLocal>nMaxLocal ? nMinLocal : nLocal;
}
380 
381 /* Populate the StatPage object with information about the all
382 ** cells found on the page currently under analysis.
383 */
384 static int statDecodePage(Btree *pBt, StatPage *p){
385   int nUnused;
386   int iOff;
387   int nHdr;
388   int isLeaf;
389   int szPage;
390 
391   u8 *aData = p->aPg;
392   u8 *aHdr = &aData[p->iPgno==1 ? 100 : 0];
393 
394   p->flags = aHdr[0];
395   if( p->flags==0x0A || p->flags==0x0D ){
396     isLeaf = 1;
397     nHdr = 8;
398   }else if( p->flags==0x05 || p->flags==0x02 ){
399     isLeaf = 0;
400     nHdr = 12;
401   }else{
402     goto statPageIsCorrupt;
403   }
404   if( p->iPgno==1 ) nHdr += 100;
405   p->nCell = get2byte(&aHdr[3]);
406   p->nMxPayload = 0;
407   szPage = sqlite3BtreeGetPageSize(pBt);
408 
409   nUnused = get2byte(&aHdr[5]) - nHdr - 2*p->nCell;
410   nUnused += (int)aHdr[7];
411   iOff = get2byte(&aHdr[1]);
412   while( iOff ){
413     int iNext;
414     if( iOff>=szPage ) goto statPageIsCorrupt;
415     nUnused += get2byte(&aData[iOff+2]);
416     iNext = get2byte(&aData[iOff]);
417     if( iNext<iOff+4 && iNext>0 ) goto statPageIsCorrupt;
418     iOff = iNext;
419   }
420   p->nUnused = nUnused;
421   p->iRightChildPg = isLeaf ? 0 : sqlite3Get4byte(&aHdr[8]);
422 
423   if( p->nCell ){
424     int i;                        /* Used to iterate through cells */
425     int nUsable;                  /* Usable bytes per page */
426 
427     sqlite3BtreeEnter(pBt);
428     nUsable = szPage - sqlite3BtreeGetReserveNoMutex(pBt);
429     sqlite3BtreeLeave(pBt);
430     p->aCell = sqlite3_malloc64((p->nCell+1) * sizeof(StatCell));
431     if( p->aCell==0 ) return SQLITE_NOMEM_BKPT;
432     memset(p->aCell, 0, (p->nCell+1) * sizeof(StatCell));
433 
434     for(i=0; i<p->nCell; i++){
435       StatCell *pCell = &p->aCell[i];
436 
437       iOff = get2byte(&aData[nHdr+i*2]);
438       if( iOff<nHdr || iOff>=szPage ) goto statPageIsCorrupt;
439       if( !isLeaf ){
440         pCell->iChildPg = sqlite3Get4byte(&aData[iOff]);
441         iOff += 4;
442       }
443       if( p->flags==0x05 ){
444         /* A table interior node. nPayload==0. */
445       }else{
446         u32 nPayload;             /* Bytes of payload total (local+overflow) */
447         int nLocal;               /* Bytes of payload stored locally */
448         iOff += getVarint32(&aData[iOff], nPayload);
449         if( p->flags==0x0D ){
450           u64 dummy;
451           iOff += sqlite3GetVarint(&aData[iOff], &dummy);
452         }
453         if( nPayload>(u32)p->nMxPayload ) p->nMxPayload = nPayload;
454         nLocal = getLocalPayload(nUsable, p->flags, nPayload);
455         if( nLocal<0 ) goto statPageIsCorrupt;
456         pCell->nLocal = nLocal;
457         assert( nPayload>=(u32)nLocal );
458         assert( nLocal<=(nUsable-35) );
459         if( nPayload>(u32)nLocal ){
460           int j;
461           int nOvfl = ((nPayload - nLocal) + nUsable-4 - 1) / (nUsable - 4);
462           if( iOff+nLocal>nUsable || nPayload>0x7fffffff ){
463             goto statPageIsCorrupt;
464           }
465           pCell->nLastOvfl = (nPayload-nLocal) - (nOvfl-1) * (nUsable-4);
466           pCell->nOvfl = nOvfl;
467           pCell->aOvfl = sqlite3_malloc64(sizeof(u32)*nOvfl);
468           if( pCell->aOvfl==0 ) return SQLITE_NOMEM_BKPT;
469           pCell->aOvfl[0] = sqlite3Get4byte(&aData[iOff+nLocal]);
470           for(j=1; j<nOvfl; j++){
471             int rc;
472             u32 iPrev = pCell->aOvfl[j-1];
473             DbPage *pPg = 0;
474             rc = sqlite3PagerGet(sqlite3BtreePager(pBt), iPrev, &pPg, 0);
475             if( rc!=SQLITE_OK ){
476               assert( pPg==0 );
477               return rc;
478             }
479             pCell->aOvfl[j] = sqlite3Get4byte(sqlite3PagerGetData(pPg));
480             sqlite3PagerUnref(pPg);
481           }
482         }
483       }
484     }
485   }
486 
487   return SQLITE_OK;
488 
489 statPageIsCorrupt:
490   p->flags = 0;
491   statClearCells(p);
492   return SQLITE_OK;
493 }
494 
495 /*
496 ** Populate the pCsr->iOffset and pCsr->szPage member variables. Based on
497 ** the current value of pCsr->iPageno.
498 */
499 static void statSizeAndOffset(StatCursor *pCsr){
500   StatTable *pTab = (StatTable *)((sqlite3_vtab_cursor *)pCsr)->pVtab;
501   Btree *pBt = pTab->db->aDb[pTab->iDb].pBt;
502   Pager *pPager = sqlite3BtreePager(pBt);
503   sqlite3_file *fd;
504   sqlite3_int64 x[2];
505 
506   /* If connected to a ZIPVFS backend, find the page size and
507   ** offset from ZIPVFS.
508   */
509   fd = sqlite3PagerFile(pPager);
510   x[0] = pCsr->iPageno;
511   if( sqlite3OsFileControl(fd, 230440, &x)==SQLITE_OK ){
512     pCsr->iOffset = x[0];
513     pCsr->szPage += x[1];
514   }else{
515     /* Not ZIPVFS: The default page size and offset */
516     pCsr->szPage += sqlite3BtreeGetPageSize(pBt);
517     pCsr->iOffset = (i64)pCsr->szPage * (pCsr->iPageno - 1);
518   }
519 }
520 
521 /*
522 ** Load a copy of the page data for page iPg into the buffer belonging
523 ** to page object pPg. Allocate the buffer if necessary. Return SQLITE_OK
524 ** if successful, or an SQLite error code otherwise.
525 */
526 static int statGetPage(
527   Btree *pBt,                     /* Load page from this b-tree */
528   u32 iPg,                        /* Page number to load */
529   StatPage *pPg                   /* Load page into this object */
530 ){
531   int pgsz = sqlite3BtreeGetPageSize(pBt);
532   DbPage *pDbPage = 0;
533   int rc;
534 
535   if( pPg->aPg==0 ){
536     pPg->aPg = (u8*)sqlite3_malloc(pgsz);
537     if( pPg->aPg==0 ){
538       return SQLITE_NOMEM_BKPT;
539     }
540   }
541 
542   rc = sqlite3PagerGet(sqlite3BtreePager(pBt), iPg, &pDbPage, 0);
543   if( rc==SQLITE_OK ){
544     const u8 *a = sqlite3PagerGetData(pDbPage);
545     memcpy(pPg->aPg, a, pgsz);
546     sqlite3PagerUnref(pDbPage);
547   }
548 
549   return rc;
550 }
551 
552 /*
553 ** Move a DBSTAT cursor to the next entry.  Normally, the next
554 ** entry will be the next page, but in aggregated mode (pCsr->isAgg!=0),
555 ** the next entry is the next btree.
556 */
557 static int statNext(sqlite3_vtab_cursor *pCursor){
558   int rc;
559   int nPayload;
560   char *z;
561   StatCursor *pCsr = (StatCursor *)pCursor;
562   StatTable *pTab = (StatTable *)pCursor->pVtab;
563   Btree *pBt = pTab->db->aDb[pCsr->iDb].pBt;
564   Pager *pPager = sqlite3BtreePager(pBt);
565 
566   sqlite3_free(pCsr->zPath);
567   pCsr->zPath = 0;
568 
569 statNextRestart:
570   if( pCsr->iPage<0 ){
571     /* Start measuring space on the next btree */
572     statResetCounts(pCsr);
573     rc = sqlite3_step(pCsr->pStmt);
574     if( rc==SQLITE_ROW ){
575       int nPage;
576       u32 iRoot = (u32)sqlite3_column_int64(pCsr->pStmt, 1);
577       sqlite3PagerPagecount(pPager, &nPage);
578       if( nPage==0 ){
579         pCsr->isEof = 1;
580         return sqlite3_reset(pCsr->pStmt);
581       }
582       rc = statGetPage(pBt, iRoot, &pCsr->aPage[0]);
583       pCsr->aPage[0].iPgno = iRoot;
584       pCsr->aPage[0].iCell = 0;
585       if( !pCsr->isAgg ){
586         pCsr->aPage[0].zPath = z = sqlite3_mprintf("/");
587         if( z==0 ) rc = SQLITE_NOMEM_BKPT;
588       }
589       pCsr->iPage = 0;
590       pCsr->nPage = 1;
591     }else{
592       pCsr->isEof = 1;
593       return sqlite3_reset(pCsr->pStmt);
594     }
595   }else{
596     /* Continue analyzing the btree previously started */
597     StatPage *p = &pCsr->aPage[pCsr->iPage];
598     if( !pCsr->isAgg ) statResetCounts(pCsr);
599     while( p->iCell<p->nCell ){
600       StatCell *pCell = &p->aCell[p->iCell];
601       while( pCell->iOvfl<pCell->nOvfl ){
602         int nUsable, iOvfl;
603         sqlite3BtreeEnter(pBt);
604         nUsable = sqlite3BtreeGetPageSize(pBt) -
605                         sqlite3BtreeGetReserveNoMutex(pBt);
606         sqlite3BtreeLeave(pBt);
607         pCsr->nPage++;
608         statSizeAndOffset(pCsr);
609         if( pCell->iOvfl<pCell->nOvfl-1 ){
610           pCsr->nPayload += nUsable - 4;
611         }else{
612           pCsr->nPayload += pCell->nLastOvfl;
613           pCsr->nUnused += nUsable - 4 - pCell->nLastOvfl;
614         }
615         iOvfl = pCell->iOvfl;
616         pCell->iOvfl++;
617         if( !pCsr->isAgg ){
618           pCsr->zName = (char *)sqlite3_column_text(pCsr->pStmt, 0);
619           pCsr->iPageno = pCell->aOvfl[iOvfl];
620           pCsr->zPagetype = "overflow";
621           pCsr->zPath = z = sqlite3_mprintf(
622               "%s%.3x+%.6x", p->zPath, p->iCell, iOvfl
623           );
624           return z==0 ? SQLITE_NOMEM_BKPT : SQLITE_OK;
625         }
626       }
627       if( p->iRightChildPg ) break;
628       p->iCell++;
629     }
630 
631     if( !p->iRightChildPg || p->iCell>p->nCell ){
632       statClearPage(p);
633       pCsr->iPage--;
634       if( pCsr->isAgg && pCsr->iPage<0 ){
635         /* label-statNext-done:  When computing aggregate space usage over
636         ** an entire btree, this is the exit point from this function */
637         return SQLITE_OK;
638       }
639       goto statNextRestart; /* Tail recursion */
640     }
641     pCsr->iPage++;
642     if( pCsr->iPage>=ArraySize(pCsr->aPage) ){
643       statResetCsr(pCsr);
644       return SQLITE_CORRUPT_BKPT;
645     }
646     assert( p==&pCsr->aPage[pCsr->iPage-1] );
647 
648     if( p->iCell==p->nCell ){
649       p[1].iPgno = p->iRightChildPg;
650     }else{
651       p[1].iPgno = p->aCell[p->iCell].iChildPg;
652     }
653     rc = statGetPage(pBt, p[1].iPgno, &p[1]);
654     pCsr->nPage++;
655     p[1].iCell = 0;
656     if( !pCsr->isAgg ){
657       p[1].zPath = z = sqlite3_mprintf("%s%.3x/", p->zPath, p->iCell);
658       if( z==0 ) rc = SQLITE_NOMEM_BKPT;
659     }
660     p->iCell++;
661   }
662 
663 
664   /* Populate the StatCursor fields with the values to be returned
665   ** by the xColumn() and xRowid() methods.
666   */
667   if( rc==SQLITE_OK ){
668     int i;
669     StatPage *p = &pCsr->aPage[pCsr->iPage];
670     pCsr->zName = (char *)sqlite3_column_text(pCsr->pStmt, 0);
671     pCsr->iPageno = p->iPgno;
672 
673     rc = statDecodePage(pBt, p);
674     if( rc==SQLITE_OK ){
675       statSizeAndOffset(pCsr);
676 
677       switch( p->flags ){
678         case 0x05:             /* table internal */
679         case 0x02:             /* index internal */
680           pCsr->zPagetype = "internal";
681           break;
682         case 0x0D:             /* table leaf */
683         case 0x0A:             /* index leaf */
684           pCsr->zPagetype = "leaf";
685           break;
686         default:
687           pCsr->zPagetype = "corrupted";
688           break;
689       }
690       pCsr->nCell += p->nCell;
691       pCsr->nUnused += p->nUnused;
692       if( p->nMxPayload>pCsr->nMxPayload ) pCsr->nMxPayload = p->nMxPayload;
693       if( !pCsr->isAgg ){
694         pCsr->zPath = z = sqlite3_mprintf("%s", p->zPath);
695         if( z==0 ) rc = SQLITE_NOMEM_BKPT;
696       }
697       nPayload = 0;
698       for(i=0; i<p->nCell; i++){
699         nPayload += p->aCell[i].nLocal;
700       }
701       pCsr->nPayload += nPayload;
702 
703       /* If computing aggregate space usage by btree, continue with the
704       ** next page.  The loop will exit via the return at label-statNext-done
705       */
706       if( pCsr->isAgg ) goto statNextRestart;
707     }
708   }
709 
710   return rc;
711 }
712 
713 static int statEof(sqlite3_vtab_cursor *pCursor){
714   StatCursor *pCsr = (StatCursor *)pCursor;
715   return pCsr->isEof;
716 }
717 
718 /* Initialize a cursor according to the query plan idxNum using the
719 ** arguments in argv[0].  See statBestIndex() for a description of the
720 ** meaning of the bits in idxNum.
721 */
722 static int statFilter(
723   sqlite3_vtab_cursor *pCursor,
724   int idxNum, const char *idxStr,
725   int argc, sqlite3_value **argv
726 ){
727   StatCursor *pCsr = (StatCursor *)pCursor;
728   StatTable *pTab = (StatTable*)(pCursor->pVtab);
729   sqlite3_str *pSql;      /* Query of btrees to analyze */
730   char *zSql;             /* String value of pSql */
731   int iArg = 0;           /* Count of argv[] parameters used so far */
732   int rc = SQLITE_OK;     /* Result of this operation */
733   const char *zName = 0;  /* Only provide analysis of this table */
734 
735   statResetCsr(pCsr);
736   sqlite3_finalize(pCsr->pStmt);
737   pCsr->pStmt = 0;
738   if( idxNum & 0x01 ){
739     /* schema=? constraint is present.  Get its value */
740     const char *zDbase = (const char*)sqlite3_value_text(argv[iArg++]);
741     pCsr->iDb = sqlite3FindDbName(pTab->db, zDbase);
742     if( pCsr->iDb<0 ){
743       pCsr->iDb = 0;
744       pCsr->isEof = 1;
745       return SQLITE_OK;
746     }
747   }else{
748     pCsr->iDb = pTab->iDb;
749   }
750   if( idxNum & 0x02 ){
751     /* name=? constraint is present */
752     zName = (const char*)sqlite3_value_text(argv[iArg++]);
753   }
754   if( idxNum & 0x04 ){
755     /* aggregate=? constraint is present */
756     pCsr->isAgg = sqlite3_value_double(argv[iArg++])!=0.0;
757   }else{
758     pCsr->isAgg = 0;
759   }
760   pSql = sqlite3_str_new(pTab->db);
761   sqlite3_str_appendf(pSql,
762       "SELECT * FROM ("
763         "SELECT 'sqlite_schema' AS name,1 AS rootpage,'table' AS type"
764         " UNION ALL "
765         "SELECT name,rootpage,type"
766         " FROM \"%w\".sqlite_schema WHERE rootpage!=0)",
767       pTab->db->aDb[pCsr->iDb].zDbSName);
768   if( zName ){
769     sqlite3_str_appendf(pSql, "WHERE name=%Q", zName);
770   }
771   if( idxNum & 0x08 ){
772     sqlite3_str_appendf(pSql, " ORDER BY name");
773   }
774   zSql = sqlite3_str_finish(pSql);
775   if( zSql==0 ){
776     return SQLITE_NOMEM_BKPT;
777   }else{
778     rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pStmt, 0);
779     sqlite3_free(zSql);
780   }
781 
782   if( rc==SQLITE_OK ){
783     pCsr->iPage = -1;
784     rc = statNext(pCursor);
785   }
786   return rc;
787 }
788 
789 static int statColumn(
790   sqlite3_vtab_cursor *pCursor,
791   sqlite3_context *ctx,
792   int i
793 ){
794   StatCursor *pCsr = (StatCursor *)pCursor;
795   switch( i ){
796     case 0:            /* name */
797       sqlite3_result_text(ctx, pCsr->zName, -1, SQLITE_TRANSIENT);
798       break;
799     case 1:            /* path */
800       if( !pCsr->isAgg ){
801         sqlite3_result_text(ctx, pCsr->zPath, -1, SQLITE_TRANSIENT);
802       }
803       break;
804     case 2:            /* pageno */
805       if( pCsr->isAgg ){
806         sqlite3_result_int64(ctx, pCsr->nPage);
807       }else{
808         sqlite3_result_int64(ctx, pCsr->iPageno);
809       }
810       break;
811     case 3:            /* pagetype */
812       if( !pCsr->isAgg ){
813         sqlite3_result_text(ctx, pCsr->zPagetype, -1, SQLITE_STATIC);
814       }
815       break;
816     case 4:            /* ncell */
817       sqlite3_result_int(ctx, pCsr->nCell);
818       break;
819     case 5:            /* payload */
820       sqlite3_result_int(ctx, pCsr->nPayload);
821       break;
822     case 6:            /* unused */
823       sqlite3_result_int(ctx, pCsr->nUnused);
824       break;
825     case 7:            /* mx_payload */
826       sqlite3_result_int(ctx, pCsr->nMxPayload);
827       break;
828     case 8:            /* pgoffset */
829       if( !pCsr->isAgg ){
830         sqlite3_result_int64(ctx, pCsr->iOffset);
831       }
832       break;
833     case 9:            /* pgsize */
834       sqlite3_result_int(ctx, pCsr->szPage);
835       break;
836     case 10: {         /* schema */
837       sqlite3 *db = sqlite3_context_db_handle(ctx);
838       int iDb = pCsr->iDb;
839       sqlite3_result_text(ctx, db->aDb[iDb].zDbSName, -1, SQLITE_STATIC);
840       break;
841     }
842     default: {         /* aggregate */
843       sqlite3_result_int(ctx, pCsr->isAgg);
844       break;
845     }
846   }
847   return SQLITE_OK;
848 }
849 
850 static int statRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
851   StatCursor *pCsr = (StatCursor *)pCursor;
852   *pRowid = pCsr->iPageno;
853   return SQLITE_OK;
854 }
855 
856 /*
857 ** Invoke this routine to register the "dbstat" virtual table module
858 */
859 int sqlite3DbstatRegister(sqlite3 *db){
860   static sqlite3_module dbstat_module = {
861     0,                            /* iVersion */
862     statConnect,                  /* xCreate */
863     statConnect,                  /* xConnect */
864     statBestIndex,                /* xBestIndex */
865     statDisconnect,               /* xDisconnect */
866     statDisconnect,               /* xDestroy */
867     statOpen,                     /* xOpen - open a cursor */
868     statClose,                    /* xClose - close a cursor */
869     statFilter,                   /* xFilter - configure scan constraints */
870     statNext,                     /* xNext - advance a cursor */
871     statEof,                      /* xEof - check for end of scan */
872     statColumn,                   /* xColumn - read data */
873     statRowid,                    /* xRowid - read data */
874     0,                            /* xUpdate */
875     0,                            /* xBegin */
876     0,                            /* xSync */
877     0,                            /* xCommit */
878     0,                            /* xRollback */
879     0,                            /* xFindMethod */
880     0,                            /* xRename */
881     0,                            /* xSavepoint */
882     0,                            /* xRelease */
883     0,                            /* xRollbackTo */
884     0                             /* xShadowName */
885   };
886   return sqlite3_create_module(db, "dbstat", &dbstat_module, 0);
887 }
888 #elif defined(SQLITE_ENABLE_DBSTAT_VTAB)
889 int sqlite3DbstatRegister(sqlite3 *db){ return SQLITE_OK; }
890 #endif /* SQLITE_ENABLE_DBSTAT_VTAB */
891