xref: /sqlite-3.40.0/src/dbstat.c (revision 3c648882)
1 /*
2 ** 2010 July 12
3 **
4 ** The author disclaims copyright to this source code.  In place of
5 ** a legal notice, here is a blessing:
6 **
7 **    May you do good and not evil.
8 **    May you find forgiveness for yourself and forgive others.
9 **    May you share freely, never taking more than you give.
10 **
11 ******************************************************************************
12 **
13 ** This file contains an implementation of the "dbstat" virtual table.
14 **
15 ** The dbstat virtual table is used to extract low-level storage
16 ** information from an SQLite database in order to implement the
17 ** "sqlite3_analyzer" utility.  See the ../tool/spaceanal.tcl script
18 ** for an example implementation.
19 **
20 ** Additional information is available on the "dbstat.html" page of the
21 ** official SQLite documentation.
22 */
23 
24 #include "sqliteInt.h"   /* Requires access to internal data structures */
25 #if (defined(SQLITE_ENABLE_DBSTAT_VTAB) || defined(SQLITE_TEST)) \
26     && !defined(SQLITE_OMIT_VIRTUALTABLE)
27 
28 /*
29 ** The pager and btree modules arrange objects in memory so that there are
30 ** always approximately 200 bytes of addressable memory following each page
31 ** buffer. This way small buffer overreads caused by corrupt database pages
32 ** do not cause undefined behaviour. This module pads each page buffer
33 ** by the following number of bytes for the same purpose.
34 */
35 #define DBSTAT_PAGE_PADDING_BYTES 256
36 
37 /*
38 ** Page paths:
39 **
40 **   The value of the 'path' column describes the path taken from the
41 **   root-node of the b-tree structure to each page. The value of the
42 **   root-node path is '/'.
43 **
44 **   The value of the path for the left-most child page of the root of
45 **   a b-tree is '/000/'. (Btrees store content ordered from left to right
46 **   so the pages to the left have smaller keys than the pages to the right.)
**   The next to left-most child of the root page is
**   '/001/', and so on, each sibling page identified by a 3-digit hex
**   value. The children of the 451st left-most sibling have paths such
**   as '/1c2/000/', '/1c2/001/' etc.
51 **
52 **   Overflow pages are specified by appending a '+' character and a
53 **   six-digit hexadecimal value to the path to the cell they are linked
**   from. For example, the three overflow pages in a chain linked from
**   the left-most cell of the 451st child of the root page are identified
**   by the paths:
57 **
58 **      '/1c2/000+000000'         // First page in overflow chain
59 **      '/1c2/000+000001'         // Second page in overflow chain
60 **      '/1c2/000+000002'         // Third page in overflow chain
61 **
62 **   If the paths are sorted using the BINARY collation sequence, then
63 **   the overflow pages associated with a cell will appear earlier in the
64 **   sort-order than its child page:
65 **
66 **      '/1c2/000/'               // Left-most child of 451st child of root
67 */
/*
** Schema of the dbstat virtual table, as handed to sqlite3_declare_vtab()
** by statConnect().  The number in each trailing comment is the column
** index used by statBestIndex() and statColumn().  The last two columns
** are HIDDEN.
*/
static const char zDbstatSchema[] =
  "CREATE TABLE x("
  " name       TEXT,"          /*  0 Name of table or index */
  " path       TEXT,"          /*  1 Path to page from root (NULL for agg) */
  " pageno     INTEGER,"       /*  2 Page number (page count for aggregates) */
  " pagetype   TEXT,"          /*  3 'internal', 'leaf', 'overflow', or NULL */
  " ncell      INTEGER,"       /*  4 Cells on page (0 for overflow) */
  " payload    INTEGER,"       /*  5 Bytes of payload on this page */
  " unused     INTEGER,"       /*  6 Bytes of unused space on this page */
  " mx_payload INTEGER,"       /*  7 Largest payload size of all cells */
  " pgoffset   INTEGER,"       /*  8 Offset of page in file (NULL for agg) */
  " pgsize     INTEGER,"       /*  9 Size of the page (sum for aggregate) */
  " schema     TEXT HIDDEN,"   /* 10 Database schema being analyzed */
  " aggregate  BOOLEAN HIDDEN" /* 11 aggregate info for each table */
  ")"
;
84 
/* Forward references to the data structures used in this module */
86 typedef struct StatTable StatTable;
87 typedef struct StatCursor StatCursor;
88 typedef struct StatPage StatPage;
89 typedef struct StatCell StatCell;
90 
/*
** Size information for a single cell within a btree page.
**
** Instances are filled in by statDecodePage() and released by
** statClearCells().  The aOvfl[] array is a separate allocation that is
** only made when the cell has payload that does not fit on the page.
*/
struct StatCell {
  int nLocal;                     /* Bytes of payload stored on the btree page */
  u32 iChildPg;                   /* Child node (or 0 if this is a leaf) */
  int nOvfl;                      /* Entries in aOvfl[] */
  u32 *aOvfl;                     /* Array of overflow page numbers, in chain order */
  int nLastOvfl;                  /* Bytes of payload on final overflow page */
  int iOvfl;                      /* Next aOvfl[] entry statNext() will report */
};
100 
/*
** Size information for a single btree page.
**
** One StatPage exists for every level of the path from the root of the
** btree being scanned down to the current page (see StatCursor.aPage[]).
** The aPg buffer is allocated lazily by statGetPage(), survives
** statClearPage() so that it can be reused, and is freed by statResetCsr().
*/
struct StatPage {
  u32 iPgno;                      /* Page number */
  u8 *aPg;                        /* Page buffer from sqlite3_malloc() */
  int iCell;                      /* Current cell */
  char *zPath;                    /* Path to this page (unset in aggregate mode) */

  /* Variables populated by statDecodePage(): */
  u8 flags;                       /* Copy of flags byte (0 if page is corrupt) */
  int nCell;                      /* Number of cells on page */
  int nUnused;                    /* Number of unused bytes on page */
  StatCell *aCell;                /* Array of parsed cells */
  u32 iRightChildPg;              /* Right-child page number (or 0) */
  int nMxPayload;                 /* Largest payload of any cell on the page */
};
116 
/*
** The cursor for scanning the dbstat virtual table.
**
** pStmt iterates over the (name, rootpage) pairs of the btrees to be
** analyzed.  aPage[0..iPage] holds the path from the root of the current
** btree to the page being visited; iPage<0 tells statNext() to start on
** the next btree returned by pStmt.  A btree deeper than the aPage[]
** array is reported by statNext() as SQLITE_CORRUPT.
*/
struct StatCursor {
  sqlite3_vtab_cursor base;       /* base class.  MUST BE FIRST! */
  sqlite3_stmt *pStmt;            /* Iterates through set of root pages */
  u8 isEof;                       /* After pStmt has returned SQLITE_DONE */
  u8 isAgg;                       /* Aggregate results for each table */
  int iDb;                        /* Schema used for this query */

  StatPage aPage[32];             /* Pages in path to current page */
  int iPage;                      /* Current entry in aPage[] */

  /* Values to return. */
  u32 iPageno;                    /* Value of 'pageno' column (also the rowid) */
  char *zName;                    /* Value of 'name' column */
  char *zPath;                    /* Value of 'path' column */
  char *zPagetype;                /* Value of 'pagetype' column */
  int nPage;                      /* Number of pages in current btree */
  int nCell;                      /* Value of 'ncell' column */
  int nMxPayload;                 /* Value of 'mx_payload' column */
  i64 nUnused;                    /* Value of 'unused' column */
  i64 nPayload;                   /* Value of 'payload' column */
  i64 iOffset;                    /* Value of 'pgOffset' column */
  i64 szPage;                     /* Value of 'pgSize' column */
};
141 
/*
** An instance of the DBSTAT virtual table.  Allocated by statConnect()
** and freed by statDisconnect().
*/
struct StatTable {
  sqlite3_vtab base;              /* base class.  MUST BE FIRST! */
  sqlite3 *db;                    /* Database connection that owns this vtab */
  int iDb;                        /* Index of database to analyze by default */
};
148 
/*
** Combine bytes x[0] (high-order) and x[1] (low-order) into a 16-bit
** big-endian value.  Only defined here if no earlier definition of
** get2byte is already in scope.
*/
#ifndef get2byte
# define get2byte(x)   ((x)[0]<<8 | (x)[1])
#endif
152 
153 /*
154 ** Connect to or create a new DBSTAT virtual table.
155 */
156 static int statConnect(
157   sqlite3 *db,
158   void *pAux,
159   int argc, const char *const*argv,
160   sqlite3_vtab **ppVtab,
161   char **pzErr
162 ){
163   StatTable *pTab = 0;
164   int rc = SQLITE_OK;
165   int iDb;
166 
167   if( argc>=4 ){
168     Token nm;
169     sqlite3TokenInit(&nm, (char*)argv[3]);
170     iDb = sqlite3FindDb(db, &nm);
171     if( iDb<0 ){
172       *pzErr = sqlite3_mprintf("no such database: %s", argv[3]);
173       return SQLITE_ERROR;
174     }
175   }else{
176     iDb = 0;
177   }
178   sqlite3_vtab_config(db, SQLITE_VTAB_DIRECTONLY);
179   rc = sqlite3_declare_vtab(db, zDbstatSchema);
180   if( rc==SQLITE_OK ){
181     pTab = (StatTable *)sqlite3_malloc64(sizeof(StatTable));
182     if( pTab==0 ) rc = SQLITE_NOMEM_BKPT;
183   }
184 
185   assert( rc==SQLITE_OK || pTab==0 );
186   if( rc==SQLITE_OK ){
187     memset(pTab, 0, sizeof(StatTable));
188     pTab->db = db;
189     pTab->iDb = iDb;
190   }
191 
192   *ppVtab = (sqlite3_vtab*)pTab;
193   return rc;
194 }
195 
196 /*
197 ** Disconnect from or destroy the DBSTAT virtual table.
198 */
199 static int statDisconnect(sqlite3_vtab *pVtab){
200   sqlite3_free(pVtab);
201   return SQLITE_OK;
202 }
203 
204 /*
205 ** Compute the best query strategy and return the result in idxNum.
206 **
207 **   idxNum-Bit        Meaning
208 **   ----------        ----------------------------------------------
209 **      0x01           There is a schema=? term in the WHERE clause
210 **      0x02           There is a name=? term in the WHERE clause
211 **      0x04           There is an aggregate=? term in the WHERE clause
212 **      0x08           Output should be ordered by name and path
213 */
214 static int statBestIndex(sqlite3_vtab *tab, sqlite3_index_info *pIdxInfo){
215   int i;
216   int iSchema = -1;
217   int iName = -1;
218   int iAgg = -1;
219 
220   /* Look for a valid schema=? constraint.  If found, change the idxNum to
221   ** 1 and request the value of that constraint be sent to xFilter.  And
222   ** lower the cost estimate to encourage the constrained version to be
223   ** used.
224   */
225   for(i=0; i<pIdxInfo->nConstraint; i++){
226     if( pIdxInfo->aConstraint[i].op!=SQLITE_INDEX_CONSTRAINT_EQ ) continue;
227     if( pIdxInfo->aConstraint[i].usable==0 ){
228       /* Force DBSTAT table should always be the right-most table in a join */
229       return SQLITE_CONSTRAINT;
230     }
231     switch( pIdxInfo->aConstraint[i].iColumn ){
232       case 0: {    /* name */
233         iName = i;
234         break;
235       }
236       case 10: {   /* schema */
237         iSchema = i;
238         break;
239       }
240       case 11: {   /* aggregate */
241         iAgg = i;
242         break;
243       }
244     }
245   }
246   i = 0;
247   if( iSchema>=0 ){
248     pIdxInfo->aConstraintUsage[iSchema].argvIndex = ++i;
249     pIdxInfo->aConstraintUsage[iSchema].omit = 1;
250     pIdxInfo->idxNum |= 0x01;
251   }
252   if( iName>=0 ){
253     pIdxInfo->aConstraintUsage[iName].argvIndex = ++i;
254     pIdxInfo->idxNum |= 0x02;
255   }
256   if( iAgg>=0 ){
257     pIdxInfo->aConstraintUsage[iAgg].argvIndex = ++i;
258     pIdxInfo->idxNum |= 0x04;
259   }
260   pIdxInfo->estimatedCost = 1.0;
261 
262   /* Records are always returned in ascending order of (name, path).
263   ** If this will satisfy the client, set the orderByConsumed flag so that
264   ** SQLite does not do an external sort.
265   */
266   if( ( pIdxInfo->nOrderBy==1
267      && pIdxInfo->aOrderBy[0].iColumn==0
268      && pIdxInfo->aOrderBy[0].desc==0
269      ) ||
270       ( pIdxInfo->nOrderBy==2
271      && pIdxInfo->aOrderBy[0].iColumn==0
272      && pIdxInfo->aOrderBy[0].desc==0
273      && pIdxInfo->aOrderBy[1].iColumn==1
274      && pIdxInfo->aOrderBy[1].desc==0
275      )
276   ){
277     pIdxInfo->orderByConsumed = 1;
278     pIdxInfo->idxNum |= 0x08;
279   }
280 
281   return SQLITE_OK;
282 }
283 
284 /*
285 ** Open a new DBSTAT cursor.
286 */
287 static int statOpen(sqlite3_vtab *pVTab, sqlite3_vtab_cursor **ppCursor){
288   StatTable *pTab = (StatTable *)pVTab;
289   StatCursor *pCsr;
290 
291   pCsr = (StatCursor *)sqlite3_malloc64(sizeof(StatCursor));
292   if( pCsr==0 ){
293     return SQLITE_NOMEM_BKPT;
294   }else{
295     memset(pCsr, 0, sizeof(StatCursor));
296     pCsr->base.pVtab = pVTab;
297     pCsr->iDb = pTab->iDb;
298   }
299 
300   *ppCursor = (sqlite3_vtab_cursor *)pCsr;
301   return SQLITE_OK;
302 }
303 
304 static void statClearCells(StatPage *p){
305   int i;
306   if( p->aCell ){
307     for(i=0; i<p->nCell; i++){
308       sqlite3_free(p->aCell[i].aOvfl);
309     }
310     sqlite3_free(p->aCell);
311   }
312   p->nCell = 0;
313   p->aCell = 0;
314 }
315 
316 static void statClearPage(StatPage *p){
317   u8 *aPg = p->aPg;
318   statClearCells(p);
319   sqlite3_free(p->zPath);
320   memset(p, 0, sizeof(StatPage));
321   p->aPg = aPg;
322 }
323 
324 static void statResetCsr(StatCursor *pCsr){
325   int i;
326   /* In some circumstances, specifically if an OOM has occurred, the call
327   ** to sqlite3_reset() may cause the pager to be reset (emptied). It is
328   ** important that statClearPage() is called to free any page refs before
329   ** this happens. dbsqlfuzz 9ed3e4e3816219d3509d711636c38542bf3f40b1. */
330   for(i=0; i<ArraySize(pCsr->aPage); i++){
331     statClearPage(&pCsr->aPage[i]);
332     sqlite3_free(pCsr->aPage[i].aPg);
333     pCsr->aPage[i].aPg = 0;
334   }
335   sqlite3_reset(pCsr->pStmt);
336   pCsr->iPage = 0;
337   sqlite3_free(pCsr->zPath);
338   pCsr->zPath = 0;
339   pCsr->isEof = 0;
340 }
341 
342 /* Resize the space-used counters inside of the cursor */
343 static void statResetCounts(StatCursor *pCsr){
344   pCsr->nCell = 0;
345   pCsr->nMxPayload = 0;
346   pCsr->nUnused = 0;
347   pCsr->nPayload = 0;
348   pCsr->szPage = 0;
349   pCsr->nPage = 0;
350 }
351 
352 /*
353 ** Close a DBSTAT cursor.
354 */
355 static int statClose(sqlite3_vtab_cursor *pCursor){
356   StatCursor *pCsr = (StatCursor *)pCursor;
357   statResetCsr(pCsr);
358   sqlite3_finalize(pCsr->pStmt);
359   sqlite3_free(pCsr);
360   return SQLITE_OK;
361 }
362 
/*
** Return the number of payload bytes of a cell that are stored on the
** btree page itself (as opposed to on overflow pages).
**
**   nUsable   Usable bytes per page
**   flags     Page flags byte (0x0D selects the table-leaf limits)
**   nTotal    Total size of the record (payload) in bytes
**
** The candidate size is the minimum local size plus the remainder of the
** excess over the per-overflow-page capacity (nUsable-4).  If that
** candidate is larger than the maximum local size, the minimum local size
** is used instead.  The result may be negative for a negative nTotal;
** the caller treats that as corruption.
*/
static int getLocalPayload(
  int nUsable,                    /* Usable bytes per page */
  u8 flags,                       /* Page flags */
  int nTotal                      /* Total record (payload) size */
){
  /* The lower bound is the same for every page type */
  const int mnLocal = (nUsable - 12) * 32 / 255 - 23;
  /* Table leaf nodes have a larger upper bound than index pages */
  const int mxLocal = (flags==0x0D)
                        ? (nUsable - 35)
                        : ((nUsable - 12) * 64 / 255 - 23);
  const int nCandidate = mnLocal + (nTotal - mnLocal) % (nUsable - 4);

  return (nCandidate>mxLocal) ? mnLocal : nCandidate;
}
389 
/*
** Populate the StatPage object with information about all of the
** cells found on the page currently under analysis.
**
** The raw page image is read from p->aPg (loaded earlier by
** statGetPage()).  On return the fields flags, nCell, nUnused, aCell,
** iRightChildPg and nMxPayload of *p have been filled in.
**
** If the page image does not look like a valid btree page, p->flags is
** set to 0, any cells parsed so far are discarded, and SQLITE_OK is still
** returned; statNext() reports such a page with pagetype 'corrupted'.
** An error code is returned only for an allocation failure or if an
** overflow page cannot be fetched from the pager.
*/
static int statDecodePage(Btree *pBt, StatPage *p){
  int nUnused;
  int iOff;
  int nHdr;
  int isLeaf;
  int szPage;

  u8 *aData = p->aPg;
  /* On page 1 the btree page header starts at byte offset 100 */
  u8 *aHdr = &aData[p->iPgno==1 ? 100 : 0];

  /* Classify the page.  0x0A and 0x0D are the leaf page types and have an
  ** 8-byte header; 0x05 and 0x02 are the interior page types and have a
  ** 12-byte header.  Anything else is treated as corruption. */
  p->flags = aHdr[0];
  if( p->flags==0x0A || p->flags==0x0D ){
    isLeaf = 1;
    nHdr = 8;
  }else if( p->flags==0x05 || p->flags==0x02 ){
    isLeaf = 0;
    nHdr = 12;
  }else{
    goto statPageIsCorrupt;
  }
  /* From here on nHdr is measured from the start of the page buffer */
  if( p->iPgno==1 ) nHdr += 100;
  p->nCell = get2byte(&aHdr[3]);
  p->nMxPayload = 0;
  szPage = sqlite3BtreeGetPageSize(pBt);

  /* Unused space: the gap between the end of the cell-pointer array and
  ** the 2-byte value at header offset 5, plus the byte at header offset
  ** 7, plus the size of every block on the chain that starts at header
  ** offset 1 (per the SQLite file format these are the cell content
  ** offset, the fragmented-byte count and the freeblock list). */
  nUnused = get2byte(&aHdr[5]) - nHdr - 2*p->nCell;
  nUnused += (int)aHdr[7];
  iOff = get2byte(&aHdr[1]);
  while( iOff ){
    int iNext;
    if( iOff>=szPage ) goto statPageIsCorrupt;
    nUnused += get2byte(&aData[iOff+2]);
    iNext = get2byte(&aData[iOff]);
    /* Each link must move forward by at least 4 bytes; this also
    ** guarantees that the loop terminates. */
    if( iNext<iOff+4 && iNext>0 ) goto statPageIsCorrupt;
    iOff = iNext;
  }
  p->nUnused = nUnused;
  p->iRightChildPg = isLeaf ? 0 : sqlite3Get4byte(&aHdr[8]);

  if( p->nCell ){
    int i;                        /* Used to iterate through cells */
    int nUsable;                  /* Usable bytes per page */

    sqlite3BtreeEnter(pBt);
    nUsable = szPage - sqlite3BtreeGetReserveNoMutex(pBt);
    sqlite3BtreeLeave(pBt);
    /* One extra zeroed entry is allocated beyond the nCell parsed cells */
    p->aCell = sqlite3_malloc64((p->nCell+1) * sizeof(StatCell));
    if( p->aCell==0 ) return SQLITE_NOMEM_BKPT;
    memset(p->aCell, 0, (p->nCell+1) * sizeof(StatCell));

    for(i=0; i<p->nCell; i++){
      StatCell *pCell = &p->aCell[i];

      /* The cell-pointer array follows the page header; each entry is the
      ** 2-byte offset of a cell within the page. */
      iOff = get2byte(&aData[nHdr+i*2]);
      if( iOff<nHdr || iOff>=szPage ) goto statPageIsCorrupt;
      if( !isLeaf ){
        /* Cells on interior pages begin with a 4-byte child page number */
        pCell->iChildPg = sqlite3Get4byte(&aData[iOff]);
        iOff += 4;
      }
      if( p->flags==0x05 ){
        /* A table interior node. nPayload==0. */
      }else{
        u32 nPayload;             /* Bytes of payload total (local+overflow) */
        int nLocal;               /* Bytes of payload stored locally */
        iOff += getVarint32(&aData[iOff], nPayload);
        if( p->flags==0x0D ){
          /* Table leaf cells carry a second varint, which is skipped */
          u64 dummy;
          iOff += sqlite3GetVarint(&aData[iOff], &dummy);
        }
        if( nPayload>(u32)p->nMxPayload ) p->nMxPayload = nPayload;
        nLocal = getLocalPayload(nUsable, p->flags, nPayload);
        if( nLocal<0 ) goto statPageIsCorrupt;
        pCell->nLocal = nLocal;
        assert( nPayload>=(u32)nLocal );
        assert( nLocal<=(nUsable-35) );
        if( nPayload>(u32)nLocal ){
          /* Part of the payload spills onto a chain of overflow pages.
          ** Each overflow page holds up to nUsable-4 bytes of payload. */
          int j;
          int nOvfl = ((nPayload - nLocal) + nUsable-4 - 1) / (nUsable - 4);
          if( iOff+nLocal+4>nUsable || nPayload>0x7fffffff ){
            goto statPageIsCorrupt;
          }
          pCell->nLastOvfl = (nPayload-nLocal) - (nOvfl-1) * (nUsable-4);
          pCell->nOvfl = nOvfl;
          pCell->aOvfl = sqlite3_malloc64(sizeof(u32)*nOvfl);
          if( pCell->aOvfl==0 ) return SQLITE_NOMEM_BKPT;
          /* The first overflow page number follows the local payload; each
          ** later one is read from the first 4 bytes of its predecessor. */
          pCell->aOvfl[0] = sqlite3Get4byte(&aData[iOff+nLocal]);
          for(j=1; j<nOvfl; j++){
            int rc;
            u32 iPrev = pCell->aOvfl[j-1];
            DbPage *pPg = 0;
            rc = sqlite3PagerGet(sqlite3BtreePager(pBt), iPrev, &pPg, 0);
            if( rc!=SQLITE_OK ){
              assert( pPg==0 );
              return rc;
            }
            pCell->aOvfl[j] = sqlite3Get4byte(sqlite3PagerGetData(pPg));
            sqlite3PagerUnref(pPg);
          }
        }
      }
    }
  }

  return SQLITE_OK;

  /* Corruption is not an error for this module: flag the page and let the
  ** caller report it. */
statPageIsCorrupt:
  p->flags = 0;
  statClearCells(p);
  return SQLITE_OK;
}
503 
504 /*
505 ** Populate the pCsr->iOffset and pCsr->szPage member variables. Based on
506 ** the current value of pCsr->iPageno.
507 */
508 static void statSizeAndOffset(StatCursor *pCsr){
509   StatTable *pTab = (StatTable *)((sqlite3_vtab_cursor *)pCsr)->pVtab;
510   Btree *pBt = pTab->db->aDb[pTab->iDb].pBt;
511   Pager *pPager = sqlite3BtreePager(pBt);
512   sqlite3_file *fd;
513   sqlite3_int64 x[2];
514 
515   /* If connected to a ZIPVFS backend, find the page size and
516   ** offset from ZIPVFS.
517   */
518   fd = sqlite3PagerFile(pPager);
519   x[0] = pCsr->iPageno;
520   if( sqlite3OsFileControl(fd, 230440, &x)==SQLITE_OK ){
521     pCsr->iOffset = x[0];
522     pCsr->szPage += x[1];
523   }else{
524     /* Not ZIPVFS: The default page size and offset */
525     pCsr->szPage += sqlite3BtreeGetPageSize(pBt);
526     pCsr->iOffset = (i64)pCsr->szPage * (pCsr->iPageno - 1);
527   }
528 }
529 
530 /*
531 ** Load a copy of the page data for page iPg into the buffer belonging
532 ** to page object pPg. Allocate the buffer if necessary. Return SQLITE_OK
533 ** if successful, or an SQLite error code otherwise.
534 */
535 static int statGetPage(
536   Btree *pBt,                     /* Load page from this b-tree */
537   u32 iPg,                        /* Page number to load */
538   StatPage *pPg                   /* Load page into this object */
539 ){
540   int pgsz = sqlite3BtreeGetPageSize(pBt);
541   DbPage *pDbPage = 0;
542   int rc;
543 
544   if( pPg->aPg==0 ){
545     pPg->aPg = (u8*)sqlite3_malloc(pgsz + DBSTAT_PAGE_PADDING_BYTES);
546     if( pPg->aPg==0 ){
547       return SQLITE_NOMEM_BKPT;
548     }
549     memset(&pPg->aPg[pgsz], 0, DBSTAT_PAGE_PADDING_BYTES);
550   }
551 
552   rc = sqlite3PagerGet(sqlite3BtreePager(pBt), iPg, &pDbPage, 0);
553   if( rc==SQLITE_OK ){
554     const u8 *a = sqlite3PagerGetData(pDbPage);
555     memcpy(pPg->aPg, a, pgsz);
556     sqlite3PagerUnref(pDbPage);
557   }
558 
559   return rc;
560 }
561 
562 /*
563 ** Move a DBSTAT cursor to the next entry.  Normally, the next
564 ** entry will be the next page, but in aggregated mode (pCsr->isAgg!=0),
565 ** the next entry is the next btree.
566 */
567 static int statNext(sqlite3_vtab_cursor *pCursor){
568   int rc;
569   int nPayload;
570   char *z;
571   StatCursor *pCsr = (StatCursor *)pCursor;
572   StatTable *pTab = (StatTable *)pCursor->pVtab;
573   Btree *pBt = pTab->db->aDb[pCsr->iDb].pBt;
574   Pager *pPager = sqlite3BtreePager(pBt);
575 
576   sqlite3_free(pCsr->zPath);
577   pCsr->zPath = 0;
578 
579 statNextRestart:
580   if( pCsr->iPage<0 ){
581     /* Start measuring space on the next btree */
582     statResetCounts(pCsr);
583     rc = sqlite3_step(pCsr->pStmt);
584     if( rc==SQLITE_ROW ){
585       int nPage;
586       u32 iRoot = (u32)sqlite3_column_int64(pCsr->pStmt, 1);
587       sqlite3PagerPagecount(pPager, &nPage);
588       if( nPage==0 ){
589         pCsr->isEof = 1;
590         return sqlite3_reset(pCsr->pStmt);
591       }
592       rc = statGetPage(pBt, iRoot, &pCsr->aPage[0]);
593       pCsr->aPage[0].iPgno = iRoot;
594       pCsr->aPage[0].iCell = 0;
595       if( !pCsr->isAgg ){
596         pCsr->aPage[0].zPath = z = sqlite3_mprintf("/");
597         if( z==0 ) rc = SQLITE_NOMEM_BKPT;
598       }
599       pCsr->iPage = 0;
600       pCsr->nPage = 1;
601     }else{
602       pCsr->isEof = 1;
603       return sqlite3_reset(pCsr->pStmt);
604     }
605   }else{
606     /* Continue analyzing the btree previously started */
607     StatPage *p = &pCsr->aPage[pCsr->iPage];
608     if( !pCsr->isAgg ) statResetCounts(pCsr);
609     while( p->iCell<p->nCell ){
610       StatCell *pCell = &p->aCell[p->iCell];
611       while( pCell->iOvfl<pCell->nOvfl ){
612         int nUsable, iOvfl;
613         sqlite3BtreeEnter(pBt);
614         nUsable = sqlite3BtreeGetPageSize(pBt) -
615                         sqlite3BtreeGetReserveNoMutex(pBt);
616         sqlite3BtreeLeave(pBt);
617         pCsr->nPage++;
618         statSizeAndOffset(pCsr);
619         if( pCell->iOvfl<pCell->nOvfl-1 ){
620           pCsr->nPayload += nUsable - 4;
621         }else{
622           pCsr->nPayload += pCell->nLastOvfl;
623           pCsr->nUnused += nUsable - 4 - pCell->nLastOvfl;
624         }
625         iOvfl = pCell->iOvfl;
626         pCell->iOvfl++;
627         if( !pCsr->isAgg ){
628           pCsr->zName = (char *)sqlite3_column_text(pCsr->pStmt, 0);
629           pCsr->iPageno = pCell->aOvfl[iOvfl];
630           pCsr->zPagetype = "overflow";
631           pCsr->zPath = z = sqlite3_mprintf(
632               "%s%.3x+%.6x", p->zPath, p->iCell, iOvfl
633           );
634           return z==0 ? SQLITE_NOMEM_BKPT : SQLITE_OK;
635         }
636       }
637       if( p->iRightChildPg ) break;
638       p->iCell++;
639     }
640 
641     if( !p->iRightChildPg || p->iCell>p->nCell ){
642       statClearPage(p);
643       pCsr->iPage--;
644       if( pCsr->isAgg && pCsr->iPage<0 ){
645         /* label-statNext-done:  When computing aggregate space usage over
646         ** an entire btree, this is the exit point from this function */
647         return SQLITE_OK;
648       }
649       goto statNextRestart; /* Tail recursion */
650     }
651     pCsr->iPage++;
652     if( pCsr->iPage>=ArraySize(pCsr->aPage) ){
653       statResetCsr(pCsr);
654       return SQLITE_CORRUPT_BKPT;
655     }
656     assert( p==&pCsr->aPage[pCsr->iPage-1] );
657 
658     if( p->iCell==p->nCell ){
659       p[1].iPgno = p->iRightChildPg;
660     }else{
661       p[1].iPgno = p->aCell[p->iCell].iChildPg;
662     }
663     rc = statGetPage(pBt, p[1].iPgno, &p[1]);
664     pCsr->nPage++;
665     p[1].iCell = 0;
666     if( !pCsr->isAgg ){
667       p[1].zPath = z = sqlite3_mprintf("%s%.3x/", p->zPath, p->iCell);
668       if( z==0 ) rc = SQLITE_NOMEM_BKPT;
669     }
670     p->iCell++;
671   }
672 
673 
674   /* Populate the StatCursor fields with the values to be returned
675   ** by the xColumn() and xRowid() methods.
676   */
677   if( rc==SQLITE_OK ){
678     int i;
679     StatPage *p = &pCsr->aPage[pCsr->iPage];
680     pCsr->zName = (char *)sqlite3_column_text(pCsr->pStmt, 0);
681     pCsr->iPageno = p->iPgno;
682 
683     rc = statDecodePage(pBt, p);
684     if( rc==SQLITE_OK ){
685       statSizeAndOffset(pCsr);
686 
687       switch( p->flags ){
688         case 0x05:             /* table internal */
689         case 0x02:             /* index internal */
690           pCsr->zPagetype = "internal";
691           break;
692         case 0x0D:             /* table leaf */
693         case 0x0A:             /* index leaf */
694           pCsr->zPagetype = "leaf";
695           break;
696         default:
697           pCsr->zPagetype = "corrupted";
698           break;
699       }
700       pCsr->nCell += p->nCell;
701       pCsr->nUnused += p->nUnused;
702       if( p->nMxPayload>pCsr->nMxPayload ) pCsr->nMxPayload = p->nMxPayload;
703       if( !pCsr->isAgg ){
704         pCsr->zPath = z = sqlite3_mprintf("%s", p->zPath);
705         if( z==0 ) rc = SQLITE_NOMEM_BKPT;
706       }
707       nPayload = 0;
708       for(i=0; i<p->nCell; i++){
709         nPayload += p->aCell[i].nLocal;
710       }
711       pCsr->nPayload += nPayload;
712 
713       /* If computing aggregate space usage by btree, continue with the
714       ** next page.  The loop will exit via the return at label-statNext-done
715       */
716       if( pCsr->isAgg ) goto statNextRestart;
717     }
718   }
719 
720   return rc;
721 }
722 
723 static int statEof(sqlite3_vtab_cursor *pCursor){
724   StatCursor *pCsr = (StatCursor *)pCursor;
725   return pCsr->isEof;
726 }
727 
728 /* Initialize a cursor according to the query plan idxNum using the
729 ** arguments in argv[0].  See statBestIndex() for a description of the
730 ** meaning of the bits in idxNum.
731 */
732 static int statFilter(
733   sqlite3_vtab_cursor *pCursor,
734   int idxNum, const char *idxStr,
735   int argc, sqlite3_value **argv
736 ){
737   StatCursor *pCsr = (StatCursor *)pCursor;
738   StatTable *pTab = (StatTable*)(pCursor->pVtab);
739   sqlite3_str *pSql;      /* Query of btrees to analyze */
740   char *zSql;             /* String value of pSql */
741   int iArg = 0;           /* Count of argv[] parameters used so far */
742   int rc = SQLITE_OK;     /* Result of this operation */
743   const char *zName = 0;  /* Only provide analysis of this table */
744 
745   statResetCsr(pCsr);
746   sqlite3_finalize(pCsr->pStmt);
747   pCsr->pStmt = 0;
748   if( idxNum & 0x01 ){
749     /* schema=? constraint is present.  Get its value */
750     const char *zDbase = (const char*)sqlite3_value_text(argv[iArg++]);
751     pCsr->iDb = sqlite3FindDbName(pTab->db, zDbase);
752     if( pCsr->iDb<0 ){
753       pCsr->iDb = 0;
754       pCsr->isEof = 1;
755       return SQLITE_OK;
756     }
757   }else{
758     pCsr->iDb = pTab->iDb;
759   }
760   if( idxNum & 0x02 ){
761     /* name=? constraint is present */
762     zName = (const char*)sqlite3_value_text(argv[iArg++]);
763   }
764   if( idxNum & 0x04 ){
765     /* aggregate=? constraint is present */
766     pCsr->isAgg = sqlite3_value_double(argv[iArg++])!=0.0;
767   }else{
768     pCsr->isAgg = 0;
769   }
770   pSql = sqlite3_str_new(pTab->db);
771   sqlite3_str_appendf(pSql,
772       "SELECT * FROM ("
773         "SELECT 'sqlite_schema' AS name,1 AS rootpage,'table' AS type"
774         " UNION ALL "
775         "SELECT name,rootpage,type"
776         " FROM \"%w\".sqlite_schema WHERE rootpage!=0)",
777       pTab->db->aDb[pCsr->iDb].zDbSName);
778   if( zName ){
779     sqlite3_str_appendf(pSql, "WHERE name=%Q", zName);
780   }
781   if( idxNum & 0x08 ){
782     sqlite3_str_appendf(pSql, " ORDER BY name");
783   }
784   zSql = sqlite3_str_finish(pSql);
785   if( zSql==0 ){
786     return SQLITE_NOMEM_BKPT;
787   }else{
788     rc = sqlite3_prepare_v2(pTab->db, zSql, -1, &pCsr->pStmt, 0);
789     sqlite3_free(zSql);
790   }
791 
792   if( rc==SQLITE_OK ){
793     pCsr->iPage = -1;
794     rc = statNext(pCursor);
795   }
796   return rc;
797 }
798 
799 static int statColumn(
800   sqlite3_vtab_cursor *pCursor,
801   sqlite3_context *ctx,
802   int i
803 ){
804   StatCursor *pCsr = (StatCursor *)pCursor;
805   switch( i ){
806     case 0:            /* name */
807       sqlite3_result_text(ctx, pCsr->zName, -1, SQLITE_TRANSIENT);
808       break;
809     case 1:            /* path */
810       if( !pCsr->isAgg ){
811         sqlite3_result_text(ctx, pCsr->zPath, -1, SQLITE_TRANSIENT);
812       }
813       break;
814     case 2:            /* pageno */
815       if( pCsr->isAgg ){
816         sqlite3_result_int64(ctx, pCsr->nPage);
817       }else{
818         sqlite3_result_int64(ctx, pCsr->iPageno);
819       }
820       break;
821     case 3:            /* pagetype */
822       if( !pCsr->isAgg ){
823         sqlite3_result_text(ctx, pCsr->zPagetype, -1, SQLITE_STATIC);
824       }
825       break;
826     case 4:            /* ncell */
827       sqlite3_result_int(ctx, pCsr->nCell);
828       break;
829     case 5:            /* payload */
830       sqlite3_result_int(ctx, pCsr->nPayload);
831       break;
832     case 6:            /* unused */
833       sqlite3_result_int(ctx, pCsr->nUnused);
834       break;
835     case 7:            /* mx_payload */
836       sqlite3_result_int(ctx, pCsr->nMxPayload);
837       break;
838     case 8:            /* pgoffset */
839       if( !pCsr->isAgg ){
840         sqlite3_result_int64(ctx, pCsr->iOffset);
841       }
842       break;
843     case 9:            /* pgsize */
844       sqlite3_result_int(ctx, pCsr->szPage);
845       break;
846     case 10: {         /* schema */
847       sqlite3 *db = sqlite3_context_db_handle(ctx);
848       int iDb = pCsr->iDb;
849       sqlite3_result_text(ctx, db->aDb[iDb].zDbSName, -1, SQLITE_STATIC);
850       break;
851     }
852     default: {         /* aggregate */
853       sqlite3_result_int(ctx, pCsr->isAgg);
854       break;
855     }
856   }
857   return SQLITE_OK;
858 }
859 
860 static int statRowid(sqlite3_vtab_cursor *pCursor, sqlite_int64 *pRowid){
861   StatCursor *pCsr = (StatCursor *)pCursor;
862   *pRowid = pCsr->iPageno;
863   return SQLITE_OK;
864 }
865 
/*
** Invoke this routine to register the "dbstat" virtual table module
** with database connection db.  Returns the result of
** sqlite3_create_module().
**
** The same routines serve as xCreate/xConnect and as
** xDisconnect/xDestroy.  No xUpdate or transaction methods are provided.
*/
int sqlite3DbstatRegister(sqlite3 *db){
  static sqlite3_module dbstat_module = {
    0,                            /* iVersion */
    statConnect,                  /* xCreate */
    statConnect,                  /* xConnect */
    statBestIndex,                /* xBestIndex */
    statDisconnect,               /* xDisconnect */
    statDisconnect,               /* xDestroy */
    statOpen,                     /* xOpen - open a cursor */
    statClose,                    /* xClose - close a cursor */
    statFilter,                   /* xFilter - configure scan constraints */
    statNext,                     /* xNext - advance a cursor */
    statEof,                      /* xEof - check for end of scan */
    statColumn,                   /* xColumn - read data */
    statRowid,                    /* xRowid - read data */
    0,                            /* xUpdate */
    0,                            /* xBegin */
    0,                            /* xSync */
    0,                            /* xCommit */
    0,                            /* xRollback */
    0,                            /* xFindMethod */
    0,                            /* xRename */
    0,                            /* xSavepoint */
    0,                            /* xRelease */
    0,                            /* xRollbackTo */
    0                             /* xShadowName */
  };
  return sqlite3_create_module(db, "dbstat", &dbstat_module, 0);
}
898 #elif defined(SQLITE_ENABLE_DBSTAT_VTAB)
899 int sqlite3DbstatRegister(sqlite3 *db){ return SQLITE_OK; }
900 #endif /* SQLITE_ENABLE_DBSTAT_VTAB */
901