xref: /sqlite-3.40.0/tool/spaceanal.tcl (revision 480f5e3e)
1# Run this TCL script using an SQLite-enabled TCL interpreter to get a report
2# on how much disk space is used by a particular data to actually store data
3# versus how much space is unused.
4#
5# The dbstat virtual table is required.
6#
7
8if {[catch {
9
10# Argument $tname is the name of a table within the database opened by
11# database handle [db]. Return true if it is a WITHOUT ROWID table, or
12# false otherwise.
13#
14proc is_without_rowid {tname} {
15  set t [string map {' ''} $tname]
16  db eval "PRAGMA index_list = '$t'" o {
17    if {$o(origin) == "pk"} {
18      set n $o(name)
19      if {0==[db one { SELECT count(*) FROM sqlite_schema WHERE name=$n }]} {
20        return 1
21      }
22    }
23  }
24  return 0
25}
26
27# Read and run TCL commands from standard input.  Used to implement
28# the --tclsh option.
29#
30proc tclsh {} {
31  set line {}
32  while {![eof stdin]} {
33    if {$line!=""} {
34      puts -nonewline "> "
35    } else {
36      puts -nonewline "% "
37    }
38    flush stdout
39    append line [gets stdin]
40    if {[info complete $line]} {
41      if {[catch {uplevel #0 $line} result]} {
42        puts stderr "Error: $result"
43      } elseif {$result!=""} {
44        puts $result
45      }
46      set line {}
47    } else {
48      append line \n
49    }
50  }
51}
52
53
54# Get the name of the database to analyze
55#
56proc usage {} {
57  set argv0 [file rootname [file tail [info nameofexecutable]]]
58  puts stderr "Usage: $argv0 ?--pageinfo? ?--stats? database-filename"
59  puts stderr {
60Analyze the SQLite3 database file specified by the "database-filename"
61argument and output a report detailing size and storage efficiency
62information for the database and its constituent tables and indexes.
63
64Options:
65
66   --pageinfo   Show how each page of the database-file is used
67
68   --stats      Output SQL text that creates a new database containing
69                statistics about the database that was analyzed
70
71   --tclsh      Run the built-in TCL interpreter interactively (for debugging)
72
73   --version    Show the version number of SQLite
74}
75  exit 1
76}
77set file_to_analyze {}
78set flags(-pageinfo) 0
79set flags(-stats) 0
80set flags(-debug) 0
81append argv {}
82foreach arg $argv {
83  if {[regexp {^-+pageinfo$} $arg]} {
84    set flags(-pageinfo) 1
85  } elseif {[regexp {^-+stats$} $arg]} {
86    set flags(-stats) 1
87  } elseif {[regexp {^-+debug$} $arg]} {
88    set flags(-debug) 1
89  } elseif {[regexp {^-+tclsh$} $arg]} {
90    tclsh
91    exit 0
92  } elseif {[regexp {^-+version$} $arg]} {
93    sqlite3 mem :memory:
94    puts [mem one {SELECT sqlite_version()||' '||sqlite_source_id()}]
95    mem close
96    exit 0
97  } elseif {[regexp {^-} $arg]} {
98    puts stderr "Unknown option: $arg"
99    usage
100  } elseif {$file_to_analyze!=""} {
101    usage
102  } else {
103    set file_to_analyze $arg
104  }
105}
106if {$file_to_analyze==""} usage
107set root_filename $file_to_analyze
108regexp {^file:(//)?([^?]*)} $file_to_analyze all x1 root_filename
109if {![file exists $root_filename]} {
110  puts stderr "No such file: $root_filename"
111  exit 1
112}
113if {![file readable $root_filename]} {
114  puts stderr "File is not readable: $root_filename"
115  exit 1
116}
117set true_file_size [file size $root_filename]
118if {$true_file_size<512} {
119  puts stderr "Empty or malformed database: $root_filename"
120  exit 1
121}
122
123# Compute the total file size assuming test_multiplexor is being used.
124# Assume that SQLITE_ENABLE_8_3_NAMES might be enabled
125#
126set extension [file extension $root_filename]
127set pattern $root_filename
128append pattern {[0-3][0-9][0-9]}
129foreach f [glob -nocomplain $pattern] {
130  incr true_file_size [file size $f]
131  set extension {}
132}
133if {[string length $extension]>=2 && [string length $extension]<=4} {
134  set pattern [file rootname $root_filename]
135  append pattern {.[0-3][0-9][0-9]}
136  foreach f [glob -nocomplain $pattern] {
137    incr true_file_size [file size $f]
138  }
139}
140
141# Open the database
142#
143if {[catch {sqlite3 db $file_to_analyze -uri 1} msg]} {
144  puts stderr "error trying to open $file_to_analyze: $msg"
145  exit 1
146}
147if {$flags(-debug)} {
148  proc dbtrace {txt} {puts $txt; flush stdout;}
149  db trace ::dbtrace
150}
151
152# Make sure all required compile-time options are available
153#
154if {![db exists {SELECT 1 FROM pragma_compile_options
155                WHERE compile_options='ENABLE_DBSTAT_VTAB'}]} {
156  puts "The SQLite database engine linked with this application\
157        lacks required capabilities. Recompile using the\
158        -DSQLITE_ENABLE_DBSTAT_VTAB compile-time option to fix\
159        this problem."
160  exit 1
161}
162
163db eval {SELECT count(*) FROM sqlite_schema}
164set pageSize [expr {wide([db one {PRAGMA page_size}])}]
165
166if {$flags(-pageinfo)} {
167  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
168  db eval {SELECT name, path, pageno FROM temp.stat ORDER BY pageno} {
169    puts "$pageno $name $path"
170  }
171  exit 0
172}
173if {$flags(-stats)} {
174  db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
175  puts "BEGIN;"
176  puts "CREATE TABLE stats("
177  puts "  name       STRING,           /* Name of table or index */"
178  puts "  path       INTEGER,          /* Path to page from root */"
179  puts "  pageno     INTEGER,          /* Page number */"
180  puts "  pagetype   STRING,           /* 'internal', 'leaf' or 'overflow' */"
181  puts "  ncell      INTEGER,          /* Cells on page (0 for overflow) */"
182  puts "  payload    INTEGER,          /* Bytes of payload on this page */"
183  puts "  unused     INTEGER,          /* Bytes of unused space on this page */"
184  puts "  mx_payload INTEGER,          /* Largest payload size of all cells */"
185  puts "  pgoffset   INTEGER,          /* Offset of page in file */"
186  puts "  pgsize     INTEGER           /* Size of the page */"
187  puts ");"
188  db eval {SELECT quote(name) || ',' ||
189                  quote(path) || ',' ||
190                  quote(pageno) || ',' ||
191                  quote(pagetype) || ',' ||
192                  quote(ncell) || ',' ||
193                  quote(payload) || ',' ||
194                  quote(unused) || ',' ||
195                  quote(mx_payload) || ',' ||
196                  quote(pgoffset) || ',' ||
197                  quote(pgsize) AS x FROM stat} {
198    puts "INSERT INTO stats VALUES($x);"
199  }
200  puts "COMMIT;"
201  exit 0
202}
203
204
205# In-memory database for collecting statistics. This script loops through
206# the tables and indices in the database being analyzed, adding a row for each
207# to an in-memory database (for which the schema is shown below). It then
208# queries the in-memory db to produce the space-analysis report.
209#
210sqlite3 mem :memory:
211if {$flags(-debug)} {
212  proc dbtrace {txt} {puts $txt; flush stdout;}
213  mem trace ::dbtrace
214}
215set tabledef {CREATE TABLE space_used(
216   name clob,        -- Name of a table or index in the database file
217   tblname clob,     -- Name of associated table
218   is_index boolean, -- TRUE if it is an index, false for a table
219   is_without_rowid boolean, -- TRUE if WITHOUT ROWID table
220   nentry int,       -- Number of entries in the BTree
221   leaf_entries int, -- Number of leaf entries
222   depth int,        -- Depth of the b-tree
223   payload int,      -- Total amount of data stored in this table or index
224   ovfl_payload int, -- Total amount of data stored on overflow pages
225   ovfl_cnt int,     -- Number of entries that use overflow
226   mx_payload int,   -- Maximum payload size
227   int_pages int,    -- Number of interior pages used
228   leaf_pages int,   -- Number of leaf pages used
229   ovfl_pages int,   -- Number of overflow pages used
230   int_unused int,   -- Number of unused bytes on interior pages
231   leaf_unused int,  -- Number of unused bytes on primary pages
232   ovfl_unused int,  -- Number of unused bytes on overflow pages
233   gap_cnt int,      -- Number of gaps in the page layout
234   compressed_size int  -- Total bytes stored on disk
235);}
236mem eval $tabledef
237
238# Create a temporary "dbstat" virtual table.
239#
240db eval {CREATE VIRTUAL TABLE temp.stat USING dbstat}
241db eval {CREATE TEMP TABLE dbstat AS SELECT * FROM temp.stat
242         ORDER BY name, path}
243db eval {DROP TABLE temp.stat}
244
245set isCompressed 0
246set compressOverhead 0
247set depth 0
248set sql { SELECT name, tbl_name FROM sqlite_schema WHERE rootpage>0 }
249foreach {name tblname} [concat sqlite_schema sqlite_schema [db eval $sql]] {
250
251  set is_index [expr {$name!=$tblname}]
252  set is_without_rowid [is_without_rowid $name]
253  db eval {
254    SELECT
255      sum(ncell) AS nentry,
256      sum((pagetype=='leaf')*ncell) AS leaf_entries,
257      sum(payload) AS payload,
258      sum((pagetype=='overflow') * payload) AS ovfl_payload,
259      sum(path LIKE '%+000000') AS ovfl_cnt,
260      max(mx_payload) AS mx_payload,
261      sum(pagetype=='internal') AS int_pages,
262      sum(pagetype=='leaf') AS leaf_pages,
263      sum(pagetype=='overflow') AS ovfl_pages,
264      sum((pagetype=='internal') * unused) AS int_unused,
265      sum((pagetype=='leaf') * unused) AS leaf_unused,
266      sum((pagetype=='overflow') * unused) AS ovfl_unused,
267      sum(pgsize) AS compressed_size,
268      max((length(CASE WHEN path LIKE '%+%' THEN '' ELSE path END)+3)/4)
269        AS depth
270    FROM temp.dbstat WHERE name = $name
271  } break
272
273  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
274  set storage [expr {$total_pages*$pageSize}]
275  if {!$isCompressed && $storage>$compressed_size} {
276    set isCompressed 1
277    set compressOverhead 14
278  }
279
280  # Column 'gap_cnt' is set to the number of non-contiguous entries in the
281  # list of pages visited if the b-tree structure is traversed in a top-down
282  # fashion (each node visited before its child-tree is passed). Any overflow
283  # chains present are traversed from start to finish before any child-tree
284  # is.
285  #
286  set gap_cnt 0
287  set prev 0
288  db eval {
289    SELECT pageno, pagetype FROM temp.dbstat
290     WHERE name=$name
291     ORDER BY pageno
292  } {
293    if {$prev>0 && $pagetype=="leaf" && $pageno!=$prev+1} {
294      incr gap_cnt
295    }
296    set prev $pageno
297  }
298  mem eval {
299    INSERT INTO space_used VALUES(
300      $name,
301      $tblname,
302      $is_index,
303      $is_without_rowid,
304      $nentry,
305      $leaf_entries,
306      $depth,
307      $payload,
308      $ovfl_payload,
309      $ovfl_cnt,
310      $mx_payload,
311      $int_pages,
312      $leaf_pages,
313      $ovfl_pages,
314      $int_unused,
315      $leaf_unused,
316      $ovfl_unused,
317      $gap_cnt,
318      $compressed_size
319    );
320  }
321}
322
323proc integerify {real} {
324  if {[string is double -strict $real]} {
325    return [expr {wide($real)}]
326  } else {
327    return 0
328  }
329}
330mem function int integerify
331
332# Quote a string for use in an SQL query. Examples:
333#
334# [quote {hello world}]   == {'hello world'}
335# [quote {hello world's}] == {'hello world''s'}
336#
337proc quote {txt} {
338  return [string map {' ''} $txt]
339}
340
341# Output a title line
342#
343proc titleline {title} {
344  if {$title==""} {
345    puts [string repeat * 79]
346  } else {
347    set len [string length $title]
348    set stars [string repeat * [expr 79-$len-5]]
349    puts "*** $title $stars"
350  }
351}
352
353# Generate a single line of output in the statistics section of the
354# report.
355#
356proc statline {title value {extra {}}} {
357  set len [string length $title]
358  set dots [string repeat . [expr 50-$len]]
359  set len [string length $value]
360  set sp2 [string range {          } $len end]
361  if {$extra ne ""} {
362    set extra " $extra"
363  }
364  puts "$title$dots $value$sp2$extra"
365}
366
367# Generate a formatted percentage value for $num/$denom
368#
369proc percent {num denom {of {}}} {
370  if {$denom==0.0} {return ""}
371  set v [expr {$num*100.0/$denom}]
372  set of {}
373  if {$v==100.0 || $v<0.001 || ($v>1.0 && $v<99.0)} {
374    return [format {%5.1f%% %s} $v $of]
375  } elseif {$v<0.1 || $v>99.9} {
376    return [format {%7.3f%% %s} $v $of]
377  } else {
378    return [format {%6.2f%% %s} $v $of]
379  }
380}
381
382proc divide {num denom} {
383  if {$denom==0} {return 0.0}
384  return [format %.2f [expr double($num)/double($denom)]]
385}
386
387# Generate a subreport that covers some subset of the database.
388# the $where clause determines which subset to analyze.
389#
390proc subreport {title where showFrag} {
391  global pageSize file_pgcnt compressOverhead
392
393  # Query the in-memory database for the sum of various statistics
394  # for the subset of tables/indices identified by the WHERE clause in
395  # $where. Note that even if the WHERE clause matches no rows, the
396  # following query returns exactly one row (because it is an aggregate).
397  #
398  # The results of the query are stored directly by SQLite into local
399  # variables (i.e. $nentry, $payload etc.).
400  #
401  mem eval "
402    SELECT
403      int(sum(
404        CASE WHEN (is_without_rowid OR is_index) THEN nentry
405             ELSE leaf_entries
406        END
407      )) AS nentry,
408      int(sum(payload)) AS payload,
409      int(sum(ovfl_payload)) AS ovfl_payload,
410      max(mx_payload) AS mx_payload,
411      int(sum(ovfl_cnt)) as ovfl_cnt,
412      int(sum(leaf_pages)) AS leaf_pages,
413      int(sum(int_pages)) AS int_pages,
414      int(sum(ovfl_pages)) AS ovfl_pages,
415      int(sum(leaf_unused)) AS leaf_unused,
416      int(sum(int_unused)) AS int_unused,
417      int(sum(ovfl_unused)) AS ovfl_unused,
418      int(sum(gap_cnt)) AS gap_cnt,
419      int(sum(compressed_size)) AS compressed_size,
420      int(max(depth)) AS depth,
421      count(*) AS cnt
422    FROM space_used WHERE $where" {} {}
423
424  # Output the sub-report title, nicely decorated with * characters.
425  #
426  puts ""
427  titleline $title
428  puts ""
429
430  # Calculate statistics and store the results in TCL variables, as follows:
431  #
432  # total_pages: Database pages consumed.
433  # total_pages_percent: Pages consumed as a percentage of the file.
434  # storage: Bytes consumed.
435  # payload_percent: Payload bytes used as a percentage of $storage.
436  # total_unused: Unused bytes on pages.
437  # avg_payload: Average payload per btree entry.
438  # avg_fanout: Average fanout for internal pages.
439  # avg_unused: Average unused bytes per btree entry.
440  # avg_meta: Average metadata overhead per entry.
441  # ovfl_cnt_percent: Percentage of btree entries that use overflow pages.
442  #
443  set total_pages [expr {$leaf_pages+$int_pages+$ovfl_pages}]
444  set total_pages_percent [percent $total_pages $file_pgcnt]
445  set storage [expr {$total_pages*$pageSize}]
446  set payload_percent [percent $payload $storage {of storage consumed}]
447  set total_unused [expr {$ovfl_unused+$int_unused+$leaf_unused}]
448  set avg_payload [divide $payload $nentry]
449  set avg_unused [divide $total_unused $nentry]
450  set total_meta [expr {$storage - $payload - $total_unused}]
451  set total_meta [expr {$total_meta + 4*($ovfl_pages - $ovfl_cnt)}]
452  set meta_percent [percent $total_meta $storage {of metadata}]
453  set avg_meta [divide $total_meta $nentry]
454  if {$int_pages>0} {
455    # TODO: Is this formula correct?
456    set nTab [mem eval "
457      SELECT count(*) FROM (
458          SELECT DISTINCT tblname FROM space_used WHERE $where AND is_index=0
459      )
460    "]
461    set avg_fanout [mem eval "
462      SELECT (sum(leaf_pages+int_pages)-$nTab)/sum(int_pages) FROM space_used
463          WHERE $where
464    "]
465    set avg_fanout [format %.2f $avg_fanout]
466  }
467  set ovfl_cnt_percent [percent $ovfl_cnt $nentry {of all entries}]
468
469  # Print out the sub-report statistics.
470  #
471  statline {Percentage of total database} $total_pages_percent
472  statline {Number of entries} $nentry
473  statline {Bytes of storage consumed} $storage
474  if {$compressed_size!=$storage} {
475    set compressed_size [expr {$compressed_size+$compressOverhead*$total_pages}]
476    set pct [expr {$compressed_size*100.0/$storage}]
477    set pct [format {%5.1f%%} $pct]
478    statline {Bytes used after compression} $compressed_size $pct
479  }
480  statline {Bytes of payload} $payload $payload_percent
481  statline {Bytes of metadata} $total_meta $meta_percent
482  if {$cnt==1} {statline {B-tree depth} $depth}
483  statline {Average payload per entry} $avg_payload
484  statline {Average unused bytes per entry} $avg_unused
485  statline {Average metadata per entry} $avg_meta
486  if {[info exists avg_fanout]} {
487    statline {Average fanout} $avg_fanout
488  }
489  if {$showFrag && $total_pages>1} {
490    set fragmentation [percent $gap_cnt [expr {$total_pages-1}]]
491    statline {Non-sequential pages} $gap_cnt $fragmentation
492  }
493  statline {Maximum payload per entry} $mx_payload
494  statline {Entries that use overflow} $ovfl_cnt $ovfl_cnt_percent
495  if {$int_pages>0} {
496    statline {Index pages used} $int_pages
497  }
498  statline {Primary pages used} $leaf_pages
499  statline {Overflow pages used} $ovfl_pages
500  statline {Total pages used} $total_pages
501  if {$int_unused>0} {
502    set int_unused_percent [
503         percent $int_unused [expr {$int_pages*$pageSize}] {of index space}]
504    statline "Unused bytes on index pages" $int_unused $int_unused_percent
505  }
506  statline "Unused bytes on primary pages" $leaf_unused [
507     percent $leaf_unused [expr {$leaf_pages*$pageSize}] {of primary space}]
508  statline "Unused bytes on overflow pages" $ovfl_unused [
509     percent $ovfl_unused [expr {$ovfl_pages*$pageSize}] {of overflow space}]
510  statline "Unused bytes on all pages" $total_unused [
511               percent $total_unused $storage {of all space}]
512  return 1
513}
514
515# Calculate the overhead in pages caused by auto-vacuum.
516#
517# This procedure calculates and returns the number of pages used by the
518# auto-vacuum 'pointer-map'. If the database does not support auto-vacuum,
519# then 0 is returned. The two arguments are the size of the database file in
520# pages and the page size used by the database (in bytes).
521proc autovacuum_overhead {filePages pageSize} {
522
523  # Set $autovacuum to non-zero for databases that support auto-vacuum.
524  set autovacuum [db one {PRAGMA auto_vacuum}]
525
526  # If the database is not an auto-vacuum database or the file consists
527  # of one page only then there is no overhead for auto-vacuum. Return zero.
528  if {0==$autovacuum || $filePages==1} {
529    return 0
530  }
531
532  # The number of entries on each pointer map page. The layout of the
533  # database file is one pointer-map page, followed by $ptrsPerPage other
534  # pages, followed by a pointer-map page etc. The first pointer-map page
535  # is the second page of the file overall.
536  set ptrsPerPage [expr double($pageSize/5)]
537
538  # Return the number of pointer map pages in the database.
539  return [expr wide(ceil( ($filePages-1.0)/($ptrsPerPage+1.0) ))]
540}
541
542
543# Calculate the summary statistics for the database and store the results
544# in TCL variables. They are output below. Variables are as follows:
545#
546# pageSize:      Size of each page in bytes.
547# file_bytes:    File size in bytes.
548# file_pgcnt:    Number of pages in the file.
549# file_pgcnt2:   Number of pages in the file (calculated).
550# av_pgcnt:      Pages consumed by the auto-vacuum pointer-map.
551# av_percent:    Percentage of the file consumed by auto-vacuum pointer-map.
552# inuse_pgcnt:   Data pages in the file.
553# inuse_percent: Percentage of pages used to store data.
554# free_pgcnt:    Free pages calculated as (<total pages> - <in-use pages>)
555# free_pgcnt2:   Free pages in the file according to the file header.
556# free_percent:  Percentage of file consumed by free pages (calculated).
557# free_percent2: Percentage of file consumed by free pages (header).
558# ntable:        Number of tables in the db.
559# nindex:        Number of indices in the db.
560# nautoindex:    Number of indices created automatically.
561# nmanindex:     Number of indices created manually.
562# user_payload:  Number of bytes of payload in table btrees
563#                (not including sqlite_schema)
564# user_percent:  $user_payload as a percentage of total file size.
565
566### The following, setting $file_bytes based on the actual size of the file
567### on disk, causes this tool to choke on zipvfs databases. So set it based
568### on the return of [PRAGMA page_count] instead.
569if 0 {
570  set file_bytes  [file size $file_to_analyze]
571  set file_pgcnt  [expr {$file_bytes/$pageSize}]
572}
573set file_pgcnt  [db one {PRAGMA page_count}]
574set file_bytes  [expr {$file_pgcnt * $pageSize}]
575
576set av_pgcnt    [autovacuum_overhead $file_pgcnt $pageSize]
577set av_percent  [percent $av_pgcnt $file_pgcnt]
578
579set sql {SELECT sum(leaf_pages+int_pages+ovfl_pages) FROM space_used}
580set inuse_pgcnt   [expr wide([mem eval $sql])]
581set inuse_percent [percent $inuse_pgcnt $file_pgcnt]
582
583set free_pgcnt    [expr {$file_pgcnt-$inuse_pgcnt-$av_pgcnt}]
584set free_percent  [percent $free_pgcnt $file_pgcnt]
585set free_pgcnt2   [db one {PRAGMA freelist_count}]
586set free_percent2 [percent $free_pgcnt2 $file_pgcnt]
587
588set file_pgcnt2 [expr {$inuse_pgcnt+$free_pgcnt2+$av_pgcnt}]
589
590# Account for the lockbyte page
591if {$file_pgcnt2*$pageSize>1073742335} {incr file_pgcnt2}
592
593set ntable [db eval {SELECT count(*)+1 FROM sqlite_schema WHERE type='table'}]
594set nindex [db eval {SELECT count(*) FROM sqlite_schema WHERE type='index'}]
595set sql {SELECT count(*) FROM sqlite_schema WHERE name LIKE 'sqlite_autoindex%'}
596set nautoindex [db eval $sql]
597set nmanindex [expr {$nindex-$nautoindex}]
598
599# set total_payload [mem eval "SELECT sum(payload) FROM space_used"]
600set user_payload [mem one {SELECT int(sum(payload)) FROM space_used
601     WHERE NOT is_index AND name NOT LIKE 'sqlite_schema'}]
602set user_percent [percent $user_payload $file_bytes]
603
604# Output the summary statistics calculated above.
605#
606puts "/** Disk-Space Utilization Report For $root_filename"
607puts ""
608statline {Page size in bytes} $pageSize
609statline {Pages in the whole file (measured)} $file_pgcnt
610statline {Pages in the whole file (calculated)} $file_pgcnt2
611statline {Pages that store data} $inuse_pgcnt $inuse_percent
612statline {Pages on the freelist (per header)} $free_pgcnt2 $free_percent2
613statline {Pages on the freelist (calculated)} $free_pgcnt $free_percent
614statline {Pages of auto-vacuum overhead} $av_pgcnt $av_percent
615statline {Number of tables in the database} $ntable
616statline {Number of indices} $nindex
617statline {Number of defined indices} $nmanindex
618statline {Number of implied indices} $nautoindex
619if {$isCompressed} {
620  statline {Size of uncompressed content in bytes} $file_bytes
621  set efficiency [percent $true_file_size $file_bytes]
622  statline {Size of compressed file on disk} $true_file_size $efficiency
623} else {
624  statline {Size of the file in bytes} $file_bytes
625}
626statline {Bytes of user payload stored} $user_payload $user_percent
627
628# Output table rankings
629#
630puts ""
631titleline "Page counts for all tables with their indices"
632puts ""
633mem eval {SELECT tblname, count(*) AS cnt,
634              int(sum(int_pages+leaf_pages+ovfl_pages)) AS size
635          FROM space_used GROUP BY tblname ORDER BY size+0 DESC, tblname} {} {
636  statline [string toupper $tblname] $size [percent $size $file_pgcnt]
637}
638puts ""
639titleline "Page counts for all tables and indices separately"
640puts ""
641mem eval {
642  SELECT
643       upper(name) AS nm,
644       int(int_pages+leaf_pages+ovfl_pages) AS size
645    FROM space_used
646   ORDER BY size+0 DESC, name} {} {
647  statline $nm $size [percent $size $file_pgcnt]
648}
649if {$isCompressed} {
650  puts ""
651  titleline "Bytes of disk space used after compression"
652  puts ""
653  set csum 0
654  mem eval {SELECT tblname,
655                  int(sum(compressed_size)) +
656                         $compressOverhead*sum(int_pages+leaf_pages+ovfl_pages)
657                        AS csize
658          FROM space_used GROUP BY tblname ORDER BY csize+0 DESC, tblname} {} {
659    incr csum $csize
660    statline [string toupper $tblname] $csize [percent $csize $true_file_size]
661  }
662  set overhead [expr {$true_file_size - $csum}]
663  if {$overhead>0} {
664    statline {Header and free space} $overhead [percent $overhead $true_file_size]
665  }
666}
667
668# Output subreports
669#
670if {$nindex>0} {
671  subreport {All tables and indices} 1 0
672}
673subreport {All tables} {NOT is_index} 0
674if {$nindex>0} {
675  subreport {All indices} {is_index} 0
676}
677foreach tbl [mem eval {SELECT DISTINCT tblname name FROM space_used
678                       ORDER BY name}] {
679  set qn [quote $tbl]
680  set name [string toupper $tbl]
681  set n [mem eval {SELECT count(*) FROM space_used WHERE tblname=$tbl}]
682  if {$n>1} {
683    set idxlist [mem eval "SELECT name FROM space_used
684                            WHERE tblname='$qn' AND is_index
685                            ORDER BY 1"]
686    subreport "Table $name and all its indices" "tblname='$qn'" 0
687    subreport "Table $name w/o any indices" "name='$qn'" 1
688    if {[llength $idxlist]>1} {
689      subreport "Indices of table $name" "tblname='$qn' AND is_index" 0
690    }
691    foreach idx $idxlist {
692      set qidx [quote $idx]
693      subreport "Index [string toupper $idx] of table $name" "name='$qidx'" 1
694    }
695  } else {
696    subreport "Table $name" "name='$qn'" 1
697  }
698}
699
700# Output instructions on what the numbers above mean.
701#
702puts ""
703titleline Definitions
704puts {
705Page size in bytes
706
707    The number of bytes in a single page of the database file.
708    Usually 1024.
709
710Number of pages in the whole file
711}
712puts "    The number of $pageSize-byte pages that go into forming the complete
713    database"
714puts {
715Pages that store data
716
717    The number of pages that store data, either as primary B*Tree pages or
718    as overflow pages.  The number at the right is the data pages divided by
719    the total number of pages in the file.
720
721Pages on the freelist
722
723    The number of pages that are not currently in use but are reserved for
724    future use.  The percentage at the right is the number of freelist pages
725    divided by the total number of pages in the file.
726
727Pages of auto-vacuum overhead
728
729    The number of pages that store data used by the database to facilitate
730    auto-vacuum. This is zero for databases that do not support auto-vacuum.
731
732Number of tables in the database
733
734    The number of tables in the database, including the SQLITE_SCHEMA table
735    used to store schema information.
736
737Number of indices
738
739    The total number of indices in the database.
740
741Number of defined indices
742
743    The number of indices created using an explicit CREATE INDEX statement.
744
745Number of implied indices
746
747    The number of indices used to implement PRIMARY KEY or UNIQUE constraints
748    on tables.
749
750Size of the file in bytes
751
752    The total amount of disk space used by the entire database files.
753
754Bytes of user payload stored
755
756    The total number of bytes of user payload stored in the database. The
757    schema information in the SQLITE_SCHEMA table is not counted when
758    computing this number.  The percentage at the right shows the payload
759    divided by the total file size.
760
761Percentage of total database
762
763    The amount of the complete database file that is devoted to storing
764    information described by this category.
765
766Number of entries
767
768    The total number of B-Tree key/value pairs stored under this category.
769
770Bytes of storage consumed
771
772    The total amount of disk space required to store all B-Tree entries
773    under this category.  The is the total number of pages used times
774    the pages size.
775
776Bytes of payload
777
778    The amount of payload stored under this category.  Payload is the data
779    part of table entries and the key part of index entries.  The percentage
780    at the right is the bytes of payload divided by the bytes of storage
781    consumed.
782
783Bytes of metadata
784
785    The amount of formatting and structural information stored in the
786    table or index.  Metadata includes the btree page header, the cell pointer
787    array, the size field for each cell, the left child pointer or non-leaf
788    cells, the overflow pointers for overflow cells, and the rowid value for
789    rowid table cells.  In other words, metadata is everything that is neither
790    unused space nor content.  The record header in the payload is counted as
791    content, not metadata.
792
793Average payload per entry
794
795    The average amount of payload on each entry.  This is just the bytes of
796    payload divided by the number of entries.
797
798Average unused bytes per entry
799
800    The average amount of free space remaining on all pages under this
801    category on a per-entry basis.  This is the number of unused bytes on
802    all pages divided by the number of entries.
803
804Non-sequential pages
805
806    The number of pages in the table or index that are out of sequence.
807    Many filesystems are optimized for sequential file access so a small
808    number of non-sequential pages might result in faster queries,
809    especially for larger database files that do not fit in the disk cache.
810    Note that after running VACUUM, the root page of each table or index is
811    at the beginning of the database file and all other pages are in a
812    separate part of the database file, resulting in a single non-
813    sequential page.
814
815Maximum payload per entry
816
817    The largest payload size of any entry.
818
819Entries that use overflow
820
821    The number of entries that user one or more overflow pages.
822
823Total pages used
824
825    This is the number of pages used to hold all information in the current
826    category.  This is the sum of index, primary, and overflow pages.
827
828Index pages used
829
830    This is the number of pages in a table B-tree that hold only key (rowid)
831    information and no data.
832
833Primary pages used
834
835    This is the number of B-tree pages that hold both key and data.
836
837Overflow pages used
838
839    The total number of overflow pages used for this category.
840
841Unused bytes on index pages
842
843    The total number of bytes of unused space on all index pages.  The
844    percentage at the right is the number of unused bytes divided by the
845    total number of bytes on index pages.
846
847Unused bytes on primary pages
848
849    The total number of bytes of unused space on all primary pages.  The
850    percentage at the right is the number of unused bytes divided by the
851    total number of bytes on primary pages.
852
853Unused bytes on overflow pages
854
855    The total number of bytes of unused space on all overflow pages.  The
856    percentage at the right is the number of unused bytes divided by the
857    total number of bytes on overflow pages.
858
859Unused bytes on all pages
860
861    The total number of bytes of unused space on all primary and overflow
862    pages.  The percentage at the right is the number of unused bytes
863    divided by the total number of bytes.
864}
865
866# Output a dump of the in-memory database. This can be used for more
867# complex offline analysis.
868#
869titleline {}
870puts "The entire text of this report can be sourced into any SQL database"
871puts "engine for further analysis.  All of the text above is an SQL comment."
872puts "The data used to generate this report follows:"
873puts "*/"
874puts "BEGIN;"
875puts $tabledef
876unset -nocomplain x
877mem eval {SELECT * FROM space_used} x {
878  puts -nonewline "INSERT INTO space_used VALUES"
879  set sep (
880  foreach col $x(*) {
881    set v $x($col)
882    if {$v=="" || ![string is double $v]} {set v '[quote $v]'}
883    puts -nonewline $sep$v
884    set sep ,
885  }
886  puts ");"
887}
888puts "COMMIT;"
889
890} err]} {
891  puts "ERROR: $err"
892  puts $errorInfo
893  exit 1
894}
895