xref: /sqlite-3.40.0/test/analyze5.test (revision 175b8f06)
1e847d324Sdrh# 2011 January 19
2e847d324Sdrh#
3e847d324Sdrh# The author disclaims copyright to this source code.  In place of
4e847d324Sdrh# a legal notice, here is a blessing:
5e847d324Sdrh#
6e847d324Sdrh#    May you do good and not evil.
7e847d324Sdrh#    May you find forgiveness for yourself and forgive others.
8e847d324Sdrh#    May you share freely, never taking more than you give.
9e847d324Sdrh#
10e847d324Sdrh#***********************************************************************
11e847d324Sdrh#
12e847d324Sdrh# This file implements tests for SQLite library.  The focus of the tests
13f52bb8d3Sdan# in this file is the use of the sqlite_stat4 histogram data on tables
14e847d324Sdrh# with many repeated values and only a few distinct values.
15e847d324Sdrh#
16e847d324Sdrh
# Load the shared test harness that sits in the same directory as this script.
set testdir [file dirname $argv0]
source $testdir/tester.tcl

# This file is about sqlite_stat4 data.  When the stat4 capability is not
# reported, close out the script right away instead of running anything.
ifcapable {!stat4} {
  finish_test
  return
}

# Name prefix for this file (set here; not read directly by the code below).
set testprefix analyze5
26e847d324Sdrh
# eqp SQL ?DB?
#
# Prepend "EXPLAIN QUERY PLAN" to SQL and pass the result to execsql,
# evaluated in the caller's scope, with DB as the database handle
# (default "db").  Returns whatever execsql returns.
proc eqp {sql {db db}} {
  set planQuery "EXPLAIN QUERY PLAN $sql"
  uplevel execsql [list $planQuery] $db
}
30e847d324Sdrh
# alpha BLOB
#
# Return BLOB with every character that is not alphabetic (as judged by
# [string is alpha]) removed.  The surviving characters keep their
# original order; an empty input gives an empty result.
proc alpha {blob} {
  set letters ""
  set len [string length $blob]
  for {set pos 0} {$pos < $len} {incr pos} {
    set ch [string index $blob $pos]
    if {[string is alpha $ch]} {
      append letters $ch
    }
  }
  return $letters
}
# Make two Tcl commands callable from SQL on the "db" handle:
#   alpha   -> the alpha proc defined in this file
#   lindex  -> Tcl's lindex; the tests below use it to pick one field out
#              of test_decode() output
db function alpha alpha

db function lindex lindex
41dd6e1f19Sdan
# Clear any leftovers first.  The loop below deliberately leaves w or x
# unset for some rows so that the INSERT stores NULL in that column.
unset -nocomplain i t u v w x y z

# analyze5-1.0: build a 1000-row table in which every indexed column holds
# only a handful of distinct values, index each column, run ANALYZE, and
# check that the distinct first fields decoded from the stat4 samples for
# index t1u are the four words stored in column u.
do_test analyze5-1.0 {
  db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)}
  for {set i 0} {$i < 1000} {incr i} {
    # z is 0 for i<400, 1 for i<700, 2 for i<875 and 3 after that.
    set z [expr {($i>=400) + ($i>=700) + ($i>=875)}]
    # y is 1 only for i in 25..50.
    set y [expr {$i>=25 && $i<=50}]
    set t [expr {$z+0.5}]
    set w $z
    set x $z
    set u [lindex {alpha bravo charlie delta} $z]
    # x is left unset (NULL) for the z==0 rows, w for the z==3 rows.
    if {$z == 0} {unset x}
    if {$z == 3} {unset w}
    # v mirrors u: upper case on even i, as-is on odd i.
    if {$i % 2 == 0} {
      set v [string toupper $u]
    } else {
      set v $u
    }
    db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)}
  }
  db eval {
    CREATE INDEX t1t ON t1(t);  -- 0.5, 1.5, 2.5, and 3.5
    CREATE INDEX t1u ON t1(u);  -- text
    CREATE INDEX t1v ON t1(v);  -- mixed case text
    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
    ANALYZE;
  }
  db eval {
    SELECT DISTINCT lindex(test_decode(sample),0)
      FROM sqlite_stat4 WHERE idx='t1u' ORDER BY nlt;
  }
} [list alpha bravo charlie delta]
75fc449136Sdrh
# analyze5-1.1: the distinct, lower-cased first fields decoded from the
# stat4 samples for index t1v (the mixed-case, nocase-collated column)
# are the same four words.
do_test analyze5-1.1 {
  db eval {
    SELECT DISTINCT lower(lindex(test_decode(sample), 0))
      FROM sqlite_stat4 WHERE idx='t1v' ORDER BY 1
  }
} [list alpha bravo charlie delta]

# analyze5-1.2: number of sqlite_stat4 rows recorded for each index.
do_test analyze5-1.2 {
  db eval {SELECT idx, count(*) FROM sqlite_stat4 GROUP BY 1 ORDER BY 1}
} [list t1t 8 t1u 8 t1v 8 t1w 8 t1x 8 t1y 9 t1z 8]
85e847d324Sdrh
# Range, equality and IN constraints against the low-cardinality columns.
#
# Each table row is: test id, WHERE clause, expected index, expected row
# estimate.  The last two columns are consumed only by the disabled "a"
# test kept in the loop body; the live "b" test checks result equivalence.
#
foreach {testid where index rows} {
    1  {z>=0 AND z<=0}       t1z  400
    2  {z>=1 AND z<=1}       t1z  300
    3  {z>=2 AND z<=2}       t1z  175
    4  {z>=3 AND z<=3}       t1z  125
    5  {z>=4 AND z<=4}       t1z    1
    6  {z>=-1 AND z<=-1}     t1z    1
    7  {z>1 AND z<3}         t1z  175
    8  {z>0 AND z<100}       t1z  600
    9  {z>=1 AND z<100}      t1z  600
   10  {z>1 AND z<100}       t1z  300
   11  {z>=2 AND z<100}      t1z  300
   12  {z>2 AND z<100}       t1z  125
   13  {z>=3 AND z<100}      t1z  125
   14  {z>3 AND z<100}       t1z    1
   15  {z>=4 AND z<100}      t1z    1
   16  {z>=-100 AND z<=-1}   t1z    1
   17  {z>=-100 AND z<=0}    t1z  400
   18  {z>=-100 AND z<0}     t1z    1
   19  {z>=-100 AND z<=1}    t1z  700
   20  {z>=-100 AND z<2}     t1z  700
   21  {z>=-100 AND z<=2}    t1z  875
   22  {z>=-100 AND z<3}     t1z  875

   31  {z>=0.0 AND z<=0.0}   t1z  400
   32  {z>=1.0 AND z<=1.0}   t1z  300
   33  {z>=2.0 AND z<=2.0}   t1z  175
   34  {z>=3.0 AND z<=3.0}   t1z  125
   35  {z>=4.0 AND z<=4.0}   t1z    1
   36  {z>=-1.0 AND z<=-1.0} t1z    1
   37  {z>1.5 AND z<3.0}     t1z  174
   38  {z>0.5 AND z<100}     t1z  599
   39  {z>=1.0 AND z<100}    t1z  600
   40  {z>1.5 AND z<100}     t1z  299
   41  {z>=2.0 AND z<100}    t1z  300
   42  {z>2.1 AND z<100}     t1z  124
   43  {z>=3.0 AND z<100}    t1z  125
   44  {z>3.2 AND z<100}     t1z    1
   45  {z>=4.0 AND z<100}    t1z    1
   46  {z>=-100 AND z<=-1.0} t1z    1
   47  {z>=-100 AND z<=0.0}  t1z  400
   48  {z>=-100 AND z<0.0}   t1z    1
   49  {z>=-100 AND z<=1.0}  t1z  700
   50  {z>=-100 AND z<2.0}   t1z  700
   51  {z>=-100 AND z<=2.0}  t1z  875
   52  {z>=-100 AND z<3.0}   t1z  875

  101  {z=-1}                t1z    1
  102  {z=0}                 t1z  400
  103  {z=1}                 t1z  300
  104  {z=2}                 t1z  175
  105  {z=3}                 t1z  125
  106  {z=4}                 t1z    1
  107  {z=-10.0}             t1z    1
  108  {z=0.0}               t1z  400
  109  {z=1.0}               t1z  300
  110  {z=2.0}               t1z  175
  111  {z=3.0}               t1z  125
  112  {z=4.0}               t1z    1
  113  {z=1.5}               t1z    1
  114  {z=2.5}               t1z    1

  201  {z IN (-1)}           t1z    1
  202  {z IN (0)}            t1z  400
  203  {z IN (1)}            t1z  300
  204  {z IN (2)}            t1z  175
  205  {z IN (3)}            t1z  125
  206  {z IN (4)}            t1z    1
  207  {z IN (0.5)}          t1z    1
  208  {z IN (0,1)}          t1z  700
  209  {z IN (0,1,2)}        t1z  875
  210  {z IN (0,1,2,3)}      {}   100
  211  {z IN (0,1,2,3,4,5)}  {}   100
  212  {z IN (1,2)}          t1z  475
  213  {z IN (2,3)}          t1z  300
  214  {z=3 OR z=2}          t1z  300
  215  {z IN (-1,3)}         t1z  126
  216  {z=-1 OR z=3}         t1z  126

  300  {y=0}                 t1y  974
  301  {y=1}                 t1y   26
  302  {y=0.1}               t1y    1

  400  {x IS NULL}           t1x  400

} {
  # Disabled "a" test: checked the chosen index and the estimated row count.
  # No longer valid due to an EXPLAIN QUERY PLAN output format change
  # do_test analyze5-1.${testid}a {
  #   set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
  #   set idx {}
  #   regexp {INDEX (t1.) } $x all idx
  #   regexp {~([0-9]+) rows} $x all nrow
  #   list $idx $nrow
  # } [list $index $rows]

  # "b" test: the rowids returned must be the same whether or not an index
  # is available.  The reference query uses NOT INDEXED and additionally
  # rewrites y and z as +y and +z.
  do_test analyze5-1.${testid}b {
    set plainWhere [string map {y +y z +z} $where]
    set scanSql "SELECT rowid FROM t1 NOT INDEXED WHERE $plainWhere ORDER BY +rowid"
    set autoSql "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"
    set scanRows [db eval $scanSql]
    set autoRows [db eval $autoSql]
    # Assume a mismatch and report both row lists; overwrite on agreement.
    set res "a1=\[$scanRows\] a2=\[$autoRows\]"
    if {$scanRows==$autoRows} {set res ok}
    set res
  } {ok}
}
199e847d324Sdrh
# Make column x almost entirely NULL: clear it everywhere, then give five
# randomly chosen rows a non-NULL value (their own rowid), and re-run
# ANALYZE so the statistics reflect the new contents.
#
db eval {
   UPDATE t1 SET x=NULL;
   UPDATE t1 SET x=rowid
    WHERE rowid IN (SELECT rowid FROM t1 ORDER BY random() LIMIT 5);
   ANALYZE;
}

# IS NULL / IS NOT NULL / equality constraints on the mostly-NULL column x.
#
# Each table row is: test id, WHERE clause, expected index, expected row
# estimate.  The last two columns are consumed only by the disabled "a"
# test kept in the loop body; the live "b" test checks result equivalence.
#
foreach {testid where index rows} {
  500  {x IS NULL AND u='charlie'}         t1u  17
  501  {x=1 AND u='charlie'}               t1x   1
  502  {x IS NULL}                         t1x 995
  503  {x=1}                               t1x   1
  504  {x IS NOT NULL}                     t1x   2
  505  {+x IS NOT NULL}                     {} 500
  506  {upper(x) IS NOT NULL}               {} 500

} {
  # Disabled "a" test: checked the chosen index and the estimated row count.
  # No longer valid due to an EXPLAIN QUERY PLAN format change
  # do_test analyze5-1.${testid}a {
  #   set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
  #   set idx {}
  #   regexp {INDEX (t1.) } $x all idx
  #   regexp {~([0-9]+) rows} $x all nrow
  #   list $idx $nrow
  # } [list $index $rows]

  # "b" test: the rowids returned must be the same whether or not an index
  # is available.  The reference query uses NOT INDEXED; the y/z rewrite is
  # applied as well, although none of the clauses above mention y or z.
  do_test analyze5-1.${testid}b {
    set plainWhere [string map {y +y z +z} $where]
    set scanSql "SELECT rowid FROM t1 NOT INDEXED WHERE $plainWhere ORDER BY +rowid"
    set autoSql "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"
    set scanRows [db eval $scanSql]
    set autoRows [db eval $autoSql]
    # Assume a mismatch and report both row lists; overwrite on agreement.
    set res "a1=\[$scanRows\] a2=\[$autoRows\]"
    if {$scanRows==$autoRows} {set res ok}
    set res
  } {ok}
}

finish_test
248