xref: /sqlite-3.40.0/test/analyze8.test (revision 8210233c)
1# 2011 August 13
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11#
12# This file implements tests for SQLite library.  The focus of the tests
13# in this file is testing the capabilities of sqlite_stat4.
14#
15
# Load the shared test harness, which lives in the same directory as
# this script.
set testdir [file dirname $argv0]
source [file join $testdir tester.tcl]

# Every test below is about sqlite_stat4, so end the script immediately
# when the build lacks the stat4 capability.
ifcapable !stat4 {
  finish_test
  return
}

# Name prefix for the tests in this file.
set testprefix analyze8
25
# eqp SQL ?DB?
#
# Prepend "EXPLAIN QUERY PLAN" to SQL and hand the resulting statement to
# [execsql] in the caller's stack frame, using database handle DB
# (default "db").  The result of [execsql] is returned unchanged.
proc eqp {sql {db db}} {
  set planSql "EXPLAIN QUERY PLAN $sql"
  uplevel execsql [list $planSql] $db
}
29
30# Scenario:
31#
32#    Two indices.  One has mostly singleton entries, but for a few
33#    values there are hundreds of entries.  The other has 10-20
34#    entries per value.
35#
36# Verify that the query planner chooses the first index for the singleton
37# entries and the second index for the others.
38#
# Build the fixture: table t1 with one index per column a, b and c, filled
# with 1000 rows (d = 0..999) and then analyzed.
do_test 1.0 {
  db eval {
    CREATE TABLE t1(a,b,c,d);
    CREATE INDEX t1a ON t1(a);
    CREATE INDEX t1b ON t1(b);
    CREATE INDEX t1c ON t1(c);
  }
  set i 0
  while {$i<1000} {
    # Even rows get a unique "a" (the row number itself); odd rows all
    # land on one of 100, 300, 500 or 700.
    set a [expr {$i%2==0 ? $i : ($i%8)*100}]
    # Ten consecutive rows share each value of "b".
    set b [expr {$i/10}]
    # "c" is the cube of $i/8, so its values thin out as they grow.
    set c [expr {($i/8)*($i/8)*($i/8)}]
    db eval {INSERT INTO t1 VALUES($a,$b,$c,$i)}
    incr i
  }
  db eval {ANALYZE}
} {}
55
56# The a==100 comparison is expensive because there are many rows
57# with a==100.  And so for those cases, choose the t1b index.
58#
59# But for a==99 and a==101, there are far fewer rows so choose
60# the t1a index.
61#
# Each entry is: test name, WHERE clause, expected index and constraint.
# Every query is "SELECT * FROM t1 WHERE <clause>" and its EXPLAIN QUERY
# PLAN output must match "SEARCH t1 USING INDEX <index and constraint>".
foreach {tn where idx} {
  1.1 {a=100 AND b=55}                  {t1b (b=?)}
  1.2 {a=99 AND b=55}                   {t1a (a=?)}
  1.3 {a=101 AND b=55}                  {t1a (a=?)}
  1.4 {a=100 AND b=56}                  {t1b (b=?)}
  1.5 {a=99 AND b=56}                   {t1a (a=?)}
  1.6 {a=101 AND b=56}                  {t1a (a=?)}
  2.1 {a=100 AND b BETWEEN 50 AND 54}   {t1b (b>? AND b<?)}
} {
  do_test $tn [list eqp "SELECT * FROM t1 WHERE $where"] \
      "/*SEARCH t1 USING INDEX $idx*/"
}
83
84# There are many more values of c between 0 and 100000 than there are
85# between 800000 and 900000.  So t1c is more selective for the latter
86# range.
87#
88# Test 3.2 is a little unstable. It depends on the planner estimating
89# that (b BETWEEN 30 AND 34) will match more rows than (c BETWEEN
90# 800000 AND 900000). Which is a pretty close call (50 vs. 32), so
91# the planner could get it wrong with an unlucky set of samples. This
92# case happens to work, but others ("b BETWEEN 40 AND 44" for example)
93# will fail.
94#
# Pin down the actual row counts that the range queries below rely on.
do_execsql_test 3.0 {
  SELECT count(*) FROM t1 WHERE b BETWEEN 30 AND 34;
  SELECT count(*) FROM t1 WHERE c BETWEEN 0 AND 100000;
  SELECT count(*) FROM t1 WHERE c BETWEEN 800000 AND 900000;
} {50 376 32}

# Each entry is: test name, query, expected index and constraint.  The
# EXPLAIN QUERY PLAN output of the query must match
# "SEARCH t1 USING INDEX <index and constraint>".
foreach {tn sql idx} {
  3.1 {SELECT * FROM t1 WHERE b BETWEEN 30 AND 34 AND c BETWEEN 0 AND 100000}
      {t1b (b>? AND b<?)}
  3.2 {SELECT * FROM t1
       WHERE b BETWEEN 30 AND 34 AND c BETWEEN 800000 AND 900000}
      {t1c (c>? AND c<?)}
  3.3 {SELECT * FROM t1 WHERE a=100 AND c BETWEEN 0 AND 100000}
      {t1a (a=?)}
  3.4 {SELECT * FROM t1
       WHERE a=100 AND c BETWEEN 800000 AND 900000}
      {t1c (c>? AND c<?)}
} {
  do_test $tn [list eqp $sql] "/*SEARCH t1 USING INDEX $idx*/"
}

finish_test
116