xref: /sqlite-3.40.0/test/analyze5.test (revision fc449136)
1# 2011 January 19
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11#
12# This file implements tests for SQLite library.  The focus of the tests
13# in this file is the use of the sqlite_stat2 histogram data on tables
14# with many repeated values and only a few distinct values.
15#
16
17return
18
19set testdir [file dirname $argv0]
20source $testdir/tester.tcl
21
22ifcapable !stat2 {
23  finish_test
24  return
25}
26
27set testprefix analyze5
28
29proc eqp {sql {db db}} {
30  uplevel execsql [list "EXPLAIN QUERY PLAN $sql"] $db
31}
32
33unset -nocomplain i t u v w x y z
34do_test analyze5-1.0 {
35  db eval {CREATE TABLE t1(t,u,v TEXT COLLATE nocase,w,x,y,z)}
36  for {set i 0} {$i < 1000} {incr i} {
37    set y [expr {$i>=25 && $i<=50}]
38    set z [expr {($i>=400) + ($i>=700) + ($i>=875)}]
39    set x $z
40    set w $z
41    set t [expr {$z+0.5}]
42    switch $z {
43      0 {set u "alpha"; unset x}
44      1 {set u "bravo"}
45      2 {set u "charlie"}
46      3 {set u "delta"; unset w}
47    }
48    if {$i%2} {set v $u} {set v [string toupper $u]}
49    db eval {INSERT INTO t1 VALUES($t,$u,$v,$w,$x,$y,$z)}
50  }
51  db eval {
52    CREATE INDEX t1t ON t1(t);  -- 0.5, 1.5, 2.5, and 3.5
53    CREATE INDEX t1u ON t1(u);  -- text
54    CREATE INDEX t1v ON t1(v);  -- mixed case text
55    CREATE INDEX t1w ON t1(w);  -- integers 0, 1, 2 and a few NULLs
56    CREATE INDEX t1x ON t1(x);  -- integers 1, 2, 3 and many NULLs
57    CREATE INDEX t1y ON t1(y);  -- integers 0 and very few 1s
58    CREATE INDEX t1z ON t1(z);  -- integers 0, 1, 2, and 3
59    ANALYZE;
60    SELECT sample FROM sqlite_stat2 WHERE idx='t1u' ORDER BY sampleno;
61  }
62} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
63do_test analyze5-1.1 {
64  string tolower \
65   [db eval {SELECT sample from sqlite_stat2 WHERE idx='t1v' ORDER BY sampleno}]
66} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
67do_test analyze5-1.2 {
68  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1w' ORDER BY sampleno}
69} {{} 0 0 0 0 1 1 1 2 2}
70do_test analyze5-1.3 {
71  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1x' ORDER BY sampleno}
72} {{} {} {} {} 1 1 1 2 2 3}
73do_test analyze5-1.4 {
74  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno}
75} {0 0 0 0 0 0 0 0 0 0}
76do_test analyze5-1.5 {
77  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno}
78} {0 0 0 0 1 1 1 2 2 3}
79do_test analyze5-1.6 {
80  db eval {SELECT sample from sqlite_stat2 WHERE idx='t1t' ORDER BY sampleno}
81} {0.5 0.5 0.5 0.5 1.5 1.5 1.5 2.5 2.5 3.5}
82
83
84# Verify that range queries generate the correct row count estimates
85#
86foreach {testid where index rows} {
87    1  {z>=0 AND z<=0}       t1z  400
88    2  {z>=1 AND z<=1}       t1z  300
89    3  {z>=2 AND z<=2}       t1z  200
90    4  {z>=3 AND z<=3}       t1z  100
91    5  {z>=4 AND z<=4}       t1z   50
92    6  {z>=-1 AND z<=-1}     t1z   50
93    7  {z>1 AND z<3}         t1z  200
94    8  {z>0 AND z<100}       t1z  600
95    9  {z>=1 AND z<100}      t1z  600
96   10  {z>1 AND z<100}       t1z  300
97   11  {z>=2 AND z<100}      t1z  300
98   12  {z>2 AND z<100}       t1z  100
99   13  {z>=3 AND z<100}      t1z  100
100   14  {z>3 AND z<100}       t1z   50
101   15  {z>=4 AND z<100}      t1z   50
102   16  {z>=-100 AND z<=-1}   t1z   50
103   17  {z>=-100 AND z<=0}    t1z  400
104   18  {z>=-100 AND z<0}     t1z   50
105   19  {z>=-100 AND z<=1}    t1z  700
106   20  {z>=-100 AND z<2}     t1z  700
107   21  {z>=-100 AND z<=2}    t1z  900
108   22  {z>=-100 AND z<3}     t1z  900
109
110   31  {z>=0.0 AND z<=0.0}   t1z  400
111   32  {z>=1.0 AND z<=1.0}   t1z  300
112   33  {z>=2.0 AND z<=2.0}   t1z  200
113   34  {z>=3.0 AND z<=3.0}   t1z  100
114   35  {z>=4.0 AND z<=4.0}   t1z   50
115   36  {z>=-1.0 AND z<=-1.0} t1z   50
116   37  {z>1.5 AND z<3.0}     t1z  200
117   38  {z>0.5 AND z<100}     t1z  600
118   39  {z>=1.0 AND z<100}    t1z  600
119   40  {z>1.5 AND z<100}     t1z  300
120   41  {z>=2.0 AND z<100}    t1z  300
121   42  {z>2.1 AND z<100}     t1z  100
122   43  {z>=3.0 AND z<100}    t1z  100
123   44  {z>3.2 AND z<100}     t1z   50
124   45  {z>=4.0 AND z<100}    t1z   50
125   46  {z>=-100 AND z<=-1.0} t1z   50
126   47  {z>=-100 AND z<=0.0}  t1z  400
127   48  {z>=-100 AND z<0.0}   t1z   50
128   49  {z>=-100 AND z<=1.0}  t1z  700
129   50  {z>=-100 AND z<2.0}   t1z  700
130   51  {z>=-100 AND z<=2.0}  t1z  900
131   52  {z>=-100 AND z<3.0}   t1z  900
132
133  101  {z=-1}                t1z   50
134  102  {z=0}                 t1z  400
135  103  {z=1}                 t1z  300
136  104  {z=2}                 t1z  200
137  105  {z=3}                 t1z  100
138  106  {z=4}                 t1z   50
139  107  {z=-10.0}             t1z   50
140  108  {z=0.0}               t1z  400
141  109  {z=1.0}               t1z  300
142  110  {z=2.0}               t1z  200
143  111  {z=3.0}               t1z  100
144  112  {z=4.0}               t1z   50
145  113  {z=1.5}               t1z   50
146  114  {z=2.5}               t1z   50
147
148  201  {z IN (-1)}           t1z   50
149  202  {z IN (0)}            t1z  400
150  203  {z IN (1)}            t1z  300
151  204  {z IN (2)}            t1z  200
152  205  {z IN (3)}            t1z  100
153  206  {z IN (4)}            t1z   50
154  207  {z IN (0.5)}          t1z   50
155  208  {z IN (0,1)}          t1z  700
156  209  {z IN (0,1,2)}        t1z  900
157  210  {z IN (0,1,2,3)}      {}   100
158  211  {z IN (0,1,2,3,4,5)}  {}   100
159  212  {z IN (1,2)}          t1z  500
160  213  {z IN (2,3)}          t1z  300
161  214  {z=3 OR z=2}          t1z  300
162  215  {z IN (-1,3)}         t1z  150
163  216  {z=-1 OR z=3}         t1z  150
164
165  300  {y=0}                 {}   100
166  301  {y=1}                 t1y   50
167  302  {y=0.1}               t1y   50
168
169} {
170  # Verify that the expected index is used with the expected row count
171  do_test analyze5-1.${testid}a {
172    set x [lindex [eqp "SELECT * FROM t1 WHERE $where"] 3]
173    set idx {}
174    regexp {INDEX (t1.) } $x all idx
175    regexp {~([0-9]+) rows} $x all nrow
176    list $idx $nrow
177  } [list $index $rows]
178
179  # Verify that the same result is achieved regardless of whether or not
180  # the index is used
181  do_test analyze5-1.${testid}b {
182    set w2 [string map {y +y z +z} $where]
183    set a1 [db eval "SELECT rowid FROM t1 NOT INDEXED WHERE $w2\
184                     ORDER BY +rowid"]
185    set a2 [db eval "SELECT rowid FROM t1 WHERE $where ORDER BY +rowid"]
186    if {$a1==$a2} {
187      set res ok
188    } else {
189      set res "a1=\[$a1\] a2=\[$a2\]"
190    }
191    set res
192  } {ok}
193}
194exit
195
196# Change the table values from integer to floating point and then
197# repeat the same sequence of tests.  We should get the same results.
198#
199do_test analyze5-2.0 {
200  db eval {
201    UPDATE t1 SET z=z+0.0;
202    ANALYZE;
203    SELECT sample FROM sqlite_stat2 WHERE idx='t1z' ORDER BY sampleno;
204  }
205} {0.0 0.0 0.0 0.0 1.0 1.0 1.0 2.0 2.0 3.0}
206foreach {testid where rows} {
207  1  {z>=0 AND z<=0}     400
208  2  {z>=1 AND z<=1}     300
209  3  {z>=2 AND z<=2}     200
210  4  {z>=3 AND z<=3}     100
211  5  {z>=4 AND z<=4}      50
212  6  {z>=-1 AND z<=-1}    50
213  7  {z>1 AND z<3}       200
214  8  {z>0 AND z<100}     600
215  9  {z>=1 AND z<100}    600
216 10  {z>1 AND z<100}     300
217 11  {z>=2 AND z<100}    300
218 12  {z>2 AND z<100}     100
219 13  {z>=3 AND z<100}    100
220 14  {z>3 AND z<100}      50
221 15  {z>=4 AND z<100}     50
222 16  {z>=-100 AND z<=-1}  50
223 17  {z>=-100 AND z<=0}  400
224 18  {z>=-100 AND z<0}    50
225 19  {z>=-100 AND z<=1}  700
226 20  {z>=-100 AND z<2}   700
227 21  {z>=-100 AND z<=2}  900
228 22  {z>=-100 AND z<3}   900
229
230 31  {z>=0.0 AND z<=0.0}   400
231 32  {z>=1.0 AND z<=1.0}   300
232 33  {z>=2.0 AND z<=2.0}   200
233 34  {z>=3.0 AND z<=3.0}   100
234 35  {z>=4.0 AND z<=4.0}    50
235 36  {z>=-1.0 AND z<=-1.0}  50
236 37  {z>1.5 AND z<3.0}     200
237 38  {z>0.5 AND z<100}     600
238 39  {z>=1.0 AND z<100}    600
239 40  {z>1.5 AND z<100}     300
240 41  {z>=2.0 AND z<100}    300
241 42  {z>2.1 AND z<100}     100
242 43  {z>=3.0 AND z<100}    100
243 44  {z>3.2 AND z<100}      50
244 45  {z>=4.0 AND z<100}     50
245 46  {z>=-100 AND z<=-1.0}  50
246 47  {z>=-100 AND z<=0.0}  400
247 48  {z>=-100 AND z<0.0}    50
248 49  {z>=-100 AND z<=1.0}  700
249 50  {z>=-100 AND z<2.0}   700
250 51  {z>=-100 AND z<=2.0}  900
251 52  {z>=-100 AND z<3.0}   900
252} {
253  do_test analyze5-2.$testid {
254    eqp "SELECT * FROM t1 WHERE $where"
255  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z>? AND z<?) (~%d rows)}} \
256       $rows]
257}
258foreach {testid where rows} {
259  101  {z=-1}           50
260  102  {z=0}            400
261  103  {z=1}            300
262  104  {z=2}            200
263  105  {z=3}            100
264  106  {z=4}             50
265  107  {z=-10.0}         50
266  108  {z=0.0}          400
267  109  {z=1.0}          300
268  110  {z=2.0}          200
269  111  {z=3.0}          100
270  112  {z=4.0}           50
271  113  {z=1.5}           50
272  114  {z=2.5}           50
273} {
274  do_test analyze5-2.$testid {
275    eqp "SELECT * FROM t1 WHERE $where"
276  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1z (z=?) (~%d rows)}} $rows]
277}
278
279
280# Repeat the same range query tests using TEXT columns.
281#
282do_test analyze5-3.0 {
283  db eval {
284    UPDATE t1 SET y=CASE z WHEN 0 THEN 'alpha' WHEN 1 THEN 'bravo'
285                           WHEN 2 THEN 'charlie' ELSE 'delta' END;
286    ANALYZE;
287    SELECT sample FROM sqlite_stat2 WHERE idx='t1y' ORDER BY sampleno;
288  }
289} {alpha alpha alpha alpha bravo bravo bravo charlie charlie delta}
290foreach {testid where rows} {
291  1  {y>='alpha' AND y<='alpha'}     400
292  2  {y>='bravo' AND y<='bravo'}     300
293  3  {y>='charlie' AND y<='charlie'} 200
294  4  {y>='delta' AND y<='delta'}     100
295  5  {y>='echo' AND y<='echo'}        50
296  6  {y>='' AND y<=''}                50
297  7  {y>'bravo' AND y<'delta'}       200
298  8  {y>'alpha' AND y<'zzz'}         600
299  9  {y>='bravo' AND y<'zzz'}        600
300 10  {y>'bravo' AND y<'zzz'}         300
301 11  {y>='charlie' AND y<'zzz'}      300
302 12  {y>'charlie' AND y<'zzz'}       100
303 13  {y>='delta' AND y<'zzz'}        100
304 14  {y>'delta' AND y<'zzz'}          50
305 15  {y>='echo' AND y<'zzz'}          50
306 16  {y>=0 AND y<=''}                 50
307 17  {y>=0 AND y<='alpha'}           400
308 18  {y>=0 AND y<'alpha'}             50
309 19  {y>=0 AND y<='bravo'}           700
310 20  {y>=0 AND y<'charlie'}          700
311 21  {y>=0 AND y<='charlie'}         900
312 22  {y>=0 AND y<'delta'}            900
313 23  {y>'alpha' AND y<x'00'}         600
314 24  {y>='bravo' AND y<x'00'}        600
315 25  {y>'bravo' AND y<x'00'}         300
316 26  {y>='charlie' AND y<x'00'}      300
317 27  {y>'charlie' AND y<x'00'}       100
318 28  {y>='delta' AND y<x'00'}        100
319 29  {y>'delta' AND y<x'00'}          50
320 30  {y>='echo' AND y<x'00'}          50
321} {
322  do_test analyze5-3.$testid {
323    eqp "SELECT * FROM t1 WHERE $where"
324  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y>? AND y<?) (~%d rows)}} \
325       $rows]
326}
327foreach {testid where rows} {
328  101  {y=0}                  50
329  102  {y='alpha'}            400
330  103  {y='bravo'}            300
331  104  {y='charlie'}          200
332  105  {y='delta'}            100
333  106  {y='echo'}             50
334  107  {y=''}                 50
335  108  {y=x'0102'}            50
336} {
337  do_test analyze5-3.$testid {
338    eqp "SELECT * FROM t1 WHERE $where"
339  } [format {0 0 0 {SEARCH TABLE t1 USING INDEX t1y (y=?) (~%d rows)}} $rows]
340}
341
342
343finish_test
344