1# 2014 Dec 20
2#
3# The author disclaims copyright to this source code.  In place of
4# a legal notice, here is a blessing:
5#
6#    May you do good and not evil.
7#    May you find forgiveness for yourself and forgive others.
8#    May you share freely, never taking more than you give.
9#
10#***********************************************************************
11#
12# Tests focusing on the fts5 tokenizers
13#
14
15source [file join [file dirname [info script]] fts5_common.tcl]
16
# If SQLITE_ENABLE_FTS5 is not defined, omit this file.
ifcapable !fts5 {
  # The fts5 capability is missing (note the "!"): close out the test
  # script immediately and stop evaluating the remainder of this file.
  finish_test
  return
}
22
# Return the path of FILE inside the fts3 "unicode" directory.
#
# The path is assembled from "..", the directory containing the script
# currently being evaluated, "../../fts3/unicode" and FILE. Under the rules
# of [file join], an absolute script directory discards the leading "..";
# it only takes effect when [info script] yields a relative path.
proc fts3_unicode_path {file} {
  set scriptDir [file dirname [info script]]
  return [file join .. $scriptDir .. .. fts3 unicode $file]
}
26
# Pull in the Unicode data parsers kept with the fts3 sources. The loader
# commands used below (tl_*, an_*, rd_*) are not defined in this file;
# presumably they come from parseunicode.tcl.
source [fts3_unicode_path parseunicode.tcl]
set testprefix fts5unicode3

# Locations of the Unicode data files consumed by the loaders.
set CF [fts3_unicode_path CaseFolding.txt]
set UD [fts3_unicode_path UnicodeData.txt]

# Load the case-folding data. tcl_fold reads the global array
# tl_lookup_table, which is presumably populated by this call.
tl_load_casefolding_txt $CF

# aNotAlnum: every value returned by the loader gets an entry. tcl_isalnum
# reports 0 for any codepoint that has an entry in this array.
foreach x [an_load_unicodedata_text $UD] { set aNotAlnum($x) 1 }

# aDiacritic: maps "<code>,<flag>" to the signed 8-bit value of the first
# byte of the replacement text, or to 0 when the replacement text is empty.
foreach y [rd_load_unicodedata_text $UD] {
  # Unpack the record into code, ascii and f.
  foreach {code ascii f} $y {}

  if {$ascii ne ""} {
    binary scan $ascii c int
  } else {
    set int 0
  }

  set aDiacritic($code,$f) $int

  # Records flagged 0 are stored under flag 1 as well, so a lookup with
  # flag 1 (tcl_fold with bRemoveDiacritic==2) sees both sets of records.
  if {$f==0} {
    set aDiacritic($code,1) $int
  }
}
48
# Tcl reference implementation of codepoint folding. The tests in this file
# compare its output against the SQL function fts5_fold().
#
# Arguments:
#   i                 Codepoint to fold.
#   bRemoveDiacritic  Optional, default 0. When zero, only the global
#                     tl_lookup_table mapping is applied. When non-zero, the
#                     global aDiacritic mapping is applied afterwards, keyed
#                     by "<codepoint>,<flag>" where flag is 1 if
#                     bRemoveDiacritic is 2 and 0 otherwise.
#
# Returns the folded value, or the (possibly case-folded) input when no
# aDiacritic entry exists for it.
proc tcl_fold {i {bRemoveDiacritic 0}} {
  global tl_lookup_table
  global aDiacritic

  # Braced so that [expr] byte-compiles the expression and the operand is
  # substituted exactly once.
  set f [expr {$bRemoveDiacritic==2}]

  if {[info exists tl_lookup_table($i)]} {
    set i $tl_lookup_table($i)
  }
  if {$bRemoveDiacritic && [info exists aDiacritic($i,$f)]} {
    set i $aDiacritic($i,$f)
  }

  # Braced for the same reason: an unbraced "expr $i" would re-parse the
  # value of i as an expression, evaluating any [command] embedded in it.
  expr {$i}
}
# Expose the Tcl proc to SQL as tcl_fold() on the [db] connection, so the
# queries below can compare it with fts5_fold().
db func tcl_fold tcl_fold
63
# Tcl reference implementation of the alphanumeric test. The tests in this
# file compare its output against the SQL function fts5_isalnum().
#
# Returns 0 if the global array aNotAlnum has an entry for codepoint I,
# and 1 otherwise.
proc tcl_isalnum {i} {
  if {[info exists ::aNotAlnum($i)]} {
    return 0
  }
  return 1
}
# Expose the Tcl proc to SQL as tcl_isalnum() on the [db] connection, so
# test 1.3 can compare it with fts5_isalnum().
db func tcl_isalnum tcl_isalnum
69
70
# 1.0.*: calling the SQL test functions with an unsupported number of
# arguments must fail with SQLite's "wrong number of arguments" error.
#
# fts5_isalnum() rejects three arguments.
do_catchsql_test 1.0.1 {
  SELECT fts5_isalnum(1, 2, 3);
} {1 {wrong number of arguments to function fts5_isalnum}}
# fts5_fold() rejects zero arguments ...
do_catchsql_test 1.0.2 {
  SELECT fts5_fold();
} {1 {wrong number of arguments to function fts5_fold}}
# ... and three arguments (tests 1.1 and 1.2.* call it with one and two).
do_catchsql_test 1.0.3 {
  SELECT fts5_fold(1,2,3);
} {1 {wrong number of arguments to function fts5_fold}}
80
# 1.1: for every integer i from -1 to 100000 inclusive, the one-argument
# form of fts5_fold() must agree with the Tcl reference tcl_fold(). The
# query returns the number of mismatches and the smallest mismatching i;
# {0 {}} means there were none (min() over zero rows is NULL).
do_execsql_test 1.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii WHERE fts5_fold(i)!=CAST(tcl_fold(i) AS int);
} {0 {}}
89
# 1.2.1: same sweep over -1..100000 as test 1.1, but with the second
# argument set to 1 for both fts5_fold() and tcl_fold(). Expects no
# mismatches.
do_execsql_test 1.2.1 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_fold(i,1)!=CAST(tcl_fold(i,1) AS int);
} {0 {}}

# 1.2.2: as 1.2.1 with the second argument set to 2, which makes tcl_fold
# look up aDiacritic entries stored under flag 1.
do_execsql_test 1.2.2 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_fold(i,2)!=CAST(tcl_fold(i,2) AS int);
} {0 {}}
109
# 1.3: for every integer i from -1 to 100000 inclusive, fts5_isalnum() must
# agree with the Tcl reference tcl_isalnum(). {0 {}} means no mismatches.
do_execsql_test 1.3 {
  WITH ii(i) AS (
    SELECT -1
    UNION ALL
    SELECT i+1 FROM ii WHERE i<100000
  )
  SELECT count(*), min(i) FROM ii
  WHERE fts5_isalnum(i)!=CAST(tcl_isalnum(i) AS int);
} {0 {}}
119
# 1.4: create an fts5 table whose unicode61 tokenizer is given a
# "separators" option listing every codepoint from 700 to 899 inclusive.
# The CREATE VIRTUAL TABLE statement must succeed and return nothing.
do_test 1.4 {
  set str {CREATE VIRTUAL TABLE f3 USING fts5(a, tokenize=}
  append str {"unicode61 separators '}
  # Append one character per codepoint in the range 700..899.
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}
# 1.5: as 1.4, but the same 200 codepoints are passed through the
# "tokenchars" option instead, on a second table (f5).
do_test 1.5 {
  set str {CREATE VIRTUAL TABLE f5 USING fts5(a, tokenize=}
  append str {"unicode61 tokenchars '}
  # Append one character per codepoint in the range 700..899.
  for {set i 700} {$i<900} {incr i} {
    append str [format %c $i]
  }
  append str {'");}
  execsql $str
} {}
138
139
140finish_test
141