##########################################################################
# 2016 Jan 27
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#

# Parse the script's command line ($::argv) into the global array ::A by
# handing the option specification and usage text below to cmdline::process.
#
# In the specification, a two-element entry {name help} is a boolean switch,
# a three-element entry {name default help} is a switch that takes a value,
# and a bare word is a positional parameter (a trailing "..." marks the
# parameter that collects all remaining arguments).
#
# Returns whatever cmdline::process returns.
#
proc process_cmdline {} {
  # Switches first, then the positional parameters in command line order.
  set spec {
    {fts5 "use fts5 (this is the default)"}
    {fts4 "use fts4"}
    {trigram "Use tokenize=trigram"}
    {colsize "10 10 10" "list of column sizes"}
    {tblname "t1" "table name to create"}
    {detail "full" "Fts5 detail mode to use"}
    {repeat 1 "Load each file this many times"}
    {prefix "" "Fts prefix= option"}
    {trans 1 "True to use a transaction"}
    database
    file...
  }

  # Free-form help text printed after the switch summary on a usage error.
  set help {
  This script is designed to create fts4/5 tables with more than one column.
  The -colsize option should be set to a Tcl list of integer values, one for
  each column in the table. Each value is the number of tokens that will be
  inserted into the column value for each row. For example, setting the -colsize
  option to "5 10" creates an FTS table with 2 columns, with roughly 5 and 10
  tokens per row in each, respectively.

  Each "FILE" argument should be a text file. The contents of these text files
  is split on whitespace characters to form a list of tokens. The first N1
  tokens are used for the first column of the first row, where N1 is the first
  element of the -colsize list. The next N2 are used for the second column of
  the first row, and so on. Rows are added to the table until the entire list
  of tokens is exhausted.
  }

  cmdline::process ::A $::argv $spec $help
}

###########################################################################
###########################################################################
# Command line options processor. This is generic code that can be copied
# between scripts.
#
namespace eval cmdline {

  # Report a command line error and terminate the process.
  #
  #   O    Option specification list (see [process]).
  #   E    Free-form help text, printed after the switch summary.
  #   msg  Optional error message, printed first as "Error: <msg>".
  #
  # Everything is written to stderr. This command does not return: it
  # calls [exit -1].
  #
  proc cmdline_error {O E {msg ""}} {
    if {$msg != ""} {
      puts stderr "Error: $msg"
      puts stderr ""
    }

    # Single-element spec entries are positional parameters. They are shown
    # in upper case on the usage line.
    set L [list]
    foreach o $O {
      if {[llength $o]==1} {
        lappend L [string toupper $o]
      }
    }

    puts stderr "Usage: $::argv0 ?SWITCHES? $L"
    puts stderr ""
    puts stderr "Switches are:"
    foreach o $O {
      if {[llength $o]==3} {
        # {name default help}: a switch that takes a value.
        foreach {a b c} $o {}
        puts stderr [format " -%-15s %s (default \"%s\")" "$a VAL" $c $b]
      } elseif {[llength $o]==2} {
        # {name help}: a boolean switch.
        foreach {a b} $o {}
        puts stderr [format " -%-15s %s" $a $b]
      }
    }
    puts stderr ""
    puts stderr $E
    exit -1
  }

  # Parse the argument list $lArgs according to specification $O, storing
  # the results in the caller's array named by $avar.
  #
  #   avar   Name of an array variable in the caller's scope.
  #   lArgs  List of command line arguments (usually $::argv).
  #   O      Specification list. Each element is one of:
  #            {name help}          boolean switch, 0 unless present
  #            {name default help}  switch taking a value
  #            name                 mandatory positional parameter
  #            name...              collects all remaining arguments as a list
  #   E      Help text passed through to [cmdline_error].
  #
  # Switches may be abbreviated to any unambiguous prefix; an exact switch
  # name always matches itself. The argument "--" ends switch processing and
  # is discarded. Any usage problem is reported via [cmdline_error], which
  # exits the process. A malformed specification raises a Tcl error.
  #
  proc process {avar lArgs O E} {
    upvar $avar A
    set zTrailing ""       ;# Name of the "..." parameter, or "" if none
    set lPosargs [list]

    # Populate A() with default values. Also, for each switch in the command
    # line spec, set an entry in the idx() array as follows:
    #
    #  {tblname t1 "table name to use"}
    #      -> [set idx(-tblname) {tblname t1 "table name to use"}]
    #
    # For each position parameter, append its name to $lPosargs. If the ...
    # specifier is present, set $zTrailing to the name of the prefix.
    #
    foreach o $O {
      set nm [lindex $o 0]
      set nArg [llength $o]
      switch -- $nArg {
        1 {
          if {[string range $nm end-2 end]=="..."} {
            set zTrailing [string range $nm 0 end-3]
          } else {
            lappend lPosargs $nm
          }
        }
        2 {
          set A($nm) 0
          set idx(-$nm) $o
        }
        3 {
          set A($nm) [lindex $o 1]
          set idx(-$nm) $o
        }
        default {
          error "Error in command line specification"
        }
      }
    }

    # Set explicitly specified option values
    #
    set nArg [llength $lArgs]
    for {set i 0} {$i < $nArg} {incr i} {
      set opt [lindex $lArgs $i]
      if {[string range $opt 0 0]!="-"} break
      if {$opt=="--"} {
        # "--" only marks the end of the switches. Step over it so that it
        # is not mistaken for the first positional argument.
        incr i
        break
      }

      # Find the switch that $opt refers to. An exact name wins outright;
      # otherwise collect every switch that $opt is a literal prefix of.
      # A plain string comparison is used (rather than a glob pattern) so
      # that characters such as "*" or "?" in user input are not special.
      if {[info exists idx($opt)]} {
        set c [list $opt]
      } else {
        set c [list]
        set nOpt [string length $opt]
        foreach nm [array names idx] {
          if {[string equal -length $nOpt $opt $nm]} {
            lappend c $nm
          }
        }
      }
      if {[llength $c]==0} { cmdline_error $O $E "Unrecognized option: $opt"}
      if {[llength $c]>1} { cmdline_error $O $E "Ambiguous option: $opt"}
      set c [lindex $c 0]

      if {[llength $idx($c)]==3} {
        # Value switch: the next argument is the value.
        if {$i==$nArg-1} {
          cmdline_error $O $E "Option requires argument: $c"
        }
        incr i
        set A([lindex $idx($c) 0]) [lindex $lArgs $i]
      } else {
        set A([lindex $idx($c) 0]) 1
      }
    }

    # Deal with position arguments. At this point $i is the index of the
    # first argument that is not a switch.
    #
    set nPosarg [llength $lPosargs]
    set nRem [expr {$nArg - $i}]
    if {$nRem < $nPosarg || ($zTrailing=="" && $nRem > $nPosarg)} {
      cmdline_error $O $E
    }
    for {set j 0} {$j < $nPosarg} {incr j} {
      set A([lindex $lPosargs $j]) [lindex $lArgs [expr {$j+$i}]]
    }
    if {$zTrailing!=""} {
      # Everything left over goes to the "..." parameter (possibly empty).
      set A($zTrailing) [lrange $lArgs [expr {$j+$i}] end]
    }
  }
} ;# namespace eval cmdline
# End of command line options processor.
###########################################################################
###########################################################################

process_cmdline

# If -fts4 was specified, use fts4. Otherwise, fts5.
if {$A(fts4)} {
  set A(fts) fts4
} else {
  set A(fts) fts5
}

sqlite3 db $A(database)

# Create the FTS table in the db.
# Return a list of the table columns.
#
# Reads the global A array: colsize (one entry per column), tblname, fts,
# detail, trigram and prefix. Columns are named a, b, c, ... so at most 26
# columns are supported; an error is raised if -colsize lists no columns or
# more columns than there are names.
#
proc create_table {} {
  global A
  set cols [list a b c d e f g h i j k l m n o p q r s t u v w x y z]

  set nCol [llength $A(colsize)]
  if {$nCol < 1 || $nCol > [llength $cols]} {
    error "-colsize must list between 1 and [llength $cols] column sizes"
  }
  set cols [lrange $cols 0 [expr {$nCol-1}]]

  set sql "CREATE VIRTUAL TABLE IF NOT EXISTS $A(tblname) USING $A(fts) ("
  append sql [join $cols ,]
  if {$A(fts)=="fts5"} { append sql ",detail=$A(detail)" }
  if {$A(trigram)} { append sql ",tokenize=trigram" }
  append sql ", prefix='$A(prefix)');"

  db eval $sql
  return $cols
}

# Return a list of tokens from the named file.
#
# The file contents are split on each whitespace character, so runs of
# whitespace produce empty tokens. The channel is closed even if the read
# fails.
#
proc readfile {file} {
  set fd [open $file]
  set rc [catch {read $fd} data]
  close $fd
  if {$rc} { error $data }
  split $data
}

# Return a list consisting of the elements of list $L repeated $n times.
# $n must be an integer; a value of zero or less yields an empty list.
#
proc repeat {L n} {
  # Without this check a non-numeric $n would make the loop condition a
  # string comparison that never becomes false.
  if {![string is integer -strict $n]} {
    error "repeat count must be an integer: \"$n\""
  }
  set res [list]
  for {set i 0} {$i < $n} {incr i} {
    # Append in place instead of re-copying the accumulated list each time.
    foreach tok $L { lappend res $tok }
  }
  set res
}


# Check -colsize before reading any input. Each entry must be a non-negative
# integer, and at least one token must be consumed per row - otherwise the
# insert loop below would never advance through the token list.
#
set nPerRow 0
foreach s $A(colsize) {
  if {![string is integer -strict $s] || $s < 0} {
    error "-colsize values must be non-negative integers: \"$s\""
  }
  incr nPerRow $s
}
if {$nPerRow <= 0} {
  error "-colsize must request at least one token per row"
}

# Load all the data into a big list of tokens.
#
set tokens [list]
foreach f $A(file) {
  set tokens [concat $tokens [repeat [readfile $f] $A(repeat)]]
}

set N [llength $tokens]
set i 0
set cols [create_table]

# Build "INSERT INTO tbl VALUES($R(a), $R(b), ...)". The $R(x) references are
# left unsubstituted here so that [db eval] binds the values of the R() array
# elements as SQL parameters.
set sql "INSERT INTO $A(tblname) VALUES(\$R([lindex $cols 0])"
foreach c [lrange $cols 1 end] {
  append sql ", \$R($c)"
}
append sql ")"

if {$A(trans)} { db eval BEGIN }
  while {$i < $N} {
    # Take the next $s tokens for each column in turn, then insert the row.
    foreach c $cols s $A(colsize) {
      set R($c) [lrange $tokens $i [expr {$i+$s-1}]]
      incr i $s
    }
    db eval $sql
  }
if {$A(trans)} { db eval COMMIT }