#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#   static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#   static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep3(char *aBuf, int *pnBuf);
#   static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
# Each element of the O() array below is a list of suffix rules. A rule is
# a list of three or four elements:
#
#   0: The suffix text to match at the end of the buffer.
#   1: The name of a C condition function, invoked in the generated code
#      as <fn>(aBuf, nBuf-<suffix length>), or an empty string if the rule
#      is unconditional.
#   2: The replacement text (may be empty, in which case the suffix is
#      simply removed).
#   3: Optional boolean. If true, the generated function sets its return
#      value to 1 when the rule is applied.
#
# The generated code dispatches on the second-to-last character of the
# buffer. Rules that share that character are emitted, in the order they
# are listed here, as a single if/else-if chain keyed on the suffix match
# only. A suffix must therefore be listed before any shorter suffix that
# it ends with (e.g. "ement" before "ment" before "ent").
#

set O(Step1B2) {
  { at  {} ate 1 }
  { bl  {} ble 1 }
  { iz  {} ize 1 }
}

set O(Step1B) {
  { "eed" fts5Porter_MGt0  "ee" 0 }
  { "ed"  fts5Porter_Vowel ""   1 }
  { "ing" fts5Porter_Vowel ""   1 }
}

set O(Step2) {
  { "ational" fts5Porter_MGt0 "ate"  }
  { "tional"  fts5Porter_MGt0 "tion" }
  { "enci"    fts5Porter_MGt0 "ence" }
  { "anci"    fts5Porter_MGt0 "ance" }
  { "izer"    fts5Porter_MGt0 "ize"  }
  { "logi"    fts5Porter_MGt0 "log"  }
  { "bli"     fts5Porter_MGt0 "ble"  }
  { "alli"    fts5Porter_MGt0 "al"   }
  { "entli"   fts5Porter_MGt0 "ent"  }
  { "eli"     fts5Porter_MGt0 "e"    }
  { "ousli"   fts5Porter_MGt0 "ous"  }
  { "ization" fts5Porter_MGt0 "ize"  }
  { "ation"   fts5Porter_MGt0 "ate"  }
  { "ator"    fts5Porter_MGt0 "ate"  }
  { "alism"   fts5Porter_MGt0 "al"   }
  { "iveness" fts5Porter_MGt0 "ive"  }
  { "fulness" fts5Porter_MGt0 "ful"  }
  { "ousness" fts5Porter_MGt0 "ous"  }
  { "aliti"   fts5Porter_MGt0 "al"   }
  { "iviti"   fts5Porter_MGt0 "ive"  }
  { "biliti"  fts5Porter_MGt0 "ble"  }
}

set O(Step3) {
  { "icate" fts5Porter_MGt0 "ic" }
  { "ative" fts5Porter_MGt0 ""   }
  { "alize" fts5Porter_MGt0 "al" }
  { "iciti" fts5Porter_MGt0 "ic" }
  { "ical"  fts5Porter_MGt0 "ic" }
  { "ful"   fts5Porter_MGt0 ""   }
  { "ness"  fts5Porter_MGt0 ""   }
}

set O(Step4) {
  { "al"    fts5Porter_MGt1 "" }
  { "ance"  fts5Porter_MGt1 "" }
  { "ence"  fts5Porter_MGt1 "" }
  { "er"    fts5Porter_MGt1 "" }
  { "ic"    fts5Porter_MGt1 "" }
  { "able"  fts5Porter_MGt1 "" }
  { "ible"  fts5Porter_MGt1 "" }
  { "ant"   fts5Porter_MGt1 "" }
  { "ement" fts5Porter_MGt1 "" }
  { "ment"  fts5Porter_MGt1 "" }
  { "ent"   fts5Porter_MGt1 "" }
  { "ion"   fts5Porter_MGt1_and_S_or_T "" }
  { "ou"    fts5Porter_MGt1 "" }
  { "ism"   fts5Porter_MGt1 "" }
  { "ate"   fts5Porter_MGt1 "" }
  { "iti"   fts5Porter_MGt1 "" }
  { "ous"   fts5Porter_MGt1 "" }
  { "ive"   fts5Porter_MGt1 "" }
  { "ize"   fts5Porter_MGt1 "" }
}

# Comparison callback for two rules (lists whose first element is the
# suffix text). Compares the second-to-last characters of the two suffixes
# and returns the result of [string compare] (-1, 0 or 1).
#
# This proc is not invoked anywhere in this script; it is retained so that
# anything that sources the script and relies on it continues to work.
#
proc sort_cb {lhs rhs} {
  set L [string range [lindex $lhs 0] end-1 end-1]
  set R [string range [lindex $rhs 0] end-1 end-1]
  string compare $L $R
}

# Write to stdout the C implementation of function fts5Porter<name>().
#
#   name: Suffix appended to "fts5Porter" to form the C function name.
#   data: List of rules, in the format described at the top of this file.
#
# The generated function switches on aBuf[nBuf-2]. For each distinct
# second-to-last suffix character there is one "case" containing an
# if/else-if chain with one branch per rule, in the order the rules appear
# in $data. Cases are emitted in sorted order of that character.
#
# An error is raised if a suffix is shorter than two characters, since
# such a rule has no second-to-last character to dispatch on.
#
proc create_step_function {name data} {

  # C code templates. Variable references are expanded using
  # [subst -nocommands], so square brackets in the C code are left alone.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # The if-statement templates are named if_<bCond>_<bMemcpy>_<bRet>:
  #
  #   bCond:   true if the rule has a condition function
  #   bMemcpy: true if the rule has non-empty replacement text
  #   bRet:    true if applying the rule sets the return value to 1
  #
  set T(if_0_0_0) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
      }
  }
  set T(if_1_0_0) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
        }
      }
  }
  set T(if_0_1_0) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
  }
  set T(if_1_1_0) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
        }
      }
  }
  set T(if_0_0_1) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
  }
  set T(if_1_0_1) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
          ret = 1;
        }
      }
  }
  set T(if_0_1_1) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
  }
  set T(if_1_1_1) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
          ret = 1;
        }
      }
  }

  set switchbody ""

  # Group the rules by the second-to-last character of the suffix. Within
  # each group the original rule order is preserved.
  foreach I $data {
    set zSuffix [lindex $I 0]
    if {[string length $zSuffix]<2} {
      error "suffix \"$zSuffix\" in $name must be at least 2 characters long"
    }
    set k [string range $zSuffix end-1 end-1]
    lappend aCase($k) $I
  }

  foreach k [lsort [array names aCase]] {
    set ifstmts [list]
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep    [lindex $I 2]         ;# Replacement text for rule
      set xCond   [lindex $I 1]         ;# Condition callback (or "")

      set n    [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments spliced into the if-statement template.
      set match  "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond   "${xCond}(aBuf, nBuf-$n)"

      set bMemcpy [expr {$nRep>0}]
      set bCond   [expr {$xCond ne ""}]
      set bRet    [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Each trimmed statement ends with "}" and starts with "if", so joining
    # with "else " produces a "}else if(" chain.
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }

  puts [subst -nocommands $T(function)]
}


puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
# Iterate in sorted order so that the order of the generated functions does
# not depend on the internal ordering of [array names].
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}
puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]