xref: /sqlite-3.40.0/ext/fts5/mkportersteps.tcl (revision 2656167f)
#
# 2014 Jun 09
#
# The author disclaims copyright to this source code.  In place of
# a legal notice, here is a blessing:
#
#    May you do good and not evil.
#    May you find forgiveness for yourself and forgive others.
#    May you share freely, never taking more than you give.
#
#-------------------------------------------------------------------------
#
# This script generates the implementations of the following C functions,
# which are part of the porter tokenizer implementation:
#
#   static int fts5PorterStep1B(char *aBuf, int *pnBuf);
#   static int fts5PorterStep1B2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep2(char *aBuf, int *pnBuf);
#   static int fts5PorterStep3(char *aBuf, int *pnBuf);
#   static int fts5PorterStep4(char *aBuf, int *pnBuf);
#
22
# Rule table for fts5PorterStep1B2().  Each row is
#
#   {SUFFIX CONDITION REPLACEMENT RETFLAG}
#
# Every rule here is unconditional (empty CONDITION): the suffix is
# replaced and, because RETFLAG is 1, the generated function returns 1.
set O(Step1B2) {
  {"at" "" "ate" 1}
  {"bl" "" "ble" 1}
  {"iz" "" "ize" 1}
}
28
# Rule table for fts5PorterStep1B().  Each row is
#
#   {SUFFIX CONDITION REPLACEMENT RETFLAG}
#
# CONDITION names the C predicate that must hold for the stem before the
# suffix is rewritten.  Rows with RETFLAG 1 make the generated function
# return 1 when they are applied.
set O(Step1B) {
  {"eed" fts5Porter_MGt0  "ee" 0}
  {"ed"  fts5Porter_Vowel ""   1}
  {"ing" fts5Porter_Vowel ""   1}
}
34
# Rule table for fts5PorterStep2().  Each row is
#
#   {SUFFIX CONDITION REPLACEMENT}
#
# Rules that share a second-to-last suffix character end up in the same
# generated "case" and are tried in the order listed, so a longer suffix
# must precede any shorter suffix it ends with (e.g. "ational" before
# "tional", "ization" before "ation").
set O(Step2) {
  {"ational" fts5Porter_MGt0 "ate"}
  {"tional"  fts5Porter_MGt0 "tion"}
  {"enci"    fts5Porter_MGt0 "ence"}
  {"anci"    fts5Porter_MGt0 "ance"}
  {"izer"    fts5Porter_MGt0 "ize"}
  {"logi"    fts5Porter_MGt0 "log"}
  {"bli"     fts5Porter_MGt0 "ble"}
  {"alli"    fts5Porter_MGt0 "al"}
  {"entli"   fts5Porter_MGt0 "ent"}
  {"eli"     fts5Porter_MGt0 "e"}
  {"ousli"   fts5Porter_MGt0 "ous"}
  {"ization" fts5Porter_MGt0 "ize"}
  {"ation"   fts5Porter_MGt0 "ate"}
  {"ator"    fts5Porter_MGt0 "ate"}
  {"alism"   fts5Porter_MGt0 "al"}
  {"iveness" fts5Porter_MGt0 "ive"}
  {"fulness" fts5Porter_MGt0 "ful"}
  {"ousness" fts5Porter_MGt0 "ous"}
  {"aliti"   fts5Porter_MGt0 "al"}
  {"iviti"   fts5Porter_MGt0 "ive"}
  {"biliti"  fts5Porter_MGt0 "ble"}
}
58
# Rule table for fts5PorterStep3().  Each row is
#
#   {SUFFIX CONDITION REPLACEMENT}
#
# An empty REPLACEMENT means the suffix is simply removed.
set O(Step3) {
  {"icate" fts5Porter_MGt0 "ic"}
  {"ative" fts5Porter_MGt0 ""}
  {"alize" fts5Porter_MGt0 "al"}
  {"iciti" fts5Porter_MGt0 "ic"}
  {"ical"  fts5Porter_MGt0 "ic"}
  {"ful"   fts5Porter_MGt0 ""}
  {"ness"  fts5Porter_MGt0 ""}
}
68
# Rule table for fts5PorterStep4().  Each row is
#
#   {SUFFIX CONDITION REPLACEMENT}
#
# Every rule removes its suffix (empty REPLACEMENT).  Rules sharing a
# second-to-last suffix character are tried in the order listed, which is
# why "ement" comes before "ment" and "ment" before "ent".
set O(Step4) {
  {"al"    fts5Porter_MGt1            ""}
  {"ance"  fts5Porter_MGt1            ""}
  {"ence"  fts5Porter_MGt1            ""}
  {"er"    fts5Porter_MGt1            ""}
  {"ic"    fts5Porter_MGt1            ""}
  {"able"  fts5Porter_MGt1            ""}
  {"ible"  fts5Porter_MGt1            ""}
  {"ant"   fts5Porter_MGt1            ""}
  {"ement" fts5Porter_MGt1            ""}
  {"ment"  fts5Porter_MGt1            ""}
  {"ent"   fts5Porter_MGt1            ""}
  {"ion"   fts5Porter_MGt1_and_S_or_T ""}
  {"ou"    fts5Porter_MGt1            ""}
  {"ism"   fts5Porter_MGt1            ""}
  {"ate"   fts5Porter_MGt1            ""}
  {"iti"   fts5Porter_MGt1            ""}
  {"ous"   fts5Porter_MGt1            ""}
  {"ive"   fts5Porter_MGt1            ""}
  {"ize"   fts5Porter_MGt1            ""}
}
90
# sort_cb LHS RHS
#
# Comparison callback for two rule entries.  The first element of each
# entry is its suffix; the entries are ordered by the second-to-last
# character of that suffix.  Returns -1, 0 or 1, as [string compare] does.
proc sort_cb {lhs rhs} {
  set lhsKey [string index [lindex $lhs 0] end-1]
  set rhsKey [string index [lindex $rhs 0] end-1]
  return [string compare $lhsKey $rhsKey]
}
96
# create_step_function NAME DATA
#
# Write to stdout the C source of one function:
#
#   static int fts5Porter<NAME>(char *aBuf, int *pnBuf);
#
# DATA is a list of rules, each a list of 3 or 4 elements:
#
#   { SUFFIX CONDITION REPLACEMENT ?RETFLAG? }
#
#   SUFFIX       Text matched against the end of aBuf.  Must be at least
#                2 characters long, since rules are dispatched on the
#                second-to-last character.
#   CONDITION    Name of a C function called as CONDITION(aBuf, nStem),
#                where nStem is the buffer length minus the suffix length.
#                An empty string makes the rule unconditional.
#   REPLACEMENT  Text copied over the suffix (may be empty).
#   RETFLAG      If present and true, the generated function sets its
#                return value to 1 when the rule is applied.
#
# Rules are grouped by the second-to-last character of SUFFIX, one "case"
# per group, with the cases emitted in sorted order.  Within a group the
# rules form an if / else-if chain in DATA order, so the first rule whose
# suffix matches wins even if its CONDITION then fails.  A longer suffix
# must therefore be listed before any shorter suffix it ends with.
#
# Raises an error if a SUFFIX is shorter than 2 characters.
#
# NOTE(review): the generated switch reads aBuf[nBuf-2] unconditionally;
# callers presumably guarantee nBuf>=2 - confirm against the C caller.
proc create_step_function {name data} {

  # Skeleton of the generated C function.
  set T(function) {
static int fts5Porter${name}(char *aBuf, int *pnBuf){
  int ret = 0;
  int nBuf = *pnBuf;
  switch( aBuf[nBuf-2] ){
    ${switchbody}
  }
  return ret;
}
  }

  # One "case" of the switch; ${ifstmts} is the if / else-if chain.
  set T(case) {
    case '${k}':
      ${ifstmts}
      break;
  }

  # Rule templates, named if_<bCond>_<bMemcpy>_<bRet>:
  #   bCond    1 if the rule has a condition callback
  #   bMemcpy  1 if the replacement text is non-empty
  #   bRet     1 if applying the rule sets ret = 1
  # All eight combinations must exist, because the template is looked up
  # by name for every rule.
  set T(if_0_0_0) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
      }
  }
  set T(if_1_0_0) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
        }
      }
  }
  set T(if_0_1_0) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
      }
  }
  set T(if_1_1_0) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
        }
      }
  }
  set T(if_0_0_1) {
      if( ${match} ){
        *pnBuf = nBuf - $n;
        ret = 1;
      }
  }
  set T(if_1_0_1) {
      if( ${match} ){
        if( ${cond} ){
          *pnBuf = nBuf - $n;
          ret = 1;
        }
      }
  }
  set T(if_0_1_1) {
      if( ${match} ){
        ${memcpy}
        *pnBuf = nBuf - $n + $nRep;
        ret = 1;
      }
  }
  set T(if_1_1_1) {
      if( ${match} ){
        if( ${cond} ){
          ${memcpy}
          *pnBuf = nBuf - $n + $nRep;
          ret = 1;
        }
      }
  }

  set switchbody ""

  # Group the rules by the second-to-last character of their suffix,
  # keeping DATA order inside each group.
  foreach I $data {
    set zSuffix [lindex $I 0]
    if {[string length $zSuffix] < 2} {
      # A shorter suffix has no second-to-last character and would yield
      # the invalid C fragment "case '':".
      error "fts5Porter${name}: suffix \"$zSuffix\" is shorter than 2 characters"
    }
    set k [string index $zSuffix end-1]
    lappend aCase($k) $I
  }

  foreach k [lsort [array names aCase]] {
    set ifstmts ""
    foreach I $aCase($k) {
      set zSuffix [lindex $I 0]         ;# Suffix text for this rule
      set zRep [lindex $I 2]            ;# Replacement text for rule
      set xCond [lindex $I 1]           ;# Condition callback (or "")

      set n [string length $zSuffix]
      set nRep [string length $zRep]

      # C fragments substituted into the selected template.
      set match "nBuf>$n && 0==memcmp(\"$zSuffix\", &aBuf\[nBuf-$n\], $n)"
      set memcpy "memcpy(&aBuf\[nBuf-$n\], \"$zRep\", $nRep);"
      set cond "${xCond}(aBuf, nBuf-$n)"

      set bMemcpy [expr {$nRep>0}]
      set bCond [expr {$xCond ne ""}]
      set bRet [expr {[llength $I]>3 && [lindex $I 3]}]

      set t $T(if_${bCond}_${bMemcpy}_${bRet})
      lappend ifstmts [string trim [subst -nocommands $t]]
    }

    # Each trimmed fragment ends in "}", so joining with "else " produces
    # "}else if( ... ){".
    set ifstmts [join $ifstmts "else "]

    append switchbody [subst -nocommands $T(case)]
  }


  puts [subst -nocommands $T(function)]
}
203
204
# Emit the generated C code on stdout, bracketed by marker comments.
puts [string trim {
/**************************************************************************
***************************************************************************
** GENERATED CODE STARTS HERE (mkportersteps.tcl)
*/
}]
# [array names] returns the keys in hash-table order, which is not
# guaranteed to be stable.  Sort them so that the functions are always
# emitted in the same order and regenerated output is reproducible.
foreach step [lsort [array names O]] {
  create_step_function $step $O($step)
}
puts [string trim {
/*
** GENERATED CODE ENDS HERE (mkportersteps.tcl)
***************************************************************************
**************************************************************************/
}]
220
221
222
223