" Tests for regexp in utf8 encoding

" Check the [[=x=]] equivalence classes.
" "str" holds whitespace-separated groups; the first character of a group is
" the base letter and the rest are the characters expected to be equivalent
" to it.  Every character of a group must match every other character of the
" same group and must not match anything in any other group.
func s:equivalence_test()
  let str = "AÀÁÂÃÄÅĀĂĄǍǞǠẢ BḂḆ CÇĆĈĊČ DĎĐḊḎḐ EÈÉÊËĒĔĖĘĚẺẼ FḞ GĜĞĠĢǤǦǴḠ HĤĦḢḦḨ IÌÍÎÏĨĪĬĮİǏỈ JĴ KĶǨḰḴ LĹĻĽĿŁḺ MḾṀ NÑŃŅŇṄṈ OÒÓÔÕÖØŌŎŐƠǑǪǬỎ PṔṖ Q RŔŖŘṘṞ SŚŜŞŠṠ TŢŤŦṪṮ UÙÚÛÜŨŪŬŮŰŲƯǓỦ VṼ WŴẀẂẄẆ XẊẌ YÝŶŸẎỲỶỸ ZŹŻŽƵẐẔ aàáâãäåāăąǎǟǡả bḃḇ cçćĉċč dďđḋḏḑ eèéêëēĕėęěẻẽ fḟ gĝğġģǥǧǵḡ hĥħḣḧḩẖ iìíîïĩīĭįǐỉ jĵǰ kķǩḱḵ lĺļľŀłḻ mḿṁ nñńņňŉṅṉ oòóôõöøōŏőơǒǫǭỏ pṕṗ q rŕŗřṙṟ sśŝşšṡ tţťŧṫṯẗ uùúûüũūŭůűųưǔủ vṽ wŵẁẃẅẇẘ xẋẍ yýÿŷẏẙỳỷỹ zźżžƶẑẕ"
  let groups = split(str)
  for group1 in groups
    for c in split(group1, '\zs')
      " next statement confirms that equivalence class matches every
      " character in group
      call assert_match('^[[=' . c . '=]]*$', group1)
      for group2 in groups
        if group2 != group1
          " next statement confirms that equivalence class doesn't match
          " character in any other group
          call assert_equal(-1, match(group2, '[[=' . c . '=]]'))
        endif
      endfor
    endfor
  endfor
endfunc

" Run the equivalence class checks with the old (backtracking) engine.
func Test_equivalence_re1()
  set re=1
  call s:equivalence_test()
  set re=0
endfunc

" Run the equivalence class checks with the new (NFA) engine.
func Test_equivalence_re2()
  set re=2
  call s:equivalence_test()
  set re=0
endfunc

" Check which of the characters 1..255 are matched by each [[:class:]] item
" and by \i, \k and \f.
" Every character is tried against every class and appended, in ascending
" code point order, to the string collected for that class.  The collected
" strings are then compared with the expected sets.
func s:classes_test()
  set isprint=@,161-255
  call assert_equal('Motörhead', matchstr('Motörhead', '[[:print:]]\+'))

  let alnumchars = ''
  let alphachars = ''
  let backspacechar = ''
  let blankchars = ''
  let cntrlchars = ''
  let digitchars = ''
  let escapechar = ''
  let graphchars = ''
  let lowerchars = ''
  let printchars = ''
  let punctchars = ''
  let returnchar = ''
  let spacechars = ''
  let tabchar = ''
  let upperchars = ''
  let xdigitchars = ''
  let identchars = ''
  let identchars1 = ''
  let kwordchars = ''
  let kwordchars1 = ''
  let fnamechars = ''
  let fnamechars1 = ''
  let i = 1
  while i <= 255
    let c = nr2char(i)
    if c =~ '[[:alpha:]]'
      let alphachars .= c
    endif
    if c =~ '[[:alnum:]]'
      let alnumchars .= c
    endif
    if c =~ '[[:backspace:]]'
      let backspacechar .= c
    endif
    if c =~ '[[:blank:]]'
      let blankchars .= c
    endif
    if c =~ '[[:cntrl:]]'
      let cntrlchars .= c
    endif
    if c =~ '[[:digit:]]'
      let digitchars .= c
    endif
    if c =~ '[[:escape:]]'
      let escapechar .= c
    endif
    if c =~ '[[:graph:]]'
      let graphchars .= c
    endif
    if c =~ '[[:lower:]]'
      let lowerchars .= c
    endif
    if c =~ '[[:print:]]'
      let printchars .= c
    endif
    if c =~ '[[:punct:]]'
      let punctchars .= c
    endif
    if c =~ '[[:return:]]'
      let returnchar .= c
    endif
    if c =~ '[[:space:]]'
      let spacechars .= c
    endif
    if c =~ '[[:tab:]]'
      let tabchar .= c
    endif
    if c =~ '[[:upper:]]'
      let upperchars .= c
    endif
    if c =~ '[[:xdigit:]]'
      let xdigitchars .= c
    endif
    if c =~ '[[:ident:]]'
      let identchars .= c
    endif
    if c =~ '\i'
      let identchars1 .= c
    endif
    if c =~ '[[:keyword:]]'
      let kwordchars .= c
    endif
    if c =~ '\k'
      let kwordchars1 .= c
    endif
    if c =~ '[[:fname:]]'
      let fnamechars .= c
    endif
    if c =~ '\f'
      let fnamechars1 .= c
    endif
    let i += 1
  endwhile

  " All characters 0xA0..0xFF in code point order.  This range contains the
  " invisible U+00A0 (no-break space) and U+00AD (soft hyphen), which are
  " easily lost or replaced when they are written literally, so the string is
  " generated from the code points instead.
  let latin1_high = join(map(range(0xa0, 0xff), 'nr2char(v:val)'), '')

  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alphachars)
  call assert_equal('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz', alnumchars)
  call assert_equal("\b", backspacechar)
  call assert_equal("\t ", blankchars)
  call assert_equal("\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0b\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\e\x1c\x1d\x1e\x1f\x7f", cntrlchars)
  call assert_equal("0123456789", digitchars)
  call assert_equal("\<Esc>", escapechar)
  call assert_equal('!"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~', graphchars)
  call assert_equal('abcdefghijklmnopqrstuvwxyzµßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ', lowerchars)
  call assert_equal(' !"#$%&''()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
        \ .. latin1_high, printchars)
  call assert_equal('!"#$%&''()*+,-./:;<=>?@[\]^_`{|}~', punctchars)
  call assert_equal('ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞ', upperchars)
  call assert_equal("\r", returnchar)
  call assert_equal("\t\n\x0b\f\r ", spacechars)
  call assert_equal("\t", tabchar)
  call assert_equal('0123456789ABCDEFabcdef', xdigitchars)

  if has('win32')
    " NOTE(review): the documented Win32 default of 'isident' is
    " "@,48-57,_,128-167,224-235"; the characters 128-167 (which include the
    " invisible C1 controls and the no-break space) are generated explicitly
    " here - confirm on MS-Windows.
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz'
          \ .. join(map(range(0x80, 0xa7), 'nr2char(v:val)'), '')
          \ .. 'µÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  elseif has('ebcdic')
    " NOTE(review): these two strings are kept exactly as far as they are
    " visible; any invisible characters they originally contained could not
    " be verified.
    let identchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz¬®µº¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  else
    let identchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
    let kwordchars_ok = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyzµÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ'
  endif

  " In every variant the file name characters end with the complete
  " 0xA0..0xFF range.
  if has('win32')
    let fnamechars_ok = '!#$%+,-./0123456789:=@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]_abcdefghijklmnopqrstuvwxyz{}~' .. latin1_high
  elseif has('amiga')
    let fnamechars_ok = '$+,-./0123456789:ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. latin1_high
  elseif has('vms')
    let fnamechars_ok = '#$%+,-./0123456789:;<>ABCDEFGHIJKLMNOPQRSTUVWXYZ[]_abcdefghijklmnopqrstuvwxyz~' .. latin1_high
  elseif has('ebcdic')
    let fnamechars_ok = '#$%+,-./=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. latin1_high
  else
    let fnamechars_ok = '#$%+,-./0123456789=ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~' .. latin1_high
  endif

  call assert_equal(identchars_ok, identchars)
  call assert_equal(kwordchars_ok, kwordchars)
  call assert_equal(fnamechars_ok, fnamechars)

  " The [[:ident:]], [[:keyword:]] and [[:fname:]] items must agree with the
  " \i, \k and \f atoms.
  call assert_equal(identchars1, identchars)
  call assert_equal(kwordchars1, kwordchars)
  call assert_equal(fnamechars1, fnamechars)
endfunc

" Run the character class checks with the old (backtracking) engine.
func Test_classes_re1()
  set re=1
  call s:classes_test()
  set re=0
endfunc

" Run the character class checks with the new (NFA) engine.
func Test_classes_re2()
  set re=2
  call s:classes_test()
  set re=0
endfunc

" A range with the end before the start must fail with E944 for every value
" of 'regexpengine'.
func Test_reversed_range()
  for re in range(0, 2)
    exe 'set re=' . re
    call assert_fails('call match("abc def", "[c-a]")', 'E944:')
  endfor
  set re=0
endfunc

" A very large range in [] fails with E945 when 're' is 1 and works when
" 're' is 2.
func Test_large_class()
  set re=1
  call assert_fails('call match("abc def", "[\u3000-\u4000]")', 'E945:')
  set re=2
  call assert_equal(0, 'abc def' =~# '[\u3000-\u4000]')
  call assert_equal(1, "\u3042" =~# '[\u3000-\u4000]')
  set re=0
endfunc

" An over-long \%[] item must fail with E339 when 're' is 1.
func Test_optmatch_toolong()
  set re=1
  " Can only handle about 8000 characters.
  let pat = '\\%[' .. repeat('x', 9000) .. ']'
  call assert_fails('call match("abc def", "' .. pat .. '")', 'E339:')
  set re=0
endfunc

" Test for regexp patterns with multi-byte support, using utf-8.
" Table driven test: every entry of "tl" is matched with matchlist() using the
" engines selected by its first item, and the match and the nine submatches
" are compared with the expected values.  Differences are reported with
" assert_report().
func Test_multibyte_chars()
  " tl is a List of Lists with:
  "    2: test auto/old/new  0: test auto/old  1: test auto/new
  "    regexp pattern
  "    text to test the pattern on
  "    expected match (optional)
  "    expected submatch 1 (optional)
  "    expected submatch 2 (optional)
  "    etc.
  "  When there is no match use only the first two items.
  let tl = []

  " Multi-byte character tests. These will fail unless vim is compiled
  " with Multibyte (FEAT_MBYTE) or BIG/HUGE features.
  call add(tl, [2, '[[:alpha:][=a=]]\+', '879 aiaãâaiuvna ', 'aiaãâaiuvna'])
  call add(tl, [2, '[[=a=]]\+', 'ddaãâbcd', 'aãâ'])  " equivalence classes
  call add(tl, [2, '[^ม ]\+', 'มม oijasoifjos ifjoisj f osij j มมมมม abcd', 'oijasoifjos'])
  call add(tl, [2, ' [^ ]\+', 'start มabcdม ', ' มabcdม'])
  call add(tl, [2, '[ม[:alpha:][=a=]]\+', '879 aiaãมâมaiuvna ', 'aiaãมâมaiuvna'])

  " this is not a normal "i" but 0xec
  call add(tl, [2, '\p\+', 'ìa', 'ìa'])
  call add(tl, [2, '\p*', 'aあ', 'aあ'])

  " Test recognition of some character classes
  call add(tl, [2, '\i\+', '&*¨xx ', 'xx'])
  call add(tl, [2, '\f\+', '&*fname ', 'fname'])

  " Test composing character matching
  call add(tl, [2, '.ม', 'xม่x yมy', 'yม'])
  call add(tl, [2, '.ม่', 'xม่x yมy', 'xม่'])
  call add(tl, [2, "\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05bb\u05b9", " x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb x\u05b9 ", "x\u05b9"])
  call add(tl, [2, "\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [2, ".\u05b9", " y\u05bb\u05b9 x\u05b9 ", "y\u05bb\u05b9"])
  call add(tl, [1, "\u05b9\u05bb", " y\u05b9 x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, ".\u05b9\u05bb", " y\u05bb x\u05b9\u05bb ", "x\u05b9\u05bb"])
  call add(tl, [2, "a", "ca\u0300t"])
  call add(tl, [2, "ca", "ca\u0300t"])
  call add(tl, [2, "a\u0300", "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'a\%C', "ca\u0300t", "a\u0300"])
  call add(tl, [2, 'ca\%C', "ca\u0300t", "ca\u0300"])
  call add(tl, [2, 'ca\%Ct', "ca\u0300t", "ca\u0300t"])

  " Test \Z
  call add(tl, [2, 'ú\Z', 'x'])
  call add(tl, [2, 'יהוה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יְהוָה\Z', 'יהוה', 'יהוה'])
  call add(tl, [2, 'יהוה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְהוָה\Z', 'יְהוָה', 'יְהוָה'])
  call add(tl, [2, 'יְ\Z', 'וְיַ', 'יַ'])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200d\u05b9x\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200d\u05b9xy", "ק\u200d\u05b9x"])
  call add(tl, [2, "ק\u200dx\\Z", "xק\u200dxy", "ק\u200dx"])
  call add(tl, [2, "\u05b9\\Z", "xyz"])
  call add(tl, [2, "\\Z\u05b9", "xyz"])
  call add(tl, [2, "\u05b9\\Z", "xy\u05b9z", "y\u05b9"])
  call add(tl, [2, "\\Z\u05b9", "xy\u05b9z", "y\u05b9"])
  call add(tl, [1, "\u05b9\\+\\Z", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])
  call add(tl, [1, "\\Z\u05b9\\+", "xy\u05b9z\u05b9 ", "y\u05b9z\u05b9"])

  " Combining different tests and features
  call add(tl, [2, '[^[=a=]]\+', 'ddaãâbcd', 'dd'])

  " Run the tests
  for t in tl
    let re = t[0]
    let pat = t[1]
    let text = t[2]
    " index in "t" of the expected match; submatch N is at matchidx + N
    let matchidx = 3
    for engine in [0, 1, 2]
      " skip the engine that this entry is not meant for
      if engine == 2 && re == 0 || engine == 1 && re == 1
        continue
      endif
      let &regexpengine = engine
      try
        let l = matchlist(text, pat)
      catch
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", caused an exception: \"' . v:exception . '\"')
        " "l" is undefined or left over from the previous run, there is
        " nothing to compare for this engine
        continue
      endtry
      " check the match itself
      if len(l) == 0 && len(t) > matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text .
              \ '\", did not match, expected: \"' . t[matchidx] . '\"')
      elseif len(l) > 0 && len(t) == matchidx
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected no match')
      elseif len(t) > matchidx && l[0] != t[matchidx]
        call assert_report('Error ' . engine . ': pat: \"' . pat .
              \ '\", text: \"' . text . '\", match: \"' . l[0] .
              \ '\", expected: \"' . t[matchidx] . '\"')
      else
        " Test passed
      endif
      if len(l) > 0
        " check all the nine submatches
        for i in range(1, 9)
          " a submatch that is not listed is expected to be empty
          if len(t) <= matchidx + i
            let e = ''
          else
            let e = t[matchidx + i]
          endif
          if l[i] != e
            call assert_report('Error ' . engine . ': pat: \"' . pat .
                  \ '\", text: \"' . text . '\", submatch ' . i .
                  \ ': \"' . l[i] . '\", expected: \"' . e . '\"')
          endif
        endfor
        unlet i
      endif
    endfor
  endfor
  set regexpengine&
endfunc

" check that 'ambiwidth' does not change the meaning of \p
func Test_ambiwidth()
  set regexpengine=1 ambiwidth=single
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=1 ambiwidth=double
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=2 ambiwidth=single
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine=2 ambiwidth=double
  call assert_equal(0, match("\u00EC", '\p'))
  set regexpengine& ambiwidth&
endfunc

" vim: shiftwidth=2 sts=2 expandtab