xref: /vim-8.2.3635/src/regexp_nfa.c (revision 2bf24176)
1 /* vi:set ts=8 sts=4 sw=4:
2  *
3  * NFA regular expression implementation.
4  *
5  * This file is included in "regexp.c".
6  */
7 
8 /*
9  * Logging of NFA engine.
10  *
11  * The NFA engine can write four log files:
12  * - Error log: Contains NFA engine's fatal errors.
13  * - Dump log: Contains compiled NFA state machine's information.
14  * - Run log: Contains information of matching procedure.
15  * - Debug log: Contains detailed information of matching procedure. Can be
16  *   disabled by undefining NFA_REGEXP_DEBUG_LOG.
17  * The first one can also be used without debug mode.
18  * The last three are enabled when compiled as debug mode and individually
19  * disabled by commenting them out.
20  * The log files can get quite big!
21  * Do disable all of this when compiling Vim for debugging, undefine DEBUG in
22  * regexp.c
23  */
24 #ifdef DEBUG
25 # define NFA_REGEXP_ERROR_LOG	"nfa_regexp_error.log"
26 # define ENABLE_LOG
27 # define NFA_REGEXP_DUMP_LOG	"nfa_regexp_dump.log"
28 # define NFA_REGEXP_RUN_LOG	"nfa_regexp_run.log"
29 # define NFA_REGEXP_DEBUG_LOG	"nfa_regexp_debug.log"
30 #endif
31 
32 /* Added to NFA_ANY - NFA_NUPPER_IC to include a NL. */
33 #define NFA_ADD_NL		31
34 
35 enum
36 {
37     NFA_SPLIT = -1024,
38     NFA_MATCH,
39     NFA_EMPTY,			    /* matches 0-length */
40 
41     NFA_START_COLL,		    /* [abc] start */
42     NFA_END_COLL,		    /* [abc] end */
43     NFA_START_NEG_COLL,		    /* [^abc] start */
44     NFA_END_NEG_COLL,		    /* [^abc] end (postfix only) */
45     NFA_RANGE,			    /* range of the two previous items
46 				     * (postfix only) */
47     NFA_RANGE_MIN,		    /* low end of a range  */
48     NFA_RANGE_MAX,		    /* high end of a range  */
49 
50     NFA_CONCAT,			    /* concatenate two previous items (postfix
51 				     * only) */
52     NFA_OR,			    /* \| (postfix only) */
53     NFA_STAR,			    /* greedy * (posfix only) */
54     NFA_STAR_NONGREEDY,		    /* non-greedy * (postfix only) */
55     NFA_QUEST,			    /* greedy \? (postfix only) */
56     NFA_QUEST_NONGREEDY,	    /* non-greedy \? (postfix only) */
57 
58     NFA_BOL,			    /* ^    Begin line */
59     NFA_EOL,			    /* $    End line */
60     NFA_BOW,			    /* \<   Begin word */
61     NFA_EOW,			    /* \>   End word */
62     NFA_BOF,			    /* \%^  Begin file */
63     NFA_EOF,			    /* \%$  End file */
64     NFA_NEWL,
65     NFA_ZSTART,			    /* Used for \zs */
66     NFA_ZEND,			    /* Used for \ze */
67     NFA_NOPEN,			    /* Start of subexpression marked with \%( */
68     NFA_NCLOSE,			    /* End of subexpr. marked with \%( ... \) */
69     NFA_START_INVISIBLE,
70     NFA_START_INVISIBLE_FIRST,
71     NFA_START_INVISIBLE_NEG,
72     NFA_START_INVISIBLE_NEG_FIRST,
73     NFA_START_INVISIBLE_BEFORE,
74     NFA_START_INVISIBLE_BEFORE_FIRST,
75     NFA_START_INVISIBLE_BEFORE_NEG,
76     NFA_START_INVISIBLE_BEFORE_NEG_FIRST,
77     NFA_START_PATTERN,
78     NFA_END_INVISIBLE,
79     NFA_END_INVISIBLE_NEG,
80     NFA_END_PATTERN,
81     NFA_COMPOSING,		    /* Next nodes in NFA are part of the
82 				       composing multibyte char */
83     NFA_END_COMPOSING,		    /* End of a composing char in the NFA */
84     NFA_ANY_COMPOSING,		    /* \%C: Any composing characters. */
85     NFA_OPT_CHARS,		    /* \%[abc] */
86 
87     /* The following are used only in the postfix form, not in the NFA */
88     NFA_PREV_ATOM_NO_WIDTH,	    /* Used for \@= */
89     NFA_PREV_ATOM_NO_WIDTH_NEG,	    /* Used for \@! */
90     NFA_PREV_ATOM_JUST_BEFORE,	    /* Used for \@<= */
91     NFA_PREV_ATOM_JUST_BEFORE_NEG,  /* Used for \@<! */
92     NFA_PREV_ATOM_LIKE_PATTERN,	    /* Used for \@> */
93 
94     NFA_BACKREF1,		    /* \1 */
95     NFA_BACKREF2,		    /* \2 */
96     NFA_BACKREF3,		    /* \3 */
97     NFA_BACKREF4,		    /* \4 */
98     NFA_BACKREF5,		    /* \5 */
99     NFA_BACKREF6,		    /* \6 */
100     NFA_BACKREF7,		    /* \7 */
101     NFA_BACKREF8,		    /* \8 */
102     NFA_BACKREF9,		    /* \9 */
103 #ifdef FEAT_SYN_HL
104     NFA_ZREF1,			    /* \z1 */
105     NFA_ZREF2,			    /* \z2 */
106     NFA_ZREF3,			    /* \z3 */
107     NFA_ZREF4,			    /* \z4 */
108     NFA_ZREF5,			    /* \z5 */
109     NFA_ZREF6,			    /* \z6 */
110     NFA_ZREF7,			    /* \z7 */
111     NFA_ZREF8,			    /* \z8 */
112     NFA_ZREF9,			    /* \z9 */
113 #endif
114     NFA_SKIP,			    /* Skip characters */
115 
116     NFA_MOPEN,
117     NFA_MOPEN1,
118     NFA_MOPEN2,
119     NFA_MOPEN3,
120     NFA_MOPEN4,
121     NFA_MOPEN5,
122     NFA_MOPEN6,
123     NFA_MOPEN7,
124     NFA_MOPEN8,
125     NFA_MOPEN9,
126 
127     NFA_MCLOSE,
128     NFA_MCLOSE1,
129     NFA_MCLOSE2,
130     NFA_MCLOSE3,
131     NFA_MCLOSE4,
132     NFA_MCLOSE5,
133     NFA_MCLOSE6,
134     NFA_MCLOSE7,
135     NFA_MCLOSE8,
136     NFA_MCLOSE9,
137 
138 #ifdef FEAT_SYN_HL
139     NFA_ZOPEN,
140     NFA_ZOPEN1,
141     NFA_ZOPEN2,
142     NFA_ZOPEN3,
143     NFA_ZOPEN4,
144     NFA_ZOPEN5,
145     NFA_ZOPEN6,
146     NFA_ZOPEN7,
147     NFA_ZOPEN8,
148     NFA_ZOPEN9,
149 
150     NFA_ZCLOSE,
151     NFA_ZCLOSE1,
152     NFA_ZCLOSE2,
153     NFA_ZCLOSE3,
154     NFA_ZCLOSE4,
155     NFA_ZCLOSE5,
156     NFA_ZCLOSE6,
157     NFA_ZCLOSE7,
158     NFA_ZCLOSE8,
159     NFA_ZCLOSE9,
160 #endif
161 
162     /* NFA_FIRST_NL */
163     NFA_ANY,		/*	Match any one character. */
164     NFA_IDENT,		/*	Match identifier char */
165     NFA_SIDENT,		/*	Match identifier char but no digit */
166     NFA_KWORD,		/*	Match keyword char */
167     NFA_SKWORD,		/*	Match word char but no digit */
168     NFA_FNAME,		/*	Match file name char */
169     NFA_SFNAME,		/*	Match file name char but no digit */
170     NFA_PRINT,		/*	Match printable char */
171     NFA_SPRINT,		/*	Match printable char but no digit */
172     NFA_WHITE,		/*	Match whitespace char */
173     NFA_NWHITE,		/*	Match non-whitespace char */
174     NFA_DIGIT,		/*	Match digit char */
175     NFA_NDIGIT,		/*	Match non-digit char */
176     NFA_HEX,		/*	Match hex char */
177     NFA_NHEX,		/*	Match non-hex char */
178     NFA_OCTAL,		/*	Match octal char */
179     NFA_NOCTAL,		/*	Match non-octal char */
180     NFA_WORD,		/*	Match word char */
181     NFA_NWORD,		/*	Match non-word char */
182     NFA_HEAD,		/*	Match head char */
183     NFA_NHEAD,		/*	Match non-head char */
184     NFA_ALPHA,		/*	Match alpha char */
185     NFA_NALPHA,		/*	Match non-alpha char */
186     NFA_LOWER,		/*	Match lowercase char */
187     NFA_NLOWER,		/*	Match non-lowercase char */
188     NFA_UPPER,		/*	Match uppercase char */
189     NFA_NUPPER,		/*	Match non-uppercase char */
190     NFA_LOWER_IC,	/*	Match [a-z] */
191     NFA_NLOWER_IC,	/*	Match [^a-z] */
192     NFA_UPPER_IC,	/*	Match [A-Z] */
193     NFA_NUPPER_IC,	/*	Match [^A-Z] */
194 
195     NFA_FIRST_NL = NFA_ANY + NFA_ADD_NL,
196     NFA_LAST_NL = NFA_NUPPER_IC + NFA_ADD_NL,
197 
198     NFA_CURSOR,		/*	Match cursor pos */
199     NFA_LNUM,		/*	Match line number */
200     NFA_LNUM_GT,	/*	Match > line number */
201     NFA_LNUM_LT,	/*	Match < line number */
202     NFA_COL,		/*	Match cursor column */
203     NFA_COL_GT,		/*	Match > cursor column */
204     NFA_COL_LT,		/*	Match < cursor column */
205     NFA_VCOL,		/*	Match cursor virtual column */
206     NFA_VCOL_GT,	/*	Match > cursor virtual column */
207     NFA_VCOL_LT,	/*	Match < cursor virtual column */
208     NFA_MARK,		/*	Match mark */
209     NFA_MARK_GT,	/*	Match > mark */
210     NFA_MARK_LT,	/*	Match < mark */
211     NFA_VISUAL,		/*	Match Visual area */
212 
213     /* Character classes [:alnum:] etc */
214     NFA_CLASS_ALNUM,
215     NFA_CLASS_ALPHA,
216     NFA_CLASS_BLANK,
217     NFA_CLASS_CNTRL,
218     NFA_CLASS_DIGIT,
219     NFA_CLASS_GRAPH,
220     NFA_CLASS_LOWER,
221     NFA_CLASS_PRINT,
222     NFA_CLASS_PUNCT,
223     NFA_CLASS_SPACE,
224     NFA_CLASS_UPPER,
225     NFA_CLASS_XDIGIT,
226     NFA_CLASS_TAB,
227     NFA_CLASS_RETURN,
228     NFA_CLASS_BACKSPACE,
229     NFA_CLASS_ESCAPE
230 };
231 
232 /* Keep in sync with classchars. */
233 static int nfa_classcodes[] = {
234     NFA_ANY, NFA_IDENT, NFA_SIDENT, NFA_KWORD,NFA_SKWORD,
235     NFA_FNAME, NFA_SFNAME, NFA_PRINT, NFA_SPRINT,
236     NFA_WHITE, NFA_NWHITE, NFA_DIGIT, NFA_NDIGIT,
237     NFA_HEX, NFA_NHEX, NFA_OCTAL, NFA_NOCTAL,
238     NFA_WORD, NFA_NWORD, NFA_HEAD, NFA_NHEAD,
239     NFA_ALPHA, NFA_NALPHA, NFA_LOWER, NFA_NLOWER,
240     NFA_UPPER, NFA_NUPPER
241 };
242 
243 static char_u e_nul_found[] = N_("E865: (NFA) Regexp end encountered prematurely");
244 static char_u e_misplaced[] = N_("E866: (NFA regexp) Misplaced %c");
245 static char_u e_ill_char_class[] = N_("E877: (NFA regexp) Invalid character class: %ld");
246 
247 /* re_flags passed to nfa_regcomp() */
248 static int nfa_re_flags;
249 
250 /* NFA regexp \ze operator encountered. */
251 static int nfa_has_zend;
252 
253 /* NFA regexp \1 .. \9 encountered. */
254 static int nfa_has_backref;
255 
256 #ifdef FEAT_SYN_HL
257 /* NFA regexp has \z( ), set zsubexpr. */
258 static int nfa_has_zsubexpr;
259 #endif
260 
261 /* Number of sub expressions actually being used during execution. 1 if only
262  * the whole match (subexpr 0) is used. */
263 static int nfa_nsubexpr;
264 
265 static int *post_start;  /* holds the postfix form of r.e. */
266 static int *post_end;
267 static int *post_ptr;
268 
269 static int nstate;	/* Number of states in the NFA. Also used when
270 			 * executing. */
271 static int istate;	/* Index in the state vector, used in alloc_state() */
272 
273 /* If not NULL match must end at this position */
274 static save_se_T *nfa_endp = NULL;
275 
276 /* listid is global, so that it increases on recursive calls to
277  * nfa_regmatch(), which means we don't have to clear the lastlist field of
278  * all the states. */
279 static int nfa_listid;
280 static int nfa_alt_listid;
281 
282 /* 0 for first call to nfa_regmatch(), 1 for recursive call. */
283 static int nfa_ll_index = 0;
284 
285 static int nfa_regcomp_start __ARGS((char_u *expr, int re_flags));
286 static int nfa_get_reganch __ARGS((nfa_state_T *start, int depth));
287 static int nfa_get_regstart __ARGS((nfa_state_T *start, int depth));
288 static char_u *nfa_get_match_text __ARGS((nfa_state_T *start));
289 static int realloc_post_list __ARGS((void));
290 static int nfa_recognize_char_class __ARGS((char_u *start, char_u *end, int extra_newl));
291 static int nfa_emit_equi_class __ARGS((int c));
292 static int nfa_regatom __ARGS((void));
293 static int nfa_regpiece __ARGS((void));
294 static int nfa_regconcat __ARGS((void));
295 static int nfa_regbranch __ARGS((void));
296 static int nfa_reg __ARGS((int paren));
297 #ifdef DEBUG
298 static void nfa_set_code __ARGS((int c));
299 static void nfa_postfix_dump __ARGS((char_u *expr, int retval));
300 static void nfa_print_state __ARGS((FILE *debugf, nfa_state_T *state));
301 static void nfa_print_state2 __ARGS((FILE *debugf, nfa_state_T *state, garray_T *indent));
302 static void nfa_dump __ARGS((nfa_regprog_T *prog));
303 #endif
304 static int *re2post __ARGS((void));
305 static nfa_state_T *alloc_state __ARGS((int c, nfa_state_T *out, nfa_state_T *out1));
306 static void st_error __ARGS((int *postfix, int *end, int *p));
307 static int nfa_max_width __ARGS((nfa_state_T *startstate, int depth));
308 static nfa_state_T *post2nfa __ARGS((int *postfix, int *end, int nfa_calc_size));
309 static void nfa_postprocess __ARGS((nfa_regprog_T *prog));
310 static int check_char_class __ARGS((int class, int c));
311 static void nfa_save_listids __ARGS((nfa_regprog_T *prog, int *list));
312 static void nfa_restore_listids __ARGS((nfa_regprog_T *prog, int *list));
313 static int nfa_re_num_cmp __ARGS((long_u val, int op, long_u pos));
314 static long nfa_regtry __ARGS((nfa_regprog_T *prog, colnr_T col, proftime_T *tm));
315 static long nfa_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
316 static regprog_T *nfa_regcomp __ARGS((char_u *expr, int re_flags));
317 static void nfa_regfree __ARGS((regprog_T *prog));
318 static int  nfa_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int line_lbr));
319 static long nfa_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
320 static int match_follows __ARGS((nfa_state_T *startstate, int depth));
321 static int failure_chance __ARGS((nfa_state_T *state, int depth));
322 
323 /* helper functions used when doing re2post() ... regatom() parsing */
324 #define EMIT(c)	do {				\
325 		    if (post_ptr >= post_end && realloc_post_list() == FAIL) \
326 			return FAIL;		\
327 		    *post_ptr++ = c;		\
328 		} while (0)
329 
330 /*
331  * Initialize internal variables before NFA compilation.
332  * Return OK on success, FAIL otherwise.
333  */
334     static int
335 nfa_regcomp_start(expr, re_flags)
336     char_u	*expr;
337     int		re_flags;	    /* see vim_regcomp() */
338 {
339     size_t	postfix_size;
340     int		nstate_max;
341 
342     nstate = 0;
343     istate = 0;
344     /* A reasonable estimation for maximum size */
345     nstate_max = (int)(STRLEN(expr) + 1) * 25;
346 
347     /* Some items blow up in size, such as [A-z].  Add more space for that.
348      * When it is still not enough realloc_post_list() will be used. */
349     nstate_max += 1000;
350 
351     /* Size for postfix representation of expr. */
352     postfix_size = sizeof(int) * nstate_max;
353 
354     post_start = (int *)lalloc(postfix_size, TRUE);
355     if (post_start == NULL)
356 	return FAIL;
357     post_ptr = post_start;
358     post_end = post_start + nstate_max;
359     nfa_has_zend = FALSE;
360     nfa_has_backref = FALSE;
361 
362     /* shared with BT engine */
363     regcomp_start(expr, re_flags);
364 
365     return OK;
366 }
367 
368 /*
369  * Figure out if the NFA state list starts with an anchor, must match at start
370  * of the line.
371  */
372     static int
373 nfa_get_reganch(start, depth)
374     nfa_state_T *start;
375     int		depth;
376 {
377     nfa_state_T *p = start;
378 
379     if (depth > 4)
380 	return 0;
381 
382     while (p != NULL)
383     {
384 	switch (p->c)
385 	{
386 	    case NFA_BOL:
387 	    case NFA_BOF:
388 		return 1; /* yes! */
389 
390 	    case NFA_ZSTART:
391 	    case NFA_ZEND:
392 	    case NFA_CURSOR:
393 	    case NFA_VISUAL:
394 
395 	    case NFA_MOPEN:
396 	    case NFA_MOPEN1:
397 	    case NFA_MOPEN2:
398 	    case NFA_MOPEN3:
399 	    case NFA_MOPEN4:
400 	    case NFA_MOPEN5:
401 	    case NFA_MOPEN6:
402 	    case NFA_MOPEN7:
403 	    case NFA_MOPEN8:
404 	    case NFA_MOPEN9:
405 	    case NFA_NOPEN:
406 #ifdef FEAT_SYN_HL
407 	    case NFA_ZOPEN:
408 	    case NFA_ZOPEN1:
409 	    case NFA_ZOPEN2:
410 	    case NFA_ZOPEN3:
411 	    case NFA_ZOPEN4:
412 	    case NFA_ZOPEN5:
413 	    case NFA_ZOPEN6:
414 	    case NFA_ZOPEN7:
415 	    case NFA_ZOPEN8:
416 	    case NFA_ZOPEN9:
417 #endif
418 		p = p->out;
419 		break;
420 
421 	    case NFA_SPLIT:
422 		return nfa_get_reganch(p->out, depth + 1)
423 				       && nfa_get_reganch(p->out1, depth + 1);
424 
425 	    default:
426 		return 0; /* noooo */
427 	}
428     }
429     return 0;
430 }
431 
432 /*
433  * Figure out if the NFA state list starts with a character which must match
434  * at start of the match.
435  */
436     static int
437 nfa_get_regstart(start, depth)
438     nfa_state_T *start;
439     int		depth;
440 {
441     nfa_state_T *p = start;
442 
443     if (depth > 4)
444 	return 0;
445 
446     while (p != NULL)
447     {
448 	switch (p->c)
449 	{
450 	    /* all kinds of zero-width matches */
451 	    case NFA_BOL:
452 	    case NFA_BOF:
453 	    case NFA_BOW:
454 	    case NFA_EOW:
455 	    case NFA_ZSTART:
456 	    case NFA_ZEND:
457 	    case NFA_CURSOR:
458 	    case NFA_VISUAL:
459 	    case NFA_LNUM:
460 	    case NFA_LNUM_GT:
461 	    case NFA_LNUM_LT:
462 	    case NFA_COL:
463 	    case NFA_COL_GT:
464 	    case NFA_COL_LT:
465 	    case NFA_VCOL:
466 	    case NFA_VCOL_GT:
467 	    case NFA_VCOL_LT:
468 	    case NFA_MARK:
469 	    case NFA_MARK_GT:
470 	    case NFA_MARK_LT:
471 
472 	    case NFA_MOPEN:
473 	    case NFA_MOPEN1:
474 	    case NFA_MOPEN2:
475 	    case NFA_MOPEN3:
476 	    case NFA_MOPEN4:
477 	    case NFA_MOPEN5:
478 	    case NFA_MOPEN6:
479 	    case NFA_MOPEN7:
480 	    case NFA_MOPEN8:
481 	    case NFA_MOPEN9:
482 	    case NFA_NOPEN:
483 #ifdef FEAT_SYN_HL
484 	    case NFA_ZOPEN:
485 	    case NFA_ZOPEN1:
486 	    case NFA_ZOPEN2:
487 	    case NFA_ZOPEN3:
488 	    case NFA_ZOPEN4:
489 	    case NFA_ZOPEN5:
490 	    case NFA_ZOPEN6:
491 	    case NFA_ZOPEN7:
492 	    case NFA_ZOPEN8:
493 	    case NFA_ZOPEN9:
494 #endif
495 		p = p->out;
496 		break;
497 
498 	    case NFA_SPLIT:
499 	    {
500 		int c1 = nfa_get_regstart(p->out, depth + 1);
501 		int c2 = nfa_get_regstart(p->out1, depth + 1);
502 
503 		if (c1 == c2)
504 		    return c1; /* yes! */
505 		return 0;
506 	    }
507 
508 	    default:
509 		if (p->c > 0)
510 		    return p->c; /* yes! */
511 		return 0;
512 	}
513     }
514     return 0;
515 }
516 
517 /*
518  * Figure out if the NFA state list contains just literal text and nothing
519  * else.  If so return a string in allocated memory with what must match after
520  * regstart.  Otherwise return NULL.
521  */
522     static char_u *
523 nfa_get_match_text(start)
524     nfa_state_T *start;
525 {
526     nfa_state_T *p = start;
527     int		len = 0;
528     char_u	*ret;
529     char_u	*s;
530 
531     if (p->c != NFA_MOPEN)
532 	return NULL; /* just in case */
533     p = p->out;
534     while (p->c > 0)
535     {
536 	len += MB_CHAR2LEN(p->c);
537 	p = p->out;
538     }
539     if (p->c != NFA_MCLOSE || p->out->c != NFA_MATCH)
540 	return NULL;
541 
542     ret = alloc(len);
543     if (ret != NULL)
544     {
545 	p = start->out->out; /* skip first char, it goes into regstart */
546 	s = ret;
547 	while (p->c > 0)
548 	{
549 #ifdef FEAT_MBYTE
550 	    if (has_mbyte)
551 		s += (*mb_char2bytes)(p->c, s);
552 	    else
553 #endif
554 		*s++ = p->c;
555 	    p = p->out;
556 	}
557 	*s = NUL;
558     }
559     return ret;
560 }
561 
562 /*
563  * Allocate more space for post_start.  Called when
564  * running above the estimated number of states.
565  */
566     static int
567 realloc_post_list()
568 {
569     int   nstate_max = (int)(post_end - post_start);
570     int   new_max = nstate_max + 1000;
571     int   *new_start;
572     int	  *old_start;
573 
574     new_start = (int *)lalloc(new_max * sizeof(int), TRUE);
575     if (new_start == NULL)
576 	return FAIL;
577     mch_memmove(new_start, post_start, nstate_max * sizeof(int));
578     old_start = post_start;
579     post_start = new_start;
580     post_ptr = new_start + (post_ptr - old_start);
581     post_end = post_start + new_max;
582     vim_free(old_start);
583     return OK;
584 }
585 
586 /*
587  * Search between "start" and "end" and try to recognize a
588  * character class in expanded form. For example [0-9].
589  * On success, return the id the character class to be emitted.
590  * On failure, return 0 (=FAIL)
591  * Start points to the first char of the range, while end should point
592  * to the closing brace.
593  * Keep in mind that 'ignorecase' applies at execution time, thus [a-z] may
594  * need to be interpreted as [a-zA-Z].
595  */
596     static int
597 nfa_recognize_char_class(start, end, extra_newl)
598     char_u  *start;
599     char_u  *end;
600     int	    extra_newl;
601 {
602 #   define CLASS_not		0x80
603 #   define CLASS_af		0x40
604 #   define CLASS_AF		0x20
605 #   define CLASS_az		0x10
606 #   define CLASS_AZ		0x08
607 #   define CLASS_o7		0x04
608 #   define CLASS_o9		0x02
609 #   define CLASS_underscore	0x01
610 
611     int		newl = FALSE;
612     char_u	*p;
613     int		config = 0;
614 
615     if (extra_newl == TRUE)
616 	newl = TRUE;
617 
618     if (*end != ']')
619 	return FAIL;
620     p = start;
621     if (*p == '^')
622     {
623 	config |= CLASS_not;
624 	p++;
625     }
626 
627     while (p < end)
628     {
629 	if (p + 2 < end && *(p + 1) == '-')
630 	{
631 	    switch (*p)
632 	    {
633 		case '0':
634 		    if (*(p + 2) == '9')
635 		    {
636 			config |= CLASS_o9;
637 			break;
638 		    }
639 		    else
640 		    if (*(p + 2) == '7')
641 		    {
642 			config |= CLASS_o7;
643 			break;
644 		    }
645 		case 'a':
646 		    if (*(p + 2) == 'z')
647 		    {
648 			config |= CLASS_az;
649 			break;
650 		    }
651 		    else
652 		    if (*(p + 2) == 'f')
653 		    {
654 			config |= CLASS_af;
655 			break;
656 		    }
657 		case 'A':
658 		    if (*(p + 2) == 'Z')
659 		    {
660 			config |= CLASS_AZ;
661 			break;
662 		    }
663 		    else
664 		    if (*(p + 2) == 'F')
665 		    {
666 			config |= CLASS_AF;
667 			break;
668 		    }
669 		/* FALLTHROUGH */
670 		default:
671 		    return FAIL;
672 	    }
673 	    p += 3;
674 	}
675 	else if (p + 1 < end && *p == '\\' && *(p + 1) == 'n')
676 	{
677 	    newl = TRUE;
678 	    p += 2;
679 	}
680 	else if (*p == '_')
681 	{
682 	    config |= CLASS_underscore;
683 	    p ++;
684 	}
685 	else if (*p == '\n')
686 	{
687 	    newl = TRUE;
688 	    p ++;
689 	}
690 	else
691 	    return FAIL;
692     } /* while (p < end) */
693 
694     if (p != end)
695 	return FAIL;
696 
697     if (newl == TRUE)
698 	extra_newl = NFA_ADD_NL;
699 
700     switch (config)
701     {
702 	case CLASS_o9:
703 	    return extra_newl + NFA_DIGIT;
704 	case CLASS_not |  CLASS_o9:
705 	    return extra_newl + NFA_NDIGIT;
706 	case CLASS_af | CLASS_AF | CLASS_o9:
707 	    return extra_newl + NFA_HEX;
708 	case CLASS_not | CLASS_af | CLASS_AF | CLASS_o9:
709 	    return extra_newl + NFA_NHEX;
710 	case CLASS_o7:
711 	    return extra_newl + NFA_OCTAL;
712 	case CLASS_not | CLASS_o7:
713 	    return extra_newl + NFA_NOCTAL;
714 	case CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore:
715 	    return extra_newl + NFA_WORD;
716 	case CLASS_not | CLASS_az | CLASS_AZ | CLASS_o9 | CLASS_underscore:
717 	    return extra_newl + NFA_NWORD;
718 	case CLASS_az | CLASS_AZ | CLASS_underscore:
719 	    return extra_newl + NFA_HEAD;
720 	case CLASS_not | CLASS_az | CLASS_AZ | CLASS_underscore:
721 	    return extra_newl + NFA_NHEAD;
722 	case CLASS_az | CLASS_AZ:
723 	    return extra_newl + NFA_ALPHA;
724 	case CLASS_not | CLASS_az | CLASS_AZ:
725 	    return extra_newl + NFA_NALPHA;
726 	case CLASS_az:
727 	   return extra_newl + NFA_LOWER_IC;
728 	case CLASS_not | CLASS_az:
729 	    return extra_newl + NFA_NLOWER_IC;
730 	case CLASS_AZ:
731 	    return extra_newl + NFA_UPPER_IC;
732 	case CLASS_not | CLASS_AZ:
733 	    return extra_newl + NFA_NUPPER_IC;
734     }
735     return FAIL;
736 }
737 
738 /*
739  * Produce the bytes for equivalence class "c".
740  * Currently only handles latin1, latin9 and utf-8.
741  * Emits bytes in postfix notation: 'a,b,NFA_OR,c,NFA_OR' is
742  * equivalent to 'a OR b OR c'
743  *
744  * NOTE! When changing this function, also update reg_equi_class()
745  */
746     static int
747 nfa_emit_equi_class(c)
748     int	    c;
749 {
750 #define EMIT2(c)    EMIT(c); EMIT(NFA_CONCAT);
751 #ifdef FEAT_MBYTE
752 # define EMITMBC(c) EMIT(c); EMIT(NFA_CONCAT);
753 #else
754 # define EMITMBC(c)
755 #endif
756 
757 #ifdef FEAT_MBYTE
758     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
759 					 || STRCMP(p_enc, "iso-8859-15") == 0)
760 #endif
761     {
762 	switch (c)
763 	{
764 	    case 'A': case 0300: case 0301: case 0302:
765 	    case 0303: case 0304: case 0305:
766 	    CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
767 	    CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
768 		    EMIT2('A');	EMIT2(0300); EMIT2(0301); EMIT2(0302);
769 		    EMIT2(0303); EMIT2(0304); EMIT2(0305);
770 		    EMITMBC(0x100) EMITMBC(0x102) EMITMBC(0x104)
771 		    EMITMBC(0x1cd) EMITMBC(0x1de) EMITMBC(0x1e0)
772 		    EMITMBC(0x1ea2)
773 		    return OK;
774 
775 	    case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
776 		    EMIT2('B'); EMITMBC(0x1e02) EMITMBC(0x1e06)
777 		    return OK;
778 
779 	    case 'C': case 0307:
780 	    CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
781 		    EMIT2('C'); EMIT2(0307); EMITMBC(0x106) EMITMBC(0x108)
782 		    EMITMBC(0x10a) EMITMBC(0x10c)
783 		    return OK;
784 
785 	    case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
786 	    CASEMBC(0x1e0e) CASEMBC(0x1e10)
787 		    EMIT2('D'); EMITMBC(0x10e) EMITMBC(0x110) EMITMBC(0x1e0a)
788 		    EMITMBC(0x1e0e) EMITMBC(0x1e10)
789 		    return OK;
790 
791 	    case 'E': case 0310: case 0311: case 0312: case 0313:
792 	    CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
793 	    CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
794 		    EMIT2('E'); EMIT2(0310); EMIT2(0311); EMIT2(0312);
795 		    EMIT2(0313);
796 		    EMITMBC(0x112) EMITMBC(0x114) EMITMBC(0x116)
797 		    EMITMBC(0x118) EMITMBC(0x11a) EMITMBC(0x1eba)
798 		    EMITMBC(0x1ebc)
799 		    return OK;
800 
801 	    case 'F': CASEMBC(0x1e1e)
802 		    EMIT2('F'); EMITMBC(0x1e1e)
803 		    return OK;
804 
805 	    case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
806 	    CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
807 	    CASEMBC(0x1e20)
808 		    EMIT2('G'); EMITMBC(0x11c) EMITMBC(0x11e) EMITMBC(0x120)
809 		    EMITMBC(0x122) EMITMBC(0x1e4) EMITMBC(0x1e6)
810 		    EMITMBC(0x1f4) EMITMBC(0x1e20)
811 		    return OK;
812 
813 	    case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
814 	    CASEMBC(0x1e26) CASEMBC(0x1e28)
815 		    EMIT2('H'); EMITMBC(0x124) EMITMBC(0x126) EMITMBC(0x1e22)
816 		    EMITMBC(0x1e26) EMITMBC(0x1e28)
817 		    return OK;
818 
819 	    case 'I': case 0314: case 0315: case 0316: case 0317:
820 	    CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
821 	    CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
822 		    EMIT2('I'); EMIT2(0314); EMIT2(0315); EMIT2(0316);
823 		    EMIT2(0317); EMITMBC(0x128) EMITMBC(0x12a)
824 		    EMITMBC(0x12c) EMITMBC(0x12e) EMITMBC(0x130)
825 		    EMITMBC(0x1cf) EMITMBC(0x1ec8)
826 		    return OK;
827 
828 	    case 'J': CASEMBC(0x134)
829 		    EMIT2('J'); EMITMBC(0x134)
830 		    return OK;
831 
832 	    case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
833 	    CASEMBC(0x1e34)
834 		    EMIT2('K'); EMITMBC(0x136) EMITMBC(0x1e8) EMITMBC(0x1e30)
835 		    EMITMBC(0x1e34)
836 		    return OK;
837 
838 	    case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
839 	    CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
840 		    EMIT2('L'); EMITMBC(0x139) EMITMBC(0x13b) EMITMBC(0x13d)
841 		    EMITMBC(0x13f) EMITMBC(0x141) EMITMBC(0x1e3a)
842 		    return OK;
843 
844 	    case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
845 		    EMIT2('M'); EMITMBC(0x1e3e) EMITMBC(0x1e40)
846 		    return OK;
847 
848 	    case 'N': case 0321:
849 	    CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
850 	    CASEMBC(0x1e48)
851 		    EMIT2('N'); EMIT2(0321); EMITMBC(0x143) EMITMBC(0x145)
852 		    EMITMBC(0x147) EMITMBC(0x1e44) EMITMBC(0x1e48)
853 		    return OK;
854 
855 	    case 'O': case 0322: case 0323: case 0324: case 0325:
856 	    case 0326: case 0330:
857 	    CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
858 	    CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
859 		    EMIT2('O'); EMIT2(0322); EMIT2(0323); EMIT2(0324);
860 		    EMIT2(0325); EMIT2(0326); EMIT2(0330);
861 		    EMITMBC(0x14c) EMITMBC(0x14e) EMITMBC(0x150)
862 		    EMITMBC(0x1a0) EMITMBC(0x1d1) EMITMBC(0x1ea)
863 		    EMITMBC(0x1ec) EMITMBC(0x1ece)
864 		    return OK;
865 
866 	    case 'P': case 0x1e54: case 0x1e56:
867 		    EMIT2('P'); EMITMBC(0x1e54) EMITMBC(0x1e56)
868 		    return OK;
869 
870 	    case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
871 	    CASEMBC(0x1e58) CASEMBC(0x1e5e)
872 		    EMIT2('R'); EMITMBC(0x154) EMITMBC(0x156) EMITMBC(0x158)
873 		    EMITMBC(0x1e58) EMITMBC(0x1e5e)
874 		    return OK;
875 
876 	    case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
877 	    CASEMBC(0x160) CASEMBC(0x1e60)
878 		    EMIT2('S'); EMITMBC(0x15a) EMITMBC(0x15c) EMITMBC(0x15e)
879 		    EMITMBC(0x160) EMITMBC(0x1e60)
880 		    return OK;
881 
882 	    case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
883 	    CASEMBC(0x1e6a) CASEMBC(0x1e6e)
884 		    EMIT2('T'); EMITMBC(0x162) EMITMBC(0x164) EMITMBC(0x166)
885 		    EMITMBC(0x1e6a) EMITMBC(0x1e6e)
886 		    return OK;
887 
888 	    case 'U': case 0331: case 0332: case 0333: case 0334:
889 	    CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
890 	    CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
891 	    CASEMBC(0x1ee6)
892 		    EMIT2('U'); EMIT2(0331); EMIT2(0332); EMIT2(0333);
893 		    EMIT2(0334); EMITMBC(0x168) EMITMBC(0x16a)
894 		    EMITMBC(0x16c) EMITMBC(0x16e) EMITMBC(0x170)
895 		    EMITMBC(0x172) EMITMBC(0x1af) EMITMBC(0x1d3)
896 		    EMITMBC(0x1ee6)
897 		    return OK;
898 
899 	    case 'V': CASEMBC(0x1e7c)
900 		    EMIT2('V'); EMITMBC(0x1e7c)
901 		    return OK;
902 
903 	    case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
904 	    CASEMBC(0x1e84) CASEMBC(0x1e86)
905 		    EMIT2('W'); EMITMBC(0x174) EMITMBC(0x1e80) EMITMBC(0x1e82)
906 		    EMITMBC(0x1e84) EMITMBC(0x1e86)
907 		    return OK;
908 
909 	    case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
910 		    EMIT2('X'); EMITMBC(0x1e8a) EMITMBC(0x1e8c)
911 		    return OK;
912 
913 	    case 'Y': case 0335:
914 	    CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
915 	    CASEMBC(0x1ef6) CASEMBC(0x1ef8)
916 		    EMIT2('Y'); EMIT2(0335); EMITMBC(0x176) EMITMBC(0x178)
917 		    EMITMBC(0x1e8e) EMITMBC(0x1ef2) EMITMBC(0x1ef6)
918 		    EMITMBC(0x1ef8)
919 		    return OK;
920 
921 	    case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
922 	    CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
923 		    EMIT2('Z'); EMITMBC(0x179) EMITMBC(0x17b) EMITMBC(0x17d)
924 		    EMITMBC(0x1b5) EMITMBC(0x1e90) EMITMBC(0x1e94)
925 		    return OK;
926 
927 	    case 'a': case 0340: case 0341: case 0342:
928 	    case 0343: case 0344: case 0345:
929 	    CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
930 	    CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
931 		    EMIT2('a'); EMIT2(0340); EMIT2(0341); EMIT2(0342);
932 		    EMIT2(0343); EMIT2(0344); EMIT2(0345);
933 		    EMITMBC(0x101) EMITMBC(0x103) EMITMBC(0x105)
934 		    EMITMBC(0x1ce) EMITMBC(0x1df) EMITMBC(0x1e1)
935 		    EMITMBC(0x1ea3)
936 		    return OK;
937 
938 	    case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
939 		    EMIT2('b'); EMITMBC(0x1e03) EMITMBC(0x1e07)
940 		    return OK;
941 
942 	    case 'c': case 0347:
943 	    CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
944 		    EMIT2('c'); EMIT2(0347); EMITMBC(0x107) EMITMBC(0x109)
945 		    EMITMBC(0x10b) EMITMBC(0x10d)
946 		    return OK;
947 
948 	    case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
949 	    CASEMBC(0x1e0f) CASEMBC(0x1e11)
950 		    EMIT2('d'); EMITMBC(0x10f) EMITMBC(0x111)
951 		    EMITMBC(0x1e0b) EMITMBC(0x1e0f) EMITMBC(0x1e11)
952 		    return OK;
953 
954 	    case 'e': case 0350: case 0351: case 0352: case 0353:
955 	    CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
956 	    CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
957 		    EMIT2('e'); EMIT2(0350); EMIT2(0351); EMIT2(0352);
958 		    EMIT2(0353); EMITMBC(0x113) EMITMBC(0x115)
959 		    EMITMBC(0x117) EMITMBC(0x119) EMITMBC(0x11b)
960 		    EMITMBC(0x1ebb) EMITMBC(0x1ebd)
961 		    return OK;
962 
963 	    case 'f': CASEMBC(0x1e1f)
964 		    EMIT2('f'); EMITMBC(0x1e1f)
965 		    return OK;
966 
967 	    case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
968 	    CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
969 	    CASEMBC(0x1e21)
970 		    EMIT2('g'); EMITMBC(0x11d) EMITMBC(0x11f) EMITMBC(0x121)
971 		    EMITMBC(0x123) EMITMBC(0x1e5) EMITMBC(0x1e7)
972 		    EMITMBC(0x1f5) EMITMBC(0x1e21)
973 		    return OK;
974 
975 	    case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
976 	    CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
977 		    EMIT2('h'); EMITMBC(0x125) EMITMBC(0x127) EMITMBC(0x1e23)
978 		    EMITMBC(0x1e27) EMITMBC(0x1e29) EMITMBC(0x1e96)
979 		    return OK;
980 
981 	    case 'i': case 0354: case 0355: case 0356: case 0357:
982 	    CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
983 	    CASEMBC(0x1d0) CASEMBC(0x1ec9)
984 		    EMIT2('i'); EMIT2(0354); EMIT2(0355); EMIT2(0356);
985 		    EMIT2(0357); EMITMBC(0x129) EMITMBC(0x12b)
986 		    EMITMBC(0x12d) EMITMBC(0x12f) EMITMBC(0x1d0)
987 		    EMITMBC(0x1ec9)
988 		    return OK;
989 
990 	    case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
991 		    EMIT2('j'); EMITMBC(0x135) EMITMBC(0x1f0)
992 		    return OK;
993 
994 	    case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
995 	    CASEMBC(0x1e35)
996 		    EMIT2('k'); EMITMBC(0x137) EMITMBC(0x1e9) EMITMBC(0x1e31)
997 		    EMITMBC(0x1e35)
998 		    return OK;
999 
1000 	    case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1001 	    CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1002 		    EMIT2('l'); EMITMBC(0x13a) EMITMBC(0x13c) EMITMBC(0x13e)
1003 		    EMITMBC(0x140) EMITMBC(0x142) EMITMBC(0x1e3b)
1004 		    return OK;
1005 
1006 	    case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1007 		    EMIT2('m'); EMITMBC(0x1e3f) EMITMBC(0x1e41)
1008 		    return OK;
1009 
1010 	    case 'n': case 0361:
1011 	    CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1012 	    CASEMBC(0x1e45) CASEMBC(0x1e49)
1013 		    EMIT2('n'); EMIT2(0361); EMITMBC(0x144) EMITMBC(0x146)
1014 		    EMITMBC(0x148) EMITMBC(0x149) EMITMBC(0x1e45)
1015 		    EMITMBC(0x1e49)
1016 		    return OK;
1017 
1018 	    case 'o': case 0362: case 0363: case 0364: case 0365:
1019 	    case 0366: case 0370:
1020 	    CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1021 	    CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
1022 		    EMIT2('o'); EMIT2(0362); EMIT2(0363); EMIT2(0364);
1023 		    EMIT2(0365); EMIT2(0366); EMIT2(0370);
1024 		    EMITMBC(0x14d) EMITMBC(0x14f) EMITMBC(0x151)
1025 		    EMITMBC(0x1a1) EMITMBC(0x1d2) EMITMBC(0x1eb)
1026 		    EMITMBC(0x1ed) EMITMBC(0x1ecf)
1027 		    return OK;
1028 
1029 	    case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1030 		    EMIT2('p'); EMITMBC(0x1e55) EMITMBC(0x1e57)
1031 		    return OK;
1032 
1033 	    case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1034 	    CASEMBC(0x1e59) CASEMBC(0x1e5f)
1035 		    EMIT2('r'); EMITMBC(0x155) EMITMBC(0x157) EMITMBC(0x159)
1036 		    EMITMBC(0x1e59) EMITMBC(0x1e5f)
1037 		    return OK;
1038 
1039 	    case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1040 	    CASEMBC(0x161) CASEMBC(0x1e61)
1041 		    EMIT2('s'); EMITMBC(0x15b) EMITMBC(0x15d) EMITMBC(0x15f)
1042 		    EMITMBC(0x161) EMITMBC(0x1e61)
1043 		    return OK;
1044 
1045 	    case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1046 	    CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1047 		    EMIT2('t'); EMITMBC(0x163) EMITMBC(0x165) EMITMBC(0x167)
1048 		    EMITMBC(0x1e6b) EMITMBC(0x1e6f) EMITMBC(0x1e97)
1049 		    return OK;
1050 
1051 	    case 'u': case 0371: case 0372: case 0373: case 0374:
1052 	    CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1053 	    CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1054 	    CASEMBC(0x1ee7)
1055 		    EMIT2('u'); EMIT2(0371); EMIT2(0372); EMIT2(0373);
1056 		    EMIT2(0374); EMITMBC(0x169) EMITMBC(0x16b)
1057 		    EMITMBC(0x16d) EMITMBC(0x16f) EMITMBC(0x171)
1058 		    EMITMBC(0x173) EMITMBC(0x1b0) EMITMBC(0x1d4)
1059 		    EMITMBC(0x1ee7)
1060 		    return OK;
1061 
1062 	    case 'v': CASEMBC(0x1e7d)
1063 		    EMIT2('v'); EMITMBC(0x1e7d)
1064 		    return OK;
1065 
1066 	    case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1067 	    CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1068 		    EMIT2('w'); EMITMBC(0x175) EMITMBC(0x1e81) EMITMBC(0x1e83)
1069 		    EMITMBC(0x1e85) EMITMBC(0x1e87) EMITMBC(0x1e98)
1070 		    return OK;
1071 
1072 	    case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1073 		    EMIT2('x'); EMITMBC(0x1e8b) EMITMBC(0x1e8d)
1074 		    return OK;
1075 
1076 	    case 'y': case 0375: case 0377:
1077 	    CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1078 	    CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
1079 		    EMIT2('y'); EMIT2(0375); EMIT2(0377); EMITMBC(0x177)
1080 		    EMITMBC(0x1e8f) EMITMBC(0x1e99) EMITMBC(0x1ef3)
1081 		    EMITMBC(0x1ef7) EMITMBC(0x1ef9)
1082 		    return OK;
1083 
1084 	    case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1085 	    CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1086 		    EMIT2('z'); EMITMBC(0x17a) EMITMBC(0x17c) EMITMBC(0x17e)
1087 		    EMITMBC(0x1b6) EMITMBC(0x1e91) EMITMBC(0x1e95)
1088 		    return OK;
1089 
1090 	    /* default: character itself */
1091 	}
1092     }
1093 
1094     EMIT2(c);
1095     return OK;
1096 #undef EMIT2
1097 #undef EMITMBC
1098 }
1099 
1100 /*
1101  * Code to parse regular expression.
1102  *
1103  * We try to reuse parsing functions in regexp.c to
1104  * minimize surprise and keep the syntax consistent.
1105  */
1106 
1107 /*
1108  * Parse the lowest level.
1109  *
1110  * An atom can be one of a long list of items.  Many atoms match one character
1111  * in the text.  It is often an ordinary character or a character class.
1112  * Braces can be used to make a pattern into an atom.  The "\z(\)" construct
1113  * is only for syntax highlighting.
1114  *
1115  * atom    ::=     ordinary-atom
1116  *     or  \( pattern \)
1117  *     or  \%( pattern \)
1118  *     or  \z( pattern \)
1119  */
1120     static int
1121 nfa_regatom()
1122 {
1123     int		c;
1124     int		charclass;
1125     int		equiclass;
1126     int		collclass;
1127     int		got_coll_char;
1128     char_u	*p;
1129     char_u	*endp;
1130 #ifdef FEAT_MBYTE
1131     char_u	*old_regparse = regparse;
1132 #endif
1133     int		extra = 0;
1134     int		emit_range;
1135     int		negated;
1136     int		result;
1137     int		startc = -1;
1138     int		endc = -1;
1139     int		oldstartc = -1;
1140 
1141     c = getchr();
1142     switch (c)
1143     {
1144 	case NUL:
1145 	    EMSG_RET_FAIL(_(e_nul_found));
1146 
1147 	case Magic('^'):
1148 	    EMIT(NFA_BOL);
1149 	    break;
1150 
1151 	case Magic('$'):
1152 	    EMIT(NFA_EOL);
1153 #if defined(FEAT_SYN_HL) || defined(PROTO)
1154 	    had_eol = TRUE;
1155 #endif
1156 	    break;
1157 
1158 	case Magic('<'):
1159 	    EMIT(NFA_BOW);
1160 	    break;
1161 
1162 	case Magic('>'):
1163 	    EMIT(NFA_EOW);
1164 	    break;
1165 
1166 	case Magic('_'):
1167 	    c = no_Magic(getchr());
1168 	    if (c == NUL)
1169 		EMSG_RET_FAIL(_(e_nul_found));
1170 
1171 	    if (c == '^')	/* "\_^" is start-of-line */
1172 	    {
1173 		EMIT(NFA_BOL);
1174 		break;
1175 	    }
1176 	    if (c == '$')	/* "\_$" is end-of-line */
1177 	    {
1178 		EMIT(NFA_EOL);
1179 #if defined(FEAT_SYN_HL) || defined(PROTO)
1180 		had_eol = TRUE;
1181 #endif
1182 		break;
1183 	    }
1184 
1185 	    extra = NFA_ADD_NL;
1186 
1187 	    /* "\_[" is collection plus newline */
1188 	    if (c == '[')
1189 		goto collection;
1190 
1191 	/* "\_x" is character class plus newline */
1192 	/*FALLTHROUGH*/
1193 
1194 	/*
1195 	 * Character classes.
1196 	 */
1197 	case Magic('.'):
1198 	case Magic('i'):
1199 	case Magic('I'):
1200 	case Magic('k'):
1201 	case Magic('K'):
1202 	case Magic('f'):
1203 	case Magic('F'):
1204 	case Magic('p'):
1205 	case Magic('P'):
1206 	case Magic('s'):
1207 	case Magic('S'):
1208 	case Magic('d'):
1209 	case Magic('D'):
1210 	case Magic('x'):
1211 	case Magic('X'):
1212 	case Magic('o'):
1213 	case Magic('O'):
1214 	case Magic('w'):
1215 	case Magic('W'):
1216 	case Magic('h'):
1217 	case Magic('H'):
1218 	case Magic('a'):
1219 	case Magic('A'):
1220 	case Magic('l'):
1221 	case Magic('L'):
1222 	case Magic('u'):
1223 	case Magic('U'):
1224 	    p = vim_strchr(classchars, no_Magic(c));
1225 	    if (p == NULL)
1226 	    {
1227 		if (extra == NFA_ADD_NL)
1228 		{
1229 		    EMSGN(_(e_ill_char_class), c);
1230 		    rc_did_emsg = TRUE;
1231 		    return FAIL;
1232 		}
1233 		EMSGN("INTERNAL: Unknown character class char: %ld", c);
1234 		return FAIL;
1235 	    }
1236 #ifdef FEAT_MBYTE
1237 	    /* When '.' is followed by a composing char ignore the dot, so that
1238 	     * the composing char is matched here. */
1239 	    if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1240 	    {
1241 		old_regparse = regparse;
1242 		c = getchr();
1243 		goto nfa_do_multibyte;
1244 	    }
1245 #endif
1246 	    EMIT(nfa_classcodes[p - classchars]);
1247 	    if (extra == NFA_ADD_NL)
1248 	    {
1249 		EMIT(NFA_NEWL);
1250 		EMIT(NFA_OR);
1251 		regflags |= RF_HASNL;
1252 	    }
1253 	    break;
1254 
1255 	case Magic('n'):
1256 	    if (reg_string)
1257 		/* In a string "\n" matches a newline character. */
1258 		EMIT(NL);
1259 	    else
1260 	    {
1261 		/* In buffer text "\n" matches the end of a line. */
1262 		EMIT(NFA_NEWL);
1263 		regflags |= RF_HASNL;
1264 	    }
1265 	    break;
1266 
1267 	case Magic('('):
1268 	    if (nfa_reg(REG_PAREN) == FAIL)
1269 		return FAIL;	    /* cascaded error */
1270 	    break;
1271 
1272 	case Magic('|'):
1273 	case Magic('&'):
1274 	case Magic(')'):
1275 	    EMSGN(_(e_misplaced), no_Magic(c));
1276 	    return FAIL;
1277 
1278 	case Magic('='):
1279 	case Magic('?'):
1280 	case Magic('+'):
1281 	case Magic('@'):
1282 	case Magic('*'):
1283 	case Magic('{'):
1284 	    /* these should follow an atom, not form an atom */
1285 	    EMSGN(_(e_misplaced), no_Magic(c));
1286 	    return FAIL;
1287 
1288 	case Magic('~'):
1289 	    {
1290 		char_u	    *lp;
1291 
1292 		/* Previous substitute pattern.
1293 		 * Generated as "\%(pattern\)". */
1294 		if (reg_prev_sub == NULL)
1295 		{
1296 		    EMSG(_(e_nopresub));
1297 		    return FAIL;
1298 		}
1299 		for (lp = reg_prev_sub; *lp != NUL; mb_cptr_adv(lp))
1300 		{
1301 		    EMIT(PTR2CHAR(lp));
1302 		    if (lp != reg_prev_sub)
1303 			EMIT(NFA_CONCAT);
1304 		}
1305 		EMIT(NFA_NOPEN);
1306 		break;
1307 	    }
1308 
1309 	case Magic('1'):
1310 	case Magic('2'):
1311 	case Magic('3'):
1312 	case Magic('4'):
1313 	case Magic('5'):
1314 	case Magic('6'):
1315 	case Magic('7'):
1316 	case Magic('8'):
1317 	case Magic('9'):
1318 	    EMIT(NFA_BACKREF1 + (no_Magic(c) - '1'));
1319 	    nfa_has_backref = TRUE;
1320 	    break;
1321 
1322 	case Magic('z'):
1323 	    c = no_Magic(getchr());
1324 	    switch (c)
1325 	    {
1326 		case 's':
1327 		    EMIT(NFA_ZSTART);
1328 		    if (re_mult_next("\\zs") == FAIL)
1329 			return FAIL;
1330 		    break;
1331 		case 'e':
1332 		    EMIT(NFA_ZEND);
1333 		    nfa_has_zend = TRUE;
1334 		    if (re_mult_next("\\ze") == FAIL)
1335 			return FAIL;
1336 		    break;
1337 #ifdef FEAT_SYN_HL
1338 		case '1':
1339 		case '2':
1340 		case '3':
1341 		case '4':
1342 		case '5':
1343 		case '6':
1344 		case '7':
1345 		case '8':
1346 		case '9':
1347 		    /* \z1...\z9 */
1348 		    if (reg_do_extmatch != REX_USE)
1349 			EMSG_RET_FAIL(_(e_z1_not_allowed));
1350 		    EMIT(NFA_ZREF1 + (no_Magic(c) - '1'));
1351 		    /* No need to set nfa_has_backref, the sub-matches don't
1352 		     * change when \z1 .. \z9 matches or not. */
1353 		    re_has_z = REX_USE;
1354 		    break;
1355 		case '(':
1356 		    /* \z(  */
1357 		    if (reg_do_extmatch != REX_SET)
1358 			EMSG_RET_FAIL(_(e_z_not_allowed));
1359 		    if (nfa_reg(REG_ZPAREN) == FAIL)
1360 			return FAIL;	    /* cascaded error */
1361 		    re_has_z = REX_SET;
1362 		    break;
1363 #endif
1364 		default:
1365 		    EMSGN(_("E867: (NFA) Unknown operator '\\z%c'"),
1366 								 no_Magic(c));
1367 		    return FAIL;
1368 	    }
1369 	    break;
1370 
1371 	case Magic('%'):
1372 	    c = no_Magic(getchr());
1373 	    switch (c)
1374 	    {
1375 		/* () without a back reference */
1376 		case '(':
1377 		    if (nfa_reg(REG_NPAREN) == FAIL)
1378 			return FAIL;
1379 		    EMIT(NFA_NOPEN);
1380 		    break;
1381 
1382 		case 'd':   /* %d123 decimal */
1383 		case 'o':   /* %o123 octal */
1384 		case 'x':   /* %xab hex 2 */
1385 		case 'u':   /* %uabcd hex 4 */
1386 		case 'U':   /* %U1234abcd hex 8 */
1387 		    {
1388 			int nr;
1389 
1390 			switch (c)
1391 			{
1392 			    case 'd': nr = getdecchrs(); break;
1393 			    case 'o': nr = getoctchrs(); break;
1394 			    case 'x': nr = gethexchrs(2); break;
1395 			    case 'u': nr = gethexchrs(4); break;
1396 			    case 'U': nr = gethexchrs(8); break;
1397 			    default:  nr = -1; break;
1398 			}
1399 
1400 			if (nr < 0)
1401 			    EMSG2_RET_FAIL(
1402 			       _("E678: Invalid character after %s%%[dxouU]"),
1403 				    reg_magic == MAGIC_ALL);
1404 			/* A NUL is stored in the text as NL */
1405 			/* TODO: what if a composing character follows? */
1406 			EMIT(nr == 0 ? 0x0a : nr);
1407 		    }
1408 		    break;
1409 
1410 		/* Catch \%^ and \%$ regardless of where they appear in the
1411 		 * pattern -- regardless of whether or not it makes sense. */
1412 		case '^':
1413 		    EMIT(NFA_BOF);
1414 		    break;
1415 
1416 		case '$':
1417 		    EMIT(NFA_EOF);
1418 		    break;
1419 
1420 		case '#':
1421 		    EMIT(NFA_CURSOR);
1422 		    break;
1423 
1424 		case 'V':
1425 		    EMIT(NFA_VISUAL);
1426 		    break;
1427 
1428 		case 'C':
1429 		    EMIT(NFA_ANY_COMPOSING);
1430 		    break;
1431 
1432 		case '[':
1433 		    {
1434 			int	    n;
1435 
1436 			/* \%[abc] */
1437 			for (n = 0; (c = peekchr()) != ']'; ++n)
1438 			{
1439 			    if (c == NUL)
1440 				EMSG2_RET_FAIL(_(e_missing_sb),
1441 						      reg_magic == MAGIC_ALL);
1442 			    /* recursive call! */
1443 			    if (nfa_regatom() == FAIL)
1444 				return FAIL;
1445 			}
1446 			getchr();  /* get the ] */
1447 			if (n == 0)
1448 			    EMSG2_RET_FAIL(_(e_empty_sb),
1449 						      reg_magic == MAGIC_ALL);
1450 			EMIT(NFA_OPT_CHARS);
1451 			EMIT(n);
1452 
1453 			/* Emit as "\%(\%[abc]\)" to be able to handle
1454 			 * "\%[abc]*" which would cause the empty string to be
1455 			 * matched an unlimited number of times. NFA_NOPEN is
1456 			 * added only once at a position, while NFA_SPLIT is
1457 			 * added multiple times.  This is more efficient than
1458 			 * not allowing NFA_SPLIT multiple times, it is used
1459 			 * a lot. */
1460 			EMIT(NFA_NOPEN);
1461 			break;
1462 		    }
1463 
1464 		default:
1465 		    {
1466 			int	n = 0;
1467 			int	cmp = c;
1468 
1469 			if (c == '<' || c == '>')
1470 			    c = getchr();
1471 			while (VIM_ISDIGIT(c))
1472 			{
1473 			    n = n * 10 + (c - '0');
1474 			    c = getchr();
1475 			}
1476 			if (c == 'l' || c == 'c' || c == 'v')
1477 			{
1478 			    if (c == 'l')
1479 				/* \%{n}l  \%{n}<l  \%{n}>l  */
1480 				EMIT(cmp == '<' ? NFA_LNUM_LT :
1481 				     cmp == '>' ? NFA_LNUM_GT : NFA_LNUM);
1482 			    else if (c == 'c')
1483 				/* \%{n}c  \%{n}<c  \%{n}>c  */
1484 				EMIT(cmp == '<' ? NFA_COL_LT :
1485 				     cmp == '>' ? NFA_COL_GT : NFA_COL);
1486 			    else
1487 				/* \%{n}v  \%{n}<v  \%{n}>v  */
1488 				EMIT(cmp == '<' ? NFA_VCOL_LT :
1489 				     cmp == '>' ? NFA_VCOL_GT : NFA_VCOL);
1490 			    EMIT(n);
1491 			    break;
1492 			}
1493 			else if (c == '\'' && n == 0)
1494 			{
1495 			    /* \%'m  \%<'m  \%>'m  */
1496 			    EMIT(cmp == '<' ? NFA_MARK_LT :
1497 				 cmp == '>' ? NFA_MARK_GT : NFA_MARK);
1498 			    EMIT(getchr());
1499 			    break;
1500 			}
1501 		    }
1502 		    EMSGN(_("E867: (NFA) Unknown operator '\\%%%c'"),
1503 								 no_Magic(c));
1504 		    return FAIL;
1505 	    }
1506 	    break;
1507 
1508 	case Magic('['):
1509 collection:
1510 	    /*
1511 	     * [abc]  uses NFA_START_COLL - NFA_END_COLL
1512 	     * [^abc] uses NFA_START_NEG_COLL - NFA_END_NEG_COLL
1513 	     * Each character is produced as a regular state, using
1514 	     * NFA_CONCAT to bind them together.
1515 	     * Besides normal characters there can be:
1516 	     * - character classes  NFA_CLASS_*
1517 	     * - ranges, two characters followed by NFA_RANGE.
1518 	     */
1519 
1520 	    p = regparse;
1521 	    endp = skip_anyof(p);
1522 	    if (*endp == ']')
1523 	    {
1524 		/*
1525 		 * Try to reverse engineer character classes. For example,
1526 		 * recognize that [0-9] stands for \d and [A-Za-z_] for \h,
1527 		 * and perform the necessary substitutions in the NFA.
1528 		 */
1529 		result = nfa_recognize_char_class(regparse, endp,
1530 							 extra == NFA_ADD_NL);
1531 		if (result != FAIL)
1532 		{
1533 		    if (result >= NFA_FIRST_NL && result <= NFA_LAST_NL)
1534 		    {
1535 			EMIT(result - NFA_ADD_NL);
1536 			EMIT(NFA_NEWL);
1537 			EMIT(NFA_OR);
1538 		    }
1539 		    else
1540 			EMIT(result);
1541 		    regparse = endp;
1542 		    mb_ptr_adv(regparse);
1543 		    return OK;
1544 		}
1545 		/*
1546 		 * Failed to recognize a character class. Use the simple
1547 		 * version that turns [abc] into 'a' OR 'b' OR 'c'
1548 		 */
1549 		startc = endc = oldstartc = -1;
1550 		negated = FALSE;
1551 		if (*regparse == '^')			/* negated range */
1552 		{
1553 		    negated = TRUE;
1554 		    mb_ptr_adv(regparse);
1555 		    EMIT(NFA_START_NEG_COLL);
1556 		}
1557 		else
1558 		    EMIT(NFA_START_COLL);
1559 		if (*regparse == '-')
1560 		{
1561 		    startc = '-';
1562 		    EMIT(startc);
1563 		    EMIT(NFA_CONCAT);
1564 		    mb_ptr_adv(regparse);
1565 		}
1566 		/* Emit the OR branches for each character in the [] */
1567 		emit_range = FALSE;
1568 		while (regparse < endp)
1569 		{
1570 		    oldstartc = startc;
1571 		    startc = -1;
1572 		    got_coll_char = FALSE;
1573 		    if (*regparse == '[')
1574 		    {
1575 			/* Check for [: :], [= =], [. .] */
1576 			equiclass = collclass = 0;
1577 			charclass = get_char_class(&regparse);
1578 			if (charclass == CLASS_NONE)
1579 			{
1580 			    equiclass = get_equi_class(&regparse);
1581 			    if (equiclass == 0)
1582 				collclass = get_coll_element(&regparse);
1583 			}
1584 
1585 			/* Character class like [:alpha:]  */
1586 			if (charclass != CLASS_NONE)
1587 			{
1588 			    switch (charclass)
1589 			    {
1590 				case CLASS_ALNUM:
1591 				    EMIT(NFA_CLASS_ALNUM);
1592 				    break;
1593 				case CLASS_ALPHA:
1594 				    EMIT(NFA_CLASS_ALPHA);
1595 				    break;
1596 				case CLASS_BLANK:
1597 				    EMIT(NFA_CLASS_BLANK);
1598 				    break;
1599 				case CLASS_CNTRL:
1600 				    EMIT(NFA_CLASS_CNTRL);
1601 				    break;
1602 				case CLASS_DIGIT:
1603 				    EMIT(NFA_CLASS_DIGIT);
1604 				    break;
1605 				case CLASS_GRAPH:
1606 				    EMIT(NFA_CLASS_GRAPH);
1607 				    break;
1608 				case CLASS_LOWER:
1609 				    EMIT(NFA_CLASS_LOWER);
1610 				    break;
1611 				case CLASS_PRINT:
1612 				    EMIT(NFA_CLASS_PRINT);
1613 				    break;
1614 				case CLASS_PUNCT:
1615 				    EMIT(NFA_CLASS_PUNCT);
1616 				    break;
1617 				case CLASS_SPACE:
1618 				    EMIT(NFA_CLASS_SPACE);
1619 				    break;
1620 				case CLASS_UPPER:
1621 				    EMIT(NFA_CLASS_UPPER);
1622 				    break;
1623 				case CLASS_XDIGIT:
1624 				    EMIT(NFA_CLASS_XDIGIT);
1625 				    break;
1626 				case CLASS_TAB:
1627 				    EMIT(NFA_CLASS_TAB);
1628 				    break;
1629 				case CLASS_RETURN:
1630 				    EMIT(NFA_CLASS_RETURN);
1631 				    break;
1632 				case CLASS_BACKSPACE:
1633 				    EMIT(NFA_CLASS_BACKSPACE);
1634 				    break;
1635 				case CLASS_ESCAPE:
1636 				    EMIT(NFA_CLASS_ESCAPE);
1637 				    break;
1638 			    }
1639 			    EMIT(NFA_CONCAT);
1640 			    continue;
1641 			}
1642 			/* Try equivalence class [=a=] and the like */
1643 			if (equiclass != 0)
1644 			{
1645 			    result = nfa_emit_equi_class(equiclass);
1646 			    if (result == FAIL)
1647 			    {
1648 				/* should never happen */
1649 				EMSG_RET_FAIL(_("E868: Error building NFA with equivalence class!"));
1650 			    }
1651 			    continue;
1652 			}
1653 			/* Try collating class like [. .]  */
1654 			if (collclass != 0)
1655 			{
1656 			    startc = collclass;	 /* allow [.a.]-x as a range */
1657 			    /* Will emit the proper atom at the end of the
1658 			     * while loop. */
1659 			}
1660 		    }
1661 		    /* Try a range like 'a-x' or '\t-z'. Also allows '-' as a
1662 		     * start character. */
1663 		    if (*regparse == '-' && oldstartc != -1)
1664 		    {
1665 			emit_range = TRUE;
1666 			startc = oldstartc;
1667 			mb_ptr_adv(regparse);
1668 			continue;	    /* reading the end of the range */
1669 		    }
1670 
1671 		    /* Now handle simple and escaped characters.
1672 		     * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
1673 		     * accepts "\t", "\e", etc., but only when the 'l' flag in
1674 		     * 'cpoptions' is not included.
1675 		     * Posix doesn't recognize backslash at all.
1676 		     */
1677 		    if (*regparse == '\\'
1678 			    && !reg_cpo_bsl
1679 			    && regparse + 1 <= endp
1680 			    && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
1681 				|| (!reg_cpo_lit
1682 				    && vim_strchr(REGEXP_ABBR, regparse[1])
1683 								      != NULL)
1684 			    )
1685 			)
1686 		    {
1687 			mb_ptr_adv(regparse);
1688 
1689 			if (*regparse == 'n')
1690 			    startc = reg_string ? NL : NFA_NEWL;
1691 			else
1692 			    if  (*regparse == 'd'
1693 				    || *regparse == 'o'
1694 				    || *regparse == 'x'
1695 				    || *regparse == 'u'
1696 				    || *regparse == 'U'
1697 				)
1698 			    {
1699 				/* TODO(RE) This needs more testing */
1700 				startc = coll_get_char();
1701 				got_coll_char = TRUE;
1702 				mb_ptr_back(old_regparse, regparse);
1703 			    }
1704 			    else
1705 			    {
1706 				/* \r,\t,\e,\b */
1707 				startc = backslash_trans(*regparse);
1708 			    }
1709 		    }
1710 
1711 		    /* Normal printable char */
1712 		    if (startc == -1)
1713 			startc = PTR2CHAR(regparse);
1714 
1715 		    /* Previous char was '-', so this char is end of range. */
1716 		    if (emit_range)
1717 		    {
1718 			endc = startc;
1719 			startc = oldstartc;
1720 			if (startc > endc)
1721 			    EMSG_RET_FAIL(_(e_invrange));
1722 
1723 			if (endc > startc + 2)
1724 			{
1725 			    /* Emit a range instead of the sequence of
1726 			     * individual characters. */
1727 			    if (startc == 0)
1728 				/* \x00 is translated to \x0a, start at \x01. */
1729 				EMIT(1);
1730 			    else
1731 				--post_ptr; /* remove NFA_CONCAT */
1732 			    EMIT(endc);
1733 			    EMIT(NFA_RANGE);
1734 			    EMIT(NFA_CONCAT);
1735 			}
1736 			else
1737 #ifdef FEAT_MBYTE
1738 			     if (has_mbyte && ((*mb_char2len)(startc) > 1
1739 				    || (*mb_char2len)(endc) > 1))
1740 			{
1741 			    /* Emit the characters in the range.
1742 			     * "startc" was already emitted, so skip it.
1743 			     * */
1744 			    for (c = startc + 1; c <= endc; c++)
1745 			    {
1746 				EMIT(c);
1747 				EMIT(NFA_CONCAT);
1748 			    }
1749 			}
1750 			else
1751 #endif
1752 			{
1753 #ifdef EBCDIC
1754 			    int alpha_only = FALSE;
1755 
1756 			    /* for alphabetical range skip the gaps
1757 			     * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'. */
1758 			    if (isalpha(startc) && isalpha(endc))
1759 				alpha_only = TRUE;
1760 #endif
1761 			    /* Emit the range. "startc" was already emitted, so
1762 			     * skip it. */
1763 			    for (c = startc + 1; c <= endc; c++)
1764 #ifdef EBCDIC
1765 				if (!alpha_only || isalpha(startc))
1766 #endif
1767 				{
1768 				    EMIT(c);
1769 				    EMIT(NFA_CONCAT);
1770 				}
1771 			}
1772 			emit_range = FALSE;
1773 			startc = -1;
1774 		    }
1775 		    else
1776 		    {
1777 			/* This char (startc) is not part of a range. Just
1778 			 * emit it.
1779 			 * Normally, simply emit startc. But if we get char
1780 			 * code=0 from a collating char, then replace it with
1781 			 * 0x0a.
1782 			 * This is needed to completely mimic the behaviour of
1783 			 * the backtracking engine. */
1784 			if (startc == NFA_NEWL)
1785 			{
1786 			    /* Line break can't be matched as part of the
1787 			     * collection, add an OR below. But not for negated
1788 			     * range. */
1789 			    if (!negated)
1790 				extra = NFA_ADD_NL;
1791 			}
1792 			else
1793 			{
1794 			    if (got_coll_char == TRUE && startc == 0)
1795 				EMIT(0x0a);
1796 			    else
1797 				EMIT(startc);
1798 			    EMIT(NFA_CONCAT);
1799 			}
1800 		    }
1801 
1802 		    mb_ptr_adv(regparse);
1803 		} /* while (p < endp) */
1804 
1805 		mb_ptr_back(old_regparse, regparse);
1806 		if (*regparse == '-')	    /* if last, '-' is just a char */
1807 		{
1808 		    EMIT('-');
1809 		    EMIT(NFA_CONCAT);
1810 		}
1811 
1812 		/* skip the trailing ] */
1813 		regparse = endp;
1814 		mb_ptr_adv(regparse);
1815 
1816 		/* Mark end of the collection. */
1817 		if (negated == TRUE)
1818 		    EMIT(NFA_END_NEG_COLL);
1819 		else
1820 		    EMIT(NFA_END_COLL);
1821 
1822 		/* \_[] also matches \n but it's not negated */
1823 		if (extra == NFA_ADD_NL)
1824 		{
1825 		    EMIT(reg_string ? NL : NFA_NEWL);
1826 		    EMIT(NFA_OR);
1827 		}
1828 
1829 		return OK;
1830 	    } /* if exists closing ] */
1831 
1832 	    if (reg_strict)
1833 		EMSG_RET_FAIL(_(e_missingbracket));
1834 	    /* FALLTHROUGH */
1835 
1836 	default:
1837 	    {
1838 #ifdef FEAT_MBYTE
1839 		int	plen;
1840 
1841 nfa_do_multibyte:
1842 		/* plen is length of current char with composing chars */
1843 		if (enc_utf8 && ((*mb_char2len)(c)
1844 			    != (plen = (*mb_ptr2len)(old_regparse))
1845 						       || utf_iscomposing(c)))
1846 		{
1847 		    int i = 0;
1848 
1849 		    /* A base character plus composing characters, or just one
1850 		     * or more composing characters.
1851 		     * This requires creating a separate atom as if enclosing
1852 		     * the characters in (), where NFA_COMPOSING is the ( and
1853 		     * NFA_END_COMPOSING is the ). Note that right now we are
1854 		     * building the postfix form, not the NFA itself;
1855 		     * a composing char could be: a, b, c, NFA_COMPOSING
1856 		     * where 'b' and 'c' are chars with codes > 256. */
1857 		    for (;;)
1858 		    {
1859 			EMIT(c);
1860 			if (i > 0)
1861 			    EMIT(NFA_CONCAT);
1862 			if ((i += utf_char2len(c)) >= plen)
1863 			    break;
1864 			c = utf_ptr2char(old_regparse + i);
1865 		    }
1866 		    EMIT(NFA_COMPOSING);
1867 		    regparse = old_regparse + plen;
1868 		}
1869 		else
1870 #endif
1871 		{
1872 		    c = no_Magic(c);
1873 		    EMIT(c);
1874 		}
1875 		return OK;
1876 	    }
1877     }
1878 
1879     return OK;
1880 }
1881 
1882 /*
1883  * Parse something followed by possible [*+=].
1884  *
1885  * A piece is an atom, possibly followed by a multi, an indication of how many
1886  * times the atom can be matched.  Example: "a*" matches any sequence of "a"
1887  * characters: "", "a", "aa", etc.
1888  *
1889  * piece   ::=	    atom
1890  *	or  atom  multi
1891  */
1892     static int
1893 nfa_regpiece()
1894 {
1895     int		i;
1896     int		op;
1897     int		ret;
1898     long	minval, maxval;
1899     int		greedy = TRUE;      /* Braces are prefixed with '-' ? */
1900     parse_state_T old_state;
1901     parse_state_T new_state;
1902     int		c2;
1903     int		old_post_pos;
1904     int		my_post_start;
1905     int		quest;
1906 
1907     /* Save the current parse state, so that we can use it if <atom>{m,n} is
1908      * next. */
1909     save_parse_state(&old_state);
1910 
1911     /* store current pos in the postfix form, for \{m,n} involving 0s */
1912     my_post_start = (int)(post_ptr - post_start);
1913 
1914     ret = nfa_regatom();
1915     if (ret == FAIL)
1916 	return FAIL;	    /* cascaded error */
1917 
1918     op = peekchr();
1919     if (re_multi_type(op) == NOT_MULTI)
1920 	return OK;
1921 
1922     skipchr();
1923     switch (op)
1924     {
1925 	case Magic('*'):
1926 	    EMIT(NFA_STAR);
1927 	    break;
1928 
1929 	case Magic('+'):
1930 	    /*
1931 	     * Trick: Normally, (a*)\+ would match the whole input "aaa".  The
1932 	     * first and only submatch would be "aaa". But the backtracking
1933 	     * engine interprets the plus as "try matching one more time", and
1934 	     * a* matches a second time at the end of the input, the empty
1935 	     * string.
1936 	     * The submatch will be the empty string.
1937 	     *
1938 	     * In order to be consistent with the old engine, we replace
1939 	     * <atom>+ with <atom><atom>*
1940 	     */
1941 	    restore_parse_state(&old_state);
1942 	    curchr = -1;
1943 	    if (nfa_regatom() == FAIL)
1944 		return FAIL;
1945 	    EMIT(NFA_STAR);
1946 	    EMIT(NFA_CONCAT);
1947 	    skipchr();		/* skip the \+	*/
1948 	    break;
1949 
1950 	case Magic('@'):
1951 	    c2 = getdecchrs();
1952 	    op = no_Magic(getchr());
1953 	    i = 0;
1954 	    switch(op)
1955 	    {
1956 		case '=':
1957 		    /* \@= */
1958 		    i = NFA_PREV_ATOM_NO_WIDTH;
1959 		    break;
1960 		case '!':
1961 		    /* \@! */
1962 		    i = NFA_PREV_ATOM_NO_WIDTH_NEG;
1963 		    break;
1964 		case '<':
1965 		    op = no_Magic(getchr());
1966 		    if (op == '=')
1967 			/* \@<= */
1968 			i = NFA_PREV_ATOM_JUST_BEFORE;
1969 		    else if (op == '!')
1970 			/* \@<! */
1971 			i = NFA_PREV_ATOM_JUST_BEFORE_NEG;
1972 		    break;
1973 		case '>':
1974 		    /* \@>  */
1975 		    i = NFA_PREV_ATOM_LIKE_PATTERN;
1976 		    break;
1977 	    }
1978 	    if (i == 0)
1979 	    {
1980 		EMSGN(_("E869: (NFA) Unknown operator '\\@%c'"), op);
1981 		return FAIL;
1982 	    }
1983 	    EMIT(i);
1984 	    if (i == NFA_PREV_ATOM_JUST_BEFORE
1985 					|| i == NFA_PREV_ATOM_JUST_BEFORE_NEG)
1986 		EMIT(c2);
1987 	    break;
1988 
1989 	case Magic('?'):
1990 	case Magic('='):
1991 	    EMIT(NFA_QUEST);
1992 	    break;
1993 
1994 	case Magic('{'):
1995 	    /* a{2,5} will expand to 'aaa?a?a?'
1996 	     * a{-1,3} will expand to 'aa??a??', where ?? is the nongreedy
1997 	     * version of '?'
1998 	     * \v(ab){2,3} will expand to '(ab)(ab)(ab)?', where all the
1999 	     * parenthesis have the same id
2000 	     */
2001 
2002 	    greedy = TRUE;
2003 	    c2 = peekchr();
2004 	    if (c2 == '-' || c2 == Magic('-'))
2005 	    {
2006 		skipchr();
2007 		greedy = FALSE;
2008 	    }
2009 	    if (!read_limits(&minval, &maxval))
2010 		EMSG_RET_FAIL(_("E870: (NFA regexp) Error reading repetition limits"));
2011 
2012 	    /*  <atom>{0,inf}, <atom>{0,} and <atom>{}  are equivalent to
2013 	     *  <atom>*  */
2014 	    if (minval == 0 && maxval == MAX_LIMIT)
2015 	    {
2016 		if (greedy)		/* { { (match the braces) */
2017 		    /* \{}, \{0,} */
2018 		    EMIT(NFA_STAR);
2019 		else			/* { { (match the braces) */
2020 		    /* \{-}, \{-0,} */
2021 		    EMIT(NFA_STAR_NONGREEDY);
2022 		break;
2023 	    }
2024 
2025 	    /* Special case: x{0} or x{-0} */
2026 	    if (maxval == 0)
2027 	    {
2028 		/* Ignore result of previous call to nfa_regatom() */
2029 		post_ptr = post_start + my_post_start;
2030 		/* NFA_EMPTY is 0-length and works everywhere */
2031 		EMIT(NFA_EMPTY);
2032 		return OK;
2033 	    }
2034 
2035 	    /* The engine is very inefficient (uses too many states) when the
2036 	     * maximum is much larger than the minimum and when the maximum is
2037 	     * large.  Bail out if we can use the other engine. */
2038 	    if ((nfa_re_flags & RE_AUTO)
2039 				   && (maxval > minval + 200 || maxval > 500))
2040 		return FAIL;
2041 
2042 	    /* Ignore previous call to nfa_regatom() */
2043 	    post_ptr = post_start + my_post_start;
2044 	    /* Save parse state after the repeated atom and the \{} */
2045 	    save_parse_state(&new_state);
2046 
2047 	    quest = (greedy == TRUE? NFA_QUEST : NFA_QUEST_NONGREEDY);
2048 	    for (i = 0; i < maxval; i++)
2049 	    {
2050 		/* Goto beginning of the repeated atom */
2051 		restore_parse_state(&old_state);
2052 		old_post_pos = (int)(post_ptr - post_start);
2053 		if (nfa_regatom() == FAIL)
2054 		    return FAIL;
2055 		/* after "minval" times, atoms are optional */
2056 		if (i + 1 > minval)
2057 		{
2058 		    if (maxval == MAX_LIMIT)
2059 		    {
2060 			if (greedy)
2061 			    EMIT(NFA_STAR);
2062 			else
2063 			    EMIT(NFA_STAR_NONGREEDY);
2064 		    }
2065 		    else
2066 			EMIT(quest);
2067 		}
2068 		if (old_post_pos != my_post_start)
2069 		    EMIT(NFA_CONCAT);
2070 		if (i + 1 > minval && maxval == MAX_LIMIT)
2071 		    break;
2072 	    }
2073 
2074 	    /* Go to just after the repeated atom and the \{} */
2075 	    restore_parse_state(&new_state);
2076 	    curchr = -1;
2077 
2078 	    break;
2079 
2080 
2081 	default:
2082 	    break;
2083     }	/* end switch */
2084 
2085     if (re_multi_type(peekchr()) != NOT_MULTI)
2086 	/* Can't have a multi follow a multi. */
2087 	EMSG_RET_FAIL(_("E871: (NFA regexp) Can't have a multi follow a multi !"));
2088 
2089     return OK;
2090 }
2091 
2092 /*
2093  * Parse one or more pieces, concatenated.  It matches a match for the
2094  * first piece, followed by a match for the second piece, etc.  Example:
2095  * "f[0-9]b", first matches "f", then a digit and then "b".
2096  *
2097  * concat  ::=	    piece
2098  *	or  piece piece
2099  *	or  piece piece piece
2100  *	etc.
2101  */
2102     static int
2103 nfa_regconcat()
2104 {
2105     int		cont = TRUE;
2106     int		first = TRUE;
2107 
2108     while (cont)
2109     {
2110 	switch (peekchr())
2111 	{
2112 	    case NUL:
2113 	    case Magic('|'):
2114 	    case Magic('&'):
2115 	    case Magic(')'):
2116 		cont = FALSE;
2117 		break;
2118 
2119 	    case Magic('Z'):
2120 #ifdef FEAT_MBYTE
2121 		regflags |= RF_ICOMBINE;
2122 #endif
2123 		skipchr_keepstart();
2124 		break;
2125 	    case Magic('c'):
2126 		regflags |= RF_ICASE;
2127 		skipchr_keepstart();
2128 		break;
2129 	    case Magic('C'):
2130 		regflags |= RF_NOICASE;
2131 		skipchr_keepstart();
2132 		break;
2133 	    case Magic('v'):
2134 		reg_magic = MAGIC_ALL;
2135 		skipchr_keepstart();
2136 		curchr = -1;
2137 		break;
2138 	    case Magic('m'):
2139 		reg_magic = MAGIC_ON;
2140 		skipchr_keepstart();
2141 		curchr = -1;
2142 		break;
2143 	    case Magic('M'):
2144 		reg_magic = MAGIC_OFF;
2145 		skipchr_keepstart();
2146 		curchr = -1;
2147 		break;
2148 	    case Magic('V'):
2149 		reg_magic = MAGIC_NONE;
2150 		skipchr_keepstart();
2151 		curchr = -1;
2152 		break;
2153 
2154 	    default:
2155 		if (nfa_regpiece() == FAIL)
2156 		    return FAIL;
2157 		if (first == FALSE)
2158 		    EMIT(NFA_CONCAT);
2159 		else
2160 		    first = FALSE;
2161 		break;
2162 	}
2163     }
2164 
2165     return OK;
2166 }
2167 
2168 /*
2169  * Parse a branch, one or more concats, separated by "\&".  It matches the
2170  * last concat, but only if all the preceding concats also match at the same
2171  * position.  Examples:
2172  *      "foobeep\&..." matches "foo" in "foobeep".
2173  *      ".*Peter\&.*Bob" matches in a line containing both "Peter" and "Bob"
2174  *
2175  * branch ::=	    concat
2176  *		or  concat \& concat
2177  *		or  concat \& concat \& concat
2178  *		etc.
2179  */
2180     static int
2181 nfa_regbranch()
2182 {
2183     int		ch;
2184     int		old_post_pos;
2185 
2186     old_post_pos = (int)(post_ptr - post_start);
2187 
2188     /* First branch, possibly the only one */
2189     if (nfa_regconcat() == FAIL)
2190 	return FAIL;
2191 
2192     ch = peekchr();
2193     /* Try next concats */
2194     while (ch == Magic('&'))
2195     {
2196 	skipchr();
2197 	EMIT(NFA_NOPEN);
2198 	EMIT(NFA_PREV_ATOM_NO_WIDTH);
2199 	old_post_pos = (int)(post_ptr - post_start);
2200 	if (nfa_regconcat() == FAIL)
2201 	    return FAIL;
2202 	/* if concat is empty do emit a node */
2203 	if (old_post_pos == (int)(post_ptr - post_start))
2204 	    EMIT(NFA_EMPTY);
2205 	EMIT(NFA_CONCAT);
2206 	ch = peekchr();
2207     }
2208 
2209     /* if a branch is empty, emit one node for it */
2210     if (old_post_pos == (int)(post_ptr - post_start))
2211 	EMIT(NFA_EMPTY);
2212 
2213     return OK;
2214 }
2215 
2216 /*
2217  *  Parse a pattern, one or more branches, separated by "\|".  It matches
2218  *  anything that matches one of the branches.  Example: "foo\|beep" matches
2219  *  "foo" and matches "beep".  If more than one branch matches, the first one
2220  *  is used.
2221  *
2222  *  pattern ::=	    branch
2223  *	or  branch \| branch
2224  *	or  branch \| branch \| branch
2225  *	etc.
2226  */
2227     static int
2228 nfa_reg(paren)
2229     int		paren;	/* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
2230 {
2231     int		parno = 0;
2232 
2233     if (paren == REG_PAREN)
2234     {
2235 	if (regnpar >= NSUBEXP) /* Too many `(' */
2236 	    EMSG_RET_FAIL(_("E872: (NFA regexp) Too many '('"));
2237 	parno = regnpar++;
2238     }
2239 #ifdef FEAT_SYN_HL
2240     else if (paren == REG_ZPAREN)
2241     {
2242 	/* Make a ZOPEN node. */
2243 	if (regnzpar >= NSUBEXP)
2244 	    EMSG_RET_FAIL(_("E879: (NFA regexp) Too many \\z("));
2245 	parno = regnzpar++;
2246     }
2247 #endif
2248 
2249     if (nfa_regbranch() == FAIL)
2250 	return FAIL;	    /* cascaded error */
2251 
2252     while (peekchr() == Magic('|'))
2253     {
2254 	skipchr();
2255 	if (nfa_regbranch() == FAIL)
2256 	    return FAIL;    /* cascaded error */
2257 	EMIT(NFA_OR);
2258     }
2259 
2260     /* Check for proper termination. */
2261     if (paren != REG_NOPAREN && getchr() != Magic(')'))
2262     {
2263 	if (paren == REG_NPAREN)
2264 	    EMSG2_RET_FAIL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
2265 	else
2266 	    EMSG2_RET_FAIL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
2267     }
2268     else if (paren == REG_NOPAREN && peekchr() != NUL)
2269     {
2270 	if (peekchr() == Magic(')'))
2271 	    EMSG2_RET_FAIL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
2272 	else
2273 	    EMSG_RET_FAIL(_("E873: (NFA regexp) proper termination error"));
2274     }
2275     /*
2276      * Here we set the flag allowing back references to this set of
2277      * parentheses.
2278      */
2279     if (paren == REG_PAREN)
2280     {
2281 	had_endbrace[parno] = TRUE;     /* have seen the close paren */
2282 	EMIT(NFA_MOPEN + parno);
2283     }
2284 #ifdef FEAT_SYN_HL
2285     else if (paren == REG_ZPAREN)
2286 	EMIT(NFA_ZOPEN + parno);
2287 #endif
2288 
2289     return OK;
2290 }
2291 
2292 #ifdef DEBUG
2293 static char_u code[50];
2294 
2295     static void
2296 nfa_set_code(c)
2297     int	    c;
2298 {
2299     int	    addnl = FALSE;
2300 
2301     if (c >= NFA_FIRST_NL && c <= NFA_LAST_NL)
2302     {
2303 	addnl = TRUE;
2304 	c -= NFA_ADD_NL;
2305     }
2306 
2307     STRCPY(code, "");
2308     switch (c)
2309     {
2310 	case NFA_MATCH:	    STRCPY(code, "NFA_MATCH "); break;
2311 	case NFA_SPLIT:	    STRCPY(code, "NFA_SPLIT "); break;
2312 	case NFA_CONCAT:    STRCPY(code, "NFA_CONCAT "); break;
2313 	case NFA_NEWL:	    STRCPY(code, "NFA_NEWL "); break;
2314 	case NFA_ZSTART:    STRCPY(code, "NFA_ZSTART"); break;
2315 	case NFA_ZEND:	    STRCPY(code, "NFA_ZEND"); break;
2316 
2317 	case NFA_BACKREF1:  STRCPY(code, "NFA_BACKREF1"); break;
2318 	case NFA_BACKREF2:  STRCPY(code, "NFA_BACKREF2"); break;
2319 	case NFA_BACKREF3:  STRCPY(code, "NFA_BACKREF3"); break;
2320 	case NFA_BACKREF4:  STRCPY(code, "NFA_BACKREF4"); break;
2321 	case NFA_BACKREF5:  STRCPY(code, "NFA_BACKREF5"); break;
2322 	case NFA_BACKREF6:  STRCPY(code, "NFA_BACKREF6"); break;
2323 	case NFA_BACKREF7:  STRCPY(code, "NFA_BACKREF7"); break;
2324 	case NFA_BACKREF8:  STRCPY(code, "NFA_BACKREF8"); break;
2325 	case NFA_BACKREF9:  STRCPY(code, "NFA_BACKREF9"); break;
2326 #ifdef FEAT_SYN_HL
2327 	case NFA_ZREF1:	    STRCPY(code, "NFA_ZREF1"); break;
2328 	case NFA_ZREF2:	    STRCPY(code, "NFA_ZREF2"); break;
2329 	case NFA_ZREF3:	    STRCPY(code, "NFA_ZREF3"); break;
2330 	case NFA_ZREF4:	    STRCPY(code, "NFA_ZREF4"); break;
2331 	case NFA_ZREF5:	    STRCPY(code, "NFA_ZREF5"); break;
2332 	case NFA_ZREF6:	    STRCPY(code, "NFA_ZREF6"); break;
2333 	case NFA_ZREF7:	    STRCPY(code, "NFA_ZREF7"); break;
2334 	case NFA_ZREF8:	    STRCPY(code, "NFA_ZREF8"); break;
2335 	case NFA_ZREF9:	    STRCPY(code, "NFA_ZREF9"); break;
2336 #endif
2337 	case NFA_SKIP:	    STRCPY(code, "NFA_SKIP"); break;
2338 
2339 	case NFA_PREV_ATOM_NO_WIDTH:
2340 			    STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH"); break;
2341 	case NFA_PREV_ATOM_NO_WIDTH_NEG:
2342 			    STRCPY(code, "NFA_PREV_ATOM_NO_WIDTH_NEG"); break;
2343 	case NFA_PREV_ATOM_JUST_BEFORE:
2344 			    STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE"); break;
2345 	case NFA_PREV_ATOM_JUST_BEFORE_NEG:
2346 			 STRCPY(code, "NFA_PREV_ATOM_JUST_BEFORE_NEG"); break;
2347 	case NFA_PREV_ATOM_LIKE_PATTERN:
2348 			    STRCPY(code, "NFA_PREV_ATOM_LIKE_PATTERN"); break;
2349 
2350 	case NFA_NOPEN:		    STRCPY(code, "NFA_NOPEN"); break;
2351 	case NFA_NCLOSE:	    STRCPY(code, "NFA_NCLOSE"); break;
2352 	case NFA_START_INVISIBLE:   STRCPY(code, "NFA_START_INVISIBLE"); break;
2353 	case NFA_START_INVISIBLE_FIRST:
2354 			     STRCPY(code, "NFA_START_INVISIBLE_FIRST"); break;
2355 	case NFA_START_INVISIBLE_NEG:
2356 			       STRCPY(code, "NFA_START_INVISIBLE_NEG"); break;
2357 	case NFA_START_INVISIBLE_NEG_FIRST:
2358 			 STRCPY(code, "NFA_START_INVISIBLE_NEG_FIRST"); break;
2359 	case NFA_START_INVISIBLE_BEFORE:
2360 			    STRCPY(code, "NFA_START_INVISIBLE_BEFORE"); break;
2361 	case NFA_START_INVISIBLE_BEFORE_FIRST:
2362 		      STRCPY(code, "NFA_START_INVISIBLE_BEFORE_FIRST"); break;
2363 	case NFA_START_INVISIBLE_BEFORE_NEG:
2364 			STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG"); break;
2365 	case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
2366 		  STRCPY(code, "NFA_START_INVISIBLE_BEFORE_NEG_FIRST"); break;
2367 	case NFA_START_PATTERN:   STRCPY(code, "NFA_START_PATTERN"); break;
2368 	case NFA_END_INVISIBLE:	    STRCPY(code, "NFA_END_INVISIBLE"); break;
2369 	case NFA_END_INVISIBLE_NEG: STRCPY(code, "NFA_END_INVISIBLE_NEG"); break;
2370 	case NFA_END_PATTERN:	    STRCPY(code, "NFA_END_PATTERN"); break;
2371 
2372 	case NFA_COMPOSING:	    STRCPY(code, "NFA_COMPOSING"); break;
2373 	case NFA_END_COMPOSING:	    STRCPY(code, "NFA_END_COMPOSING"); break;
2374 	case NFA_OPT_CHARS:	    STRCPY(code, "NFA_OPT_CHARS"); break;
2375 
2376 	case NFA_MOPEN:
2377 	case NFA_MOPEN1:
2378 	case NFA_MOPEN2:
2379 	case NFA_MOPEN3:
2380 	case NFA_MOPEN4:
2381 	case NFA_MOPEN5:
2382 	case NFA_MOPEN6:
2383 	case NFA_MOPEN7:
2384 	case NFA_MOPEN8:
2385 	case NFA_MOPEN9:
2386 	    STRCPY(code, "NFA_MOPEN(x)");
2387 	    code[10] = c - NFA_MOPEN + '0';
2388 	    break;
2389 	case NFA_MCLOSE:
2390 	case NFA_MCLOSE1:
2391 	case NFA_MCLOSE2:
2392 	case NFA_MCLOSE3:
2393 	case NFA_MCLOSE4:
2394 	case NFA_MCLOSE5:
2395 	case NFA_MCLOSE6:
2396 	case NFA_MCLOSE7:
2397 	case NFA_MCLOSE8:
2398 	case NFA_MCLOSE9:
2399 	    STRCPY(code, "NFA_MCLOSE(x)");
2400 	    code[11] = c - NFA_MCLOSE + '0';
2401 	    break;
2402 #ifdef FEAT_SYN_HL
2403 	case NFA_ZOPEN:
2404 	case NFA_ZOPEN1:
2405 	case NFA_ZOPEN2:
2406 	case NFA_ZOPEN3:
2407 	case NFA_ZOPEN4:
2408 	case NFA_ZOPEN5:
2409 	case NFA_ZOPEN6:
2410 	case NFA_ZOPEN7:
2411 	case NFA_ZOPEN8:
2412 	case NFA_ZOPEN9:
2413 	    STRCPY(code, "NFA_ZOPEN(x)");
2414 	    code[10] = c - NFA_ZOPEN + '0';
2415 	    break;
2416 	case NFA_ZCLOSE:
2417 	case NFA_ZCLOSE1:
2418 	case NFA_ZCLOSE2:
2419 	case NFA_ZCLOSE3:
2420 	case NFA_ZCLOSE4:
2421 	case NFA_ZCLOSE5:
2422 	case NFA_ZCLOSE6:
2423 	case NFA_ZCLOSE7:
2424 	case NFA_ZCLOSE8:
2425 	case NFA_ZCLOSE9:
2426 	    STRCPY(code, "NFA_ZCLOSE(x)");
2427 	    code[11] = c - NFA_ZCLOSE + '0';
2428 	    break;
2429 #endif
2430 	case NFA_EOL:		STRCPY(code, "NFA_EOL "); break;
2431 	case NFA_BOL:		STRCPY(code, "NFA_BOL "); break;
2432 	case NFA_EOW:		STRCPY(code, "NFA_EOW "); break;
2433 	case NFA_BOW:		STRCPY(code, "NFA_BOW "); break;
2434 	case NFA_EOF:		STRCPY(code, "NFA_EOF "); break;
2435 	case NFA_BOF:		STRCPY(code, "NFA_BOF "); break;
2436 	case NFA_LNUM:		STRCPY(code, "NFA_LNUM "); break;
2437 	case NFA_LNUM_GT:	STRCPY(code, "NFA_LNUM_GT "); break;
2438 	case NFA_LNUM_LT:	STRCPY(code, "NFA_LNUM_LT "); break;
2439 	case NFA_COL:		STRCPY(code, "NFA_COL "); break;
2440 	case NFA_COL_GT:	STRCPY(code, "NFA_COL_GT "); break;
2441 	case NFA_COL_LT:	STRCPY(code, "NFA_COL_LT "); break;
2442 	case NFA_VCOL:		STRCPY(code, "NFA_VCOL "); break;
2443 	case NFA_VCOL_GT:	STRCPY(code, "NFA_VCOL_GT "); break;
2444 	case NFA_VCOL_LT:	STRCPY(code, "NFA_VCOL_LT "); break;
2445 	case NFA_MARK:		STRCPY(code, "NFA_MARK "); break;
2446 	case NFA_MARK_GT:	STRCPY(code, "NFA_MARK_GT "); break;
2447 	case NFA_MARK_LT:	STRCPY(code, "NFA_MARK_LT "); break;
2448 	case NFA_CURSOR:	STRCPY(code, "NFA_CURSOR "); break;
2449 	case NFA_VISUAL:	STRCPY(code, "NFA_VISUAL "); break;
2450 	case NFA_ANY_COMPOSING:	STRCPY(code, "NFA_ANY_COMPOSING "); break;
2451 
2452 	case NFA_STAR:		STRCPY(code, "NFA_STAR "); break;
2453 	case NFA_STAR_NONGREEDY: STRCPY(code, "NFA_STAR_NONGREEDY "); break;
2454 	case NFA_QUEST:		STRCPY(code, "NFA_QUEST"); break;
2455 	case NFA_QUEST_NONGREEDY: STRCPY(code, "NFA_QUEST_NON_GREEDY"); break;
2456 	case NFA_EMPTY:		STRCPY(code, "NFA_EMPTY"); break;
2457 	case NFA_OR:		STRCPY(code, "NFA_OR"); break;
2458 
2459 	case NFA_START_COLL:	STRCPY(code, "NFA_START_COLL"); break;
2460 	case NFA_END_COLL:	STRCPY(code, "NFA_END_COLL"); break;
2461 	case NFA_START_NEG_COLL: STRCPY(code, "NFA_START_NEG_COLL"); break;
2462 	case NFA_END_NEG_COLL:	STRCPY(code, "NFA_END_NEG_COLL"); break;
2463 	case NFA_RANGE:		STRCPY(code, "NFA_RANGE"); break;
2464 	case NFA_RANGE_MIN:	STRCPY(code, "NFA_RANGE_MIN"); break;
2465 	case NFA_RANGE_MAX:	STRCPY(code, "NFA_RANGE_MAX"); break;
2466 
2467 	case NFA_CLASS_ALNUM:	STRCPY(code, "NFA_CLASS_ALNUM"); break;
2468 	case NFA_CLASS_ALPHA:	STRCPY(code, "NFA_CLASS_ALPHA"); break;
2469 	case NFA_CLASS_BLANK:	STRCPY(code, "NFA_CLASS_BLANK"); break;
2470 	case NFA_CLASS_CNTRL:	STRCPY(code, "NFA_CLASS_CNTRL"); break;
2471 	case NFA_CLASS_DIGIT:	STRCPY(code, "NFA_CLASS_DIGIT"); break;
2472 	case NFA_CLASS_GRAPH:	STRCPY(code, "NFA_CLASS_GRAPH"); break;
2473 	case NFA_CLASS_LOWER:	STRCPY(code, "NFA_CLASS_LOWER"); break;
2474 	case NFA_CLASS_PRINT:	STRCPY(code, "NFA_CLASS_PRINT"); break;
2475 	case NFA_CLASS_PUNCT:	STRCPY(code, "NFA_CLASS_PUNCT"); break;
2476 	case NFA_CLASS_SPACE:	STRCPY(code, "NFA_CLASS_SPACE"); break;
2477 	case NFA_CLASS_UPPER:	STRCPY(code, "NFA_CLASS_UPPER"); break;
2478 	case NFA_CLASS_XDIGIT:	STRCPY(code, "NFA_CLASS_XDIGIT"); break;
2479 	case NFA_CLASS_TAB:	STRCPY(code, "NFA_CLASS_TAB"); break;
2480 	case NFA_CLASS_RETURN:	STRCPY(code, "NFA_CLASS_RETURN"); break;
2481 	case NFA_CLASS_BACKSPACE:   STRCPY(code, "NFA_CLASS_BACKSPACE"); break;
2482 	case NFA_CLASS_ESCAPE:	STRCPY(code, "NFA_CLASS_ESCAPE"); break;
2483 
2484 	case NFA_ANY:	STRCPY(code, "NFA_ANY"); break;
2485 	case NFA_IDENT:	STRCPY(code, "NFA_IDENT"); break;
2486 	case NFA_SIDENT:STRCPY(code, "NFA_SIDENT"); break;
2487 	case NFA_KWORD:	STRCPY(code, "NFA_KWORD"); break;
2488 	case NFA_SKWORD:STRCPY(code, "NFA_SKWORD"); break;
2489 	case NFA_FNAME:	STRCPY(code, "NFA_FNAME"); break;
2490 	case NFA_SFNAME:STRCPY(code, "NFA_SFNAME"); break;
2491 	case NFA_PRINT:	STRCPY(code, "NFA_PRINT"); break;
2492 	case NFA_SPRINT:STRCPY(code, "NFA_SPRINT"); break;
2493 	case NFA_WHITE:	STRCPY(code, "NFA_WHITE"); break;
2494 	case NFA_NWHITE:STRCPY(code, "NFA_NWHITE"); break;
2495 	case NFA_DIGIT:	STRCPY(code, "NFA_DIGIT"); break;
2496 	case NFA_NDIGIT:STRCPY(code, "NFA_NDIGIT"); break;
2497 	case NFA_HEX:	STRCPY(code, "NFA_HEX"); break;
2498 	case NFA_NHEX:	STRCPY(code, "NFA_NHEX"); break;
2499 	case NFA_OCTAL:	STRCPY(code, "NFA_OCTAL"); break;
2500 	case NFA_NOCTAL:STRCPY(code, "NFA_NOCTAL"); break;
2501 	case NFA_WORD:	STRCPY(code, "NFA_WORD"); break;
2502 	case NFA_NWORD:	STRCPY(code, "NFA_NWORD"); break;
2503 	case NFA_HEAD:	STRCPY(code, "NFA_HEAD"); break;
2504 	case NFA_NHEAD:	STRCPY(code, "NFA_NHEAD"); break;
2505 	case NFA_ALPHA:	STRCPY(code, "NFA_ALPHA"); break;
2506 	case NFA_NALPHA:STRCPY(code, "NFA_NALPHA"); break;
2507 	case NFA_LOWER:	STRCPY(code, "NFA_LOWER"); break;
2508 	case NFA_NLOWER:STRCPY(code, "NFA_NLOWER"); break;
2509 	case NFA_UPPER:	STRCPY(code, "NFA_UPPER"); break;
2510 	case NFA_NUPPER:STRCPY(code, "NFA_NUPPER"); break;
2511 	case NFA_LOWER_IC:  STRCPY(code, "NFA_LOWER_IC"); break;
2512 	case NFA_NLOWER_IC: STRCPY(code, "NFA_NLOWER_IC"); break;
2513 	case NFA_UPPER_IC:  STRCPY(code, "NFA_UPPER_IC"); break;
2514 	case NFA_NUPPER_IC: STRCPY(code, "NFA_NUPPER_IC"); break;
2515 
2516 	default:
2517 	    STRCPY(code, "CHAR(x)");
2518 	    code[5] = c;
2519     }
2520 
2521     if (addnl == TRUE)
2522 	STRCAT(code, " + NEWLINE ");
2523 
2524 }
2525 
2526 #ifdef ENABLE_LOG
2527 static FILE *log_fd;
2528 
2529 /*
2530  * Print the postfix notation of the current regexp.
2531  */
2532     static void
2533 nfa_postfix_dump(expr, retval)
2534     char_u  *expr;
2535     int	    retval;
2536 {
2537     int *p;
2538     FILE *f;
2539 
2540     f = fopen(NFA_REGEXP_DUMP_LOG, "a");
2541     if (f != NULL)
2542     {
2543 	fprintf(f, "\n-------------------------\n");
2544 	if (retval == FAIL)
2545 	    fprintf(f, ">>> NFA engine failed ... \n");
2546 	else if (retval == OK)
2547 	    fprintf(f, ">>> NFA engine succeeded !\n");
2548 	fprintf(f, "Regexp: \"%s\"\nPostfix notation (char): \"", expr);
2549 	for (p = post_start; *p && p < post_ptr; p++)
2550 	{
2551 	    nfa_set_code(*p);
2552 	    fprintf(f, "%s, ", code);
2553 	}
2554 	fprintf(f, "\"\nPostfix notation (int): ");
2555 	for (p = post_start; *p && p < post_ptr; p++)
2556 		fprintf(f, "%d ", *p);
2557 	fprintf(f, "\n\n");
2558 	fclose(f);
2559     }
2560 }
2561 
2562 /*
2563  * Print the NFA starting with a root node "state".
2564  */
2565     static void
2566 nfa_print_state(debugf, state)
2567     FILE *debugf;
2568     nfa_state_T *state;
2569 {
2570     garray_T indent;
2571 
2572     ga_init2(&indent, 1, 64);
2573     ga_append(&indent, '\0');
2574     nfa_print_state2(debugf, state, &indent);
2575     ga_clear(&indent);
2576 }
2577 
2578     static void
2579 nfa_print_state2(debugf, state, indent)
2580     FILE *debugf;
2581     nfa_state_T *state;
2582     garray_T *indent;
2583 {
2584     char_u  *p;
2585 
2586     if (state == NULL)
2587 	return;
2588 
2589     fprintf(debugf, "(%2d)", abs(state->id));
2590 
2591     /* Output indent */
2592     p = (char_u *)indent->ga_data;
2593     if (indent->ga_len >= 3)
2594     {
2595 	int	last = indent->ga_len - 3;
2596 	char_u	save[2];
2597 
2598 	STRNCPY(save, &p[last], 2);
2599 	STRNCPY(&p[last], "+-", 2);
2600 	fprintf(debugf, " %s", p);
2601 	STRNCPY(&p[last], save, 2);
2602     }
2603     else
2604 	fprintf(debugf, " %s", p);
2605 
2606     nfa_set_code(state->c);
2607     fprintf(debugf, "%s (%d) (id=%d) val=%d\n",
2608 		 code,
2609 		 state->c,
2610 		 abs(state->id),
2611 		 state->val);
2612     if (state->id < 0)
2613 	return;
2614 
2615     state->id = abs(state->id) * -1;
2616 
2617     /* grow indent for state->out */
2618     indent->ga_len -= 1;
2619     if (state->out1)
2620 	ga_concat(indent, (char_u *)"| ");
2621     else
2622 	ga_concat(indent, (char_u *)"  ");
2623     ga_append(indent, '\0');
2624 
2625     nfa_print_state2(debugf, state->out, indent);
2626 
2627     /* replace last part of indent for state->out1 */
2628     indent->ga_len -= 3;
2629     ga_concat(indent, (char_u *)"  ");
2630     ga_append(indent, '\0');
2631 
2632     nfa_print_state2(debugf, state->out1, indent);
2633 
2634     /* shrink indent */
2635     indent->ga_len -= 3;
2636     ga_append(indent, '\0');
2637 }
2638 
2639 /*
2640  * Print the NFA state machine.
2641  */
2642     static void
2643 nfa_dump(prog)
2644     nfa_regprog_T *prog;
2645 {
2646     FILE *debugf = fopen(NFA_REGEXP_DUMP_LOG, "a");
2647 
2648     if (debugf != NULL)
2649     {
2650 	nfa_print_state(debugf, prog->start);
2651 
2652 	if (prog->reganch)
2653 	    fprintf(debugf, "reganch: %d\n", prog->reganch);
2654 	if (prog->regstart != NUL)
2655 	    fprintf(debugf, "regstart: %c (decimal: %d)\n",
2656 					      prog->regstart, prog->regstart);
2657 	if (prog->match_text != NULL)
2658 	    fprintf(debugf, "match_text: \"%s\"\n", prog->match_text);
2659 
2660 	fclose(debugf);
2661     }
2662 }
2663 #endif	    /* ENABLE_LOG */
2664 #endif	    /* DEBUG */
2665 
2666 /*
2667  * Parse r.e. @expr and convert it into postfix form.
2668  * Return the postfix string on success, NULL otherwise.
2669  */
2670     static int *
2671 re2post()
2672 {
2673     if (nfa_reg(REG_NOPAREN) == FAIL)
2674 	return NULL;
2675     EMIT(NFA_MOPEN);
2676     return post_start;
2677 }
2678 
2679 /* NB. Some of the code below is inspired by Russ's. */
2680 
2681 /*
2682  * Represents an NFA state plus zero or one or two arrows exiting.
2683  * if c == MATCH, no arrows out; matching state.
2684  * If c == SPLIT, unlabeled arrows to out and out1 (if != NULL).
2685  * If c < 256, labeled arrow with character c to out.
2686  */
2687 
2688 static nfa_state_T	*state_ptr; /* points to nfa_prog->state */
2689 
2690 /*
2691  * Allocate and initialize nfa_state_T.
2692  */
2693     static nfa_state_T *
2694 alloc_state(c, out, out1)
2695     int		c;
2696     nfa_state_T	*out;
2697     nfa_state_T	*out1;
2698 {
2699     nfa_state_T *s;
2700 
2701     if (istate >= nstate)
2702 	return NULL;
2703 
2704     s = &state_ptr[istate++];
2705 
2706     s->c    = c;
2707     s->out  = out;
2708     s->out1 = out1;
2709     s->val  = 0;
2710 
2711     s->id   = istate;
2712     s->lastlist[0] = 0;
2713     s->lastlist[1] = 0;
2714 
2715     return s;
2716 }
2717 
2718 /*
2719  * A partially built NFA without the matching state filled in.
2720  * Frag_T.start points at the start state.
2721  * Frag_T.out is a list of places that need to be set to the
2722  * next state for this fragment.
2723  */
2724 
2725 /* Since the out pointers in the list are always
2726  * uninitialized, we use the pointers themselves
2727  * as storage for the Ptrlists. */
2728 typedef union Ptrlist Ptrlist;
2729 union Ptrlist
2730 {
2731     Ptrlist	*next;
2732     nfa_state_T	*s;
2733 };
2734 
2735 struct Frag
2736 {
2737     nfa_state_T *start;
2738     Ptrlist	*out;
2739 };
2740 typedef struct Frag Frag_T;
2741 
2742 static Frag_T frag __ARGS((nfa_state_T *start, Ptrlist *out));
2743 static Ptrlist *list1 __ARGS((nfa_state_T **outp));
2744 static void patch __ARGS((Ptrlist *l, nfa_state_T *s));
2745 static Ptrlist *append __ARGS((Ptrlist *l1, Ptrlist *l2));
2746 static void st_push __ARGS((Frag_T s, Frag_T **p, Frag_T *stack_end));
2747 static Frag_T st_pop __ARGS((Frag_T **p, Frag_T *stack));
2748 
2749 /*
2750  * Initialize a Frag_T struct and return it.
2751  */
2752     static Frag_T
2753 frag(start, out)
2754     nfa_state_T	*start;
2755     Ptrlist	*out;
2756 {
2757     Frag_T n;
2758 
2759     n.start = start;
2760     n.out = out;
2761     return n;
2762 }
2763 
2764 /*
2765  * Create singleton list containing just outp.
2766  */
2767     static Ptrlist *
2768 list1(outp)
2769     nfa_state_T	**outp;
2770 {
2771     Ptrlist *l;
2772 
2773     l = (Ptrlist *)outp;
2774     l->next = NULL;
2775     return l;
2776 }
2777 
2778 /*
2779  * Patch the list of states at out to point to start.
2780  */
2781     static void
2782 patch(l, s)
2783     Ptrlist	*l;
2784     nfa_state_T	*s;
2785 {
2786     Ptrlist *next;
2787 
2788     for (; l; l = next)
2789     {
2790 	next = l->next;
2791 	l->s = s;
2792     }
2793 }
2794 
2795 
2796 /*
2797  * Join the two lists l1 and l2, returning the combination.
2798  */
2799     static Ptrlist *
2800 append(l1, l2)
2801     Ptrlist *l1;
2802     Ptrlist *l2;
2803 {
2804     Ptrlist *oldl1;
2805 
2806     oldl1 = l1;
2807     while (l1->next)
2808 	l1 = l1->next;
2809     l1->next = l2;
2810     return oldl1;
2811 }
2812 
2813 /*
2814  * Stack used for transforming postfix form into NFA.
2815  */
2816 static Frag_T empty;
2817 
2818     static void
2819 st_error(postfix, end, p)
2820     int *postfix UNUSED;
2821     int *end UNUSED;
2822     int *p UNUSED;
2823 {
2824 #ifdef NFA_REGEXP_ERROR_LOG
2825     FILE *df;
2826     int *p2;
2827 
2828     df = fopen(NFA_REGEXP_ERROR_LOG, "a");
2829     if (df)
2830     {
2831 	fprintf(df, "Error popping the stack!\n");
2832 #ifdef DEBUG
2833 	fprintf(df, "Current regexp is \"%s\"\n", nfa_regengine.expr);
2834 #endif
2835 	fprintf(df, "Postfix form is: ");
2836 #ifdef DEBUG
2837 	for (p2 = postfix; p2 < end; p2++)
2838 	{
2839 	    nfa_set_code(*p2);
2840 	    fprintf(df, "%s, ", code);
2841 	}
2842 	nfa_set_code(*p);
2843 	fprintf(df, "\nCurrent position is: ");
2844 	for (p2 = postfix; p2 <= p; p2 ++)
2845 	{
2846 	    nfa_set_code(*p2);
2847 	    fprintf(df, "%s, ", code);
2848 	}
2849 #else
2850 	for (p2 = postfix; p2 < end; p2++)
2851 	{
2852 	    fprintf(df, "%d, ", *p2);
2853 	}
2854 	fprintf(df, "\nCurrent position is: ");
2855 	for (p2 = postfix; p2 <= p; p2 ++)
2856 	{
2857 	    fprintf(df, "%d, ", *p2);
2858 	}
2859 #endif
2860 	fprintf(df, "\n--------------------------\n");
2861 	fclose(df);
2862     }
2863 #endif
2864     EMSG(_("E874: (NFA) Could not pop the stack !"));
2865 }
2866 
2867 /*
2868  * Push an item onto the stack.
2869  */
2870     static void
2871 st_push(s, p, stack_end)
2872     Frag_T s;
2873     Frag_T **p;
2874     Frag_T *stack_end;
2875 {
2876     Frag_T *stackp = *p;
2877 
2878     if (stackp >= stack_end)
2879 	return;
2880     *stackp = s;
2881     *p = *p + 1;
2882 }
2883 
2884 /*
2885  * Pop an item from the stack.
2886  */
2887     static Frag_T
2888 st_pop(p, stack)
2889     Frag_T **p;
2890     Frag_T *stack;
2891 {
2892     Frag_T *stackp;
2893 
2894     *p = *p - 1;
2895     stackp = *p;
2896     if (stackp < stack)
2897 	return empty;
2898     return **p;
2899 }
2900 
2901 /*
2902  * Estimate the maximum byte length of anything matching "state".
2903  * When unknown or unlimited return -1.
2904  */
2905     static int
2906 nfa_max_width(startstate, depth)
2907     nfa_state_T *startstate;
2908     int		depth;
2909 {
2910     int		    l, r;
2911     nfa_state_T	    *state = startstate;
2912     int		    len = 0;
2913 
2914     /* detect looping in a NFA_SPLIT */
2915     if (depth > 4)
2916 	return -1;
2917 
2918     while (state != NULL)
2919     {
2920 	switch (state->c)
2921 	{
2922 	    case NFA_END_INVISIBLE:
2923 	    case NFA_END_INVISIBLE_NEG:
2924 		/* the end, return what we have */
2925 		return len;
2926 
2927 	    case NFA_SPLIT:
2928 		/* two alternatives, use the maximum */
2929 		l = nfa_max_width(state->out, depth + 1);
2930 		r = nfa_max_width(state->out1, depth + 1);
2931 		if (l < 0 || r < 0)
2932 		    return -1;
2933 		return len + (l > r ? l : r);
2934 
2935 	    case NFA_ANY:
2936 	    case NFA_START_COLL:
2937 	    case NFA_START_NEG_COLL:
2938 		/* matches some character, including composing chars */
2939 #ifdef FEAT_MBYTE
2940 		if (enc_utf8)
2941 		    len += MB_MAXBYTES;
2942 		else if (has_mbyte)
2943 		    len += 2;
2944 		else
2945 #endif
2946 		    ++len;
2947 		if (state->c != NFA_ANY)
2948 		{
2949 		    /* skip over the characters */
2950 		    state = state->out1->out;
2951 		    continue;
2952 		}
2953 		break;
2954 
2955 	    case NFA_DIGIT:
2956 	    case NFA_WHITE:
2957 	    case NFA_HEX:
2958 	    case NFA_OCTAL:
2959 		/* ascii */
2960 		++len;
2961 		break;
2962 
2963 	    case NFA_IDENT:
2964 	    case NFA_SIDENT:
2965 	    case NFA_KWORD:
2966 	    case NFA_SKWORD:
2967 	    case NFA_FNAME:
2968 	    case NFA_SFNAME:
2969 	    case NFA_PRINT:
2970 	    case NFA_SPRINT:
2971 	    case NFA_NWHITE:
2972 	    case NFA_NDIGIT:
2973 	    case NFA_NHEX:
2974 	    case NFA_NOCTAL:
2975 	    case NFA_WORD:
2976 	    case NFA_NWORD:
2977 	    case NFA_HEAD:
2978 	    case NFA_NHEAD:
2979 	    case NFA_ALPHA:
2980 	    case NFA_NALPHA:
2981 	    case NFA_LOWER:
2982 	    case NFA_NLOWER:
2983 	    case NFA_UPPER:
2984 	    case NFA_NUPPER:
2985 	    case NFA_LOWER_IC:
2986 	    case NFA_NLOWER_IC:
2987 	    case NFA_UPPER_IC:
2988 	    case NFA_NUPPER_IC:
2989 	    case NFA_ANY_COMPOSING:
2990 		/* possibly non-ascii */
2991 #ifdef FEAT_MBYTE
2992 		if (has_mbyte)
2993 		    len += 3;
2994 		else
2995 #endif
2996 		    ++len;
2997 		break;
2998 
2999 	    case NFA_START_INVISIBLE:
3000 	    case NFA_START_INVISIBLE_NEG:
3001 	    case NFA_START_INVISIBLE_BEFORE:
3002 	    case NFA_START_INVISIBLE_BEFORE_NEG:
3003 		/* zero-width, out1 points to the END state */
3004 		state = state->out1->out;
3005 		continue;
3006 
3007 	    case NFA_BACKREF1:
3008 	    case NFA_BACKREF2:
3009 	    case NFA_BACKREF3:
3010 	    case NFA_BACKREF4:
3011 	    case NFA_BACKREF5:
3012 	    case NFA_BACKREF6:
3013 	    case NFA_BACKREF7:
3014 	    case NFA_BACKREF8:
3015 	    case NFA_BACKREF9:
3016 #ifdef FEAT_SYN_HL
3017 	    case NFA_ZREF1:
3018 	    case NFA_ZREF2:
3019 	    case NFA_ZREF3:
3020 	    case NFA_ZREF4:
3021 	    case NFA_ZREF5:
3022 	    case NFA_ZREF6:
3023 	    case NFA_ZREF7:
3024 	    case NFA_ZREF8:
3025 	    case NFA_ZREF9:
3026 #endif
3027 	    case NFA_NEWL:
3028 	    case NFA_SKIP:
3029 		/* unknown width */
3030 		return -1;
3031 
3032 	    case NFA_BOL:
3033 	    case NFA_EOL:
3034 	    case NFA_BOF:
3035 	    case NFA_EOF:
3036 	    case NFA_BOW:
3037 	    case NFA_EOW:
3038 	    case NFA_MOPEN:
3039 	    case NFA_MOPEN1:
3040 	    case NFA_MOPEN2:
3041 	    case NFA_MOPEN3:
3042 	    case NFA_MOPEN4:
3043 	    case NFA_MOPEN5:
3044 	    case NFA_MOPEN6:
3045 	    case NFA_MOPEN7:
3046 	    case NFA_MOPEN8:
3047 	    case NFA_MOPEN9:
3048 #ifdef FEAT_SYN_HL
3049 	    case NFA_ZOPEN:
3050 	    case NFA_ZOPEN1:
3051 	    case NFA_ZOPEN2:
3052 	    case NFA_ZOPEN3:
3053 	    case NFA_ZOPEN4:
3054 	    case NFA_ZOPEN5:
3055 	    case NFA_ZOPEN6:
3056 	    case NFA_ZOPEN7:
3057 	    case NFA_ZOPEN8:
3058 	    case NFA_ZOPEN9:
3059 	    case NFA_ZCLOSE:
3060 	    case NFA_ZCLOSE1:
3061 	    case NFA_ZCLOSE2:
3062 	    case NFA_ZCLOSE3:
3063 	    case NFA_ZCLOSE4:
3064 	    case NFA_ZCLOSE5:
3065 	    case NFA_ZCLOSE6:
3066 	    case NFA_ZCLOSE7:
3067 	    case NFA_ZCLOSE8:
3068 	    case NFA_ZCLOSE9:
3069 #endif
3070 	    case NFA_MCLOSE:
3071 	    case NFA_MCLOSE1:
3072 	    case NFA_MCLOSE2:
3073 	    case NFA_MCLOSE3:
3074 	    case NFA_MCLOSE4:
3075 	    case NFA_MCLOSE5:
3076 	    case NFA_MCLOSE6:
3077 	    case NFA_MCLOSE7:
3078 	    case NFA_MCLOSE8:
3079 	    case NFA_MCLOSE9:
3080 	    case NFA_NOPEN:
3081 	    case NFA_NCLOSE:
3082 
3083 	    case NFA_LNUM_GT:
3084 	    case NFA_LNUM_LT:
3085 	    case NFA_COL_GT:
3086 	    case NFA_COL_LT:
3087 	    case NFA_VCOL_GT:
3088 	    case NFA_VCOL_LT:
3089 	    case NFA_MARK_GT:
3090 	    case NFA_MARK_LT:
3091 	    case NFA_VISUAL:
3092 	    case NFA_LNUM:
3093 	    case NFA_CURSOR:
3094 	    case NFA_COL:
3095 	    case NFA_VCOL:
3096 	    case NFA_MARK:
3097 
3098 	    case NFA_ZSTART:
3099 	    case NFA_ZEND:
3100 	    case NFA_OPT_CHARS:
3101 	    case NFA_EMPTY:
3102 	    case NFA_START_PATTERN:
3103 	    case NFA_END_PATTERN:
3104 	    case NFA_COMPOSING:
3105 	    case NFA_END_COMPOSING:
3106 		/* zero-width */
3107 		break;
3108 
3109 	    default:
3110 		if (state->c < 0)
3111 		    /* don't know what this is */
3112 		    return -1;
3113 		/* normal character */
3114 		len += MB_CHAR2LEN(state->c);
3115 		break;
3116 	}
3117 
3118 	/* normal way to continue */
3119 	state = state->out;
3120     }
3121 
3122     /* unrecognized, "cannot happen" */
3123     return -1;
3124 }
3125 
3126 /*
3127  * Convert a postfix form into its equivalent NFA.
3128  * Return the NFA start state on success, NULL otherwise.
3129  */
3130     static nfa_state_T *
3131 post2nfa(postfix, end, nfa_calc_size)
3132     int		*postfix;
3133     int		*end;
3134     int		nfa_calc_size;
3135 {
3136     int		*p;
3137     int		mopen;
3138     int		mclose;
3139     Frag_T	*stack = NULL;
3140     Frag_T	*stackp = NULL;
3141     Frag_T	*stack_end = NULL;
3142     Frag_T	e1;
3143     Frag_T	e2;
3144     Frag_T	e;
3145     nfa_state_T	*s;
3146     nfa_state_T	*s1;
3147     nfa_state_T	*matchstate;
3148     nfa_state_T	*ret = NULL;
3149 
3150     if (postfix == NULL)
3151 	return NULL;
3152 
3153 #define PUSH(s)	    st_push((s), &stackp, stack_end)
3154 #define POP()	    st_pop(&stackp, stack);		\
3155 		    if (stackp < stack)			\
3156 		    {					\
3157 			st_error(postfix, end, p);	\
3158 			vim_free(stack);		\
3159 			return NULL;			\
3160 		    }
3161 
3162     if (nfa_calc_size == FALSE)
3163     {
3164 	/* Allocate space for the stack. Max states on the stack : nstate */
3165 	stack = (Frag_T *)lalloc((nstate + 1) * sizeof(Frag_T), TRUE);
3166 	stackp = stack;
3167 	stack_end = stack + (nstate + 1);
3168     }
3169 
3170     for (p = postfix; p < end; ++p)
3171     {
3172 	switch (*p)
3173 	{
3174 	case NFA_CONCAT:
3175 	    /* Concatenation.
3176 	     * Pay attention: this operator does not exist in the r.e. itself
3177 	     * (it is implicit, really).  It is added when r.e. is translated
3178 	     * to postfix form in re2post(). */
3179 	    if (nfa_calc_size == TRUE)
3180 	    {
3181 		/* nstate += 0; */
3182 		break;
3183 	    }
3184 	    e2 = POP();
3185 	    e1 = POP();
3186 	    patch(e1.out, e2.start);
3187 	    PUSH(frag(e1.start, e2.out));
3188 	    break;
3189 
3190 	case NFA_OR:
3191 	    /* Alternation */
3192 	    if (nfa_calc_size == TRUE)
3193 	    {
3194 		nstate++;
3195 		break;
3196 	    }
3197 	    e2 = POP();
3198 	    e1 = POP();
3199 	    s = alloc_state(NFA_SPLIT, e1.start, e2.start);
3200 	    if (s == NULL)
3201 		goto theend;
3202 	    PUSH(frag(s, append(e1.out, e2.out)));
3203 	    break;
3204 
3205 	case NFA_STAR:
3206 	    /* Zero or more, prefer more */
3207 	    if (nfa_calc_size == TRUE)
3208 	    {
3209 		nstate++;
3210 		break;
3211 	    }
3212 	    e = POP();
3213 	    s = alloc_state(NFA_SPLIT, e.start, NULL);
3214 	    if (s == NULL)
3215 		goto theend;
3216 	    patch(e.out, s);
3217 	    PUSH(frag(s, list1(&s->out1)));
3218 	    break;
3219 
3220 	case NFA_STAR_NONGREEDY:
3221 	    /* Zero or more, prefer zero */
3222 	    if (nfa_calc_size == TRUE)
3223 	    {
3224 		nstate++;
3225 		break;
3226 	    }
3227 	    e = POP();
3228 	    s = alloc_state(NFA_SPLIT, NULL, e.start);
3229 	    if (s == NULL)
3230 		goto theend;
3231 	    patch(e.out, s);
3232 	    PUSH(frag(s, list1(&s->out)));
3233 	    break;
3234 
3235 	case NFA_QUEST:
3236 	    /* one or zero atoms=> greedy match */
3237 	    if (nfa_calc_size == TRUE)
3238 	    {
3239 		nstate++;
3240 		break;
3241 	    }
3242 	    e = POP();
3243 	    s = alloc_state(NFA_SPLIT, e.start, NULL);
3244 	    if (s == NULL)
3245 		goto theend;
3246 	    PUSH(frag(s, append(e.out, list1(&s->out1))));
3247 	    break;
3248 
3249 	case NFA_QUEST_NONGREEDY:
3250 	    /* zero or one atoms => non-greedy match */
3251 	    if (nfa_calc_size == TRUE)
3252 	    {
3253 		nstate++;
3254 		break;
3255 	    }
3256 	    e = POP();
3257 	    s = alloc_state(NFA_SPLIT, NULL, e.start);
3258 	    if (s == NULL)
3259 		goto theend;
3260 	    PUSH(frag(s, append(e.out, list1(&s->out))));
3261 	    break;
3262 
3263 	case NFA_END_COLL:
3264 	case NFA_END_NEG_COLL:
3265 	    /* On the stack is the sequence starting with NFA_START_COLL or
3266 	     * NFA_START_NEG_COLL and all possible characters. Patch it to
3267 	     * add the output to the start. */
3268 	    if (nfa_calc_size == TRUE)
3269 	    {
3270 		nstate++;
3271 		break;
3272 	    }
3273 	    e = POP();
3274 	    s = alloc_state(NFA_END_COLL, NULL, NULL);
3275 	    if (s == NULL)
3276 		goto theend;
3277 	    patch(e.out, s);
3278 	    e.start->out1 = s;
3279 	    PUSH(frag(e.start, list1(&s->out)));
3280 	    break;
3281 
3282 	case NFA_RANGE:
3283 	    /* Before this are two characters, the low and high end of a
3284 	     * range.  Turn them into two states with MIN and MAX. */
3285 	    if (nfa_calc_size == TRUE)
3286 	    {
3287 		/* nstate += 0; */
3288 		break;
3289 	    }
3290 	    e2 = POP();
3291 	    e1 = POP();
3292 	    e2.start->val = e2.start->c;
3293 	    e2.start->c = NFA_RANGE_MAX;
3294 	    e1.start->val = e1.start->c;
3295 	    e1.start->c = NFA_RANGE_MIN;
3296 	    patch(e1.out, e2.start);
3297 	    PUSH(frag(e1.start, e2.out));
3298 	    break;
3299 
3300 	case NFA_EMPTY:
3301 	    /* 0-length, used in a repetition with max/min count of 0 */
3302 	    if (nfa_calc_size == TRUE)
3303 	    {
3304 		nstate++;
3305 		break;
3306 	    }
3307 	    s = alloc_state(NFA_EMPTY, NULL, NULL);
3308 	    if (s == NULL)
3309 		goto theend;
3310 	    PUSH(frag(s, list1(&s->out)));
3311 	    break;
3312 
3313 	case NFA_OPT_CHARS:
3314 	  {
3315 	    int    n;
3316 
3317 	    /* \%[abc] implemented as:
3318 	     *    NFA_SPLIT
3319 	     *    +-CHAR(a)
3320 	     *    | +-NFA_SPLIT
3321 	     *    |   +-CHAR(b)
3322 	     *    |   | +-NFA_SPLIT
3323 	     *    |   |   +-CHAR(c)
3324 	     *    |   |   | +-next
3325 	     *    |   |   +- next
3326 	     *    |   +- next
3327 	     *    +- next
3328 	     */
3329 	    n = *++p; /* get number of characters */
3330 	    if (nfa_calc_size == TRUE)
3331 	    {
3332 		nstate += n;
3333 		break;
3334 	    }
3335 	    s = NULL; /* avoid compiler warning */
3336 	    e1.out = NULL; /* stores list with out1's */
3337 	    s1 = NULL; /* previous NFA_SPLIT to connect to */
3338 	    while (n-- > 0)
3339 	    {
3340 		e = POP(); /* get character */
3341 		s = alloc_state(NFA_SPLIT, e.start, NULL);
3342 		if (s == NULL)
3343 		    goto theend;
3344 		if (e1.out == NULL)
3345 		    e1 = e;
3346 		patch(e.out, s1);
3347 		append(e1.out, list1(&s->out1));
3348 		s1 = s;
3349 	    }
3350 	    PUSH(frag(s, e1.out));
3351 	    break;
3352 	  }
3353 
3354 	case NFA_PREV_ATOM_NO_WIDTH:
3355 	case NFA_PREV_ATOM_NO_WIDTH_NEG:
3356 	case NFA_PREV_ATOM_JUST_BEFORE:
3357 	case NFA_PREV_ATOM_JUST_BEFORE_NEG:
3358 	case NFA_PREV_ATOM_LIKE_PATTERN:
3359 	  {
3360 	    int before = (*p == NFA_PREV_ATOM_JUST_BEFORE
3361 				      || *p == NFA_PREV_ATOM_JUST_BEFORE_NEG);
3362 	    int pattern = (*p == NFA_PREV_ATOM_LIKE_PATTERN);
3363 	    int start_state;
3364 	    int end_state;
3365 	    int n = 0;
3366 	    nfa_state_T *zend;
3367 	    nfa_state_T *skip;
3368 
3369 	    switch (*p)
3370 	    {
3371 		case NFA_PREV_ATOM_NO_WIDTH:
3372 		    start_state = NFA_START_INVISIBLE;
3373 		    end_state = NFA_END_INVISIBLE;
3374 		    break;
3375 		case NFA_PREV_ATOM_NO_WIDTH_NEG:
3376 		    start_state = NFA_START_INVISIBLE_NEG;
3377 		    end_state = NFA_END_INVISIBLE_NEG;
3378 		    break;
3379 		case NFA_PREV_ATOM_JUST_BEFORE:
3380 		    start_state = NFA_START_INVISIBLE_BEFORE;
3381 		    end_state = NFA_END_INVISIBLE;
3382 		    break;
3383 		case NFA_PREV_ATOM_JUST_BEFORE_NEG:
3384 		    start_state = NFA_START_INVISIBLE_BEFORE_NEG;
3385 		    end_state = NFA_END_INVISIBLE_NEG;
3386 		    break;
3387 		default: /* NFA_PREV_ATOM_LIKE_PATTERN: */
3388 		    start_state = NFA_START_PATTERN;
3389 		    end_state = NFA_END_PATTERN;
3390 		    break;
3391 	    }
3392 
3393 	    if (before)
3394 		n = *++p; /* get the count */
3395 
3396 	    /* The \@= operator: match the preceding atom with zero width.
3397 	     * The \@! operator: no match for the preceding atom.
3398 	     * The \@<= operator: match for the preceding atom.
3399 	     * The \@<! operator: no match for the preceding atom.
3400 	     * Surrounds the preceding atom with START_INVISIBLE and
3401 	     * END_INVISIBLE, similarly to MOPEN. */
3402 
3403 	    if (nfa_calc_size == TRUE)
3404 	    {
3405 		nstate += pattern ? 4 : 2;
3406 		break;
3407 	    }
3408 	    e = POP();
3409 	    s1 = alloc_state(end_state, NULL, NULL);
3410 	    if (s1 == NULL)
3411 		goto theend;
3412 
3413 	    s = alloc_state(start_state, e.start, s1);
3414 	    if (s == NULL)
3415 		goto theend;
3416 	    if (pattern)
3417 	    {
3418 		/* NFA_ZEND -> NFA_END_PATTERN -> NFA_SKIP -> what follows. */
3419 		skip = alloc_state(NFA_SKIP, NULL, NULL);
3420 		zend = alloc_state(NFA_ZEND, s1, NULL);
3421 		s1->out= skip;
3422 		patch(e.out, zend);
3423 		PUSH(frag(s, list1(&skip->out)));
3424 	    }
3425 	    else
3426 	    {
3427 		patch(e.out, s1);
3428 		PUSH(frag(s, list1(&s1->out)));
3429 		if (before)
3430 		{
3431 		    if (n <= 0)
3432 			/* See if we can guess the maximum width, it avoids a
3433 			 * lot of pointless tries. */
3434 			n = nfa_max_width(e.start, 0);
3435 		    s->val = n; /* store the count */
3436 		}
3437 	    }
3438 	    break;
3439 	  }
3440 
3441 #ifdef FEAT_MBYTE
3442 	case NFA_COMPOSING:	/* char with composing char */
3443 #if 0
3444 	    /* TODO */
3445 	    if (regflags & RF_ICOMBINE)
3446 	    {
3447 		/* use the base character only */
3448 	    }
3449 #endif
3450 	    /* FALLTHROUGH */
3451 #endif
3452 
3453 	case NFA_MOPEN:	/* \( \) Submatch */
3454 	case NFA_MOPEN1:
3455 	case NFA_MOPEN2:
3456 	case NFA_MOPEN3:
3457 	case NFA_MOPEN4:
3458 	case NFA_MOPEN5:
3459 	case NFA_MOPEN6:
3460 	case NFA_MOPEN7:
3461 	case NFA_MOPEN8:
3462 	case NFA_MOPEN9:
3463 #ifdef FEAT_SYN_HL
3464 	case NFA_ZOPEN:	/* \z( \) Submatch */
3465 	case NFA_ZOPEN1:
3466 	case NFA_ZOPEN2:
3467 	case NFA_ZOPEN3:
3468 	case NFA_ZOPEN4:
3469 	case NFA_ZOPEN5:
3470 	case NFA_ZOPEN6:
3471 	case NFA_ZOPEN7:
3472 	case NFA_ZOPEN8:
3473 	case NFA_ZOPEN9:
3474 #endif
3475 	case NFA_NOPEN:	/* \%( \) "Invisible Submatch" */
3476 	    if (nfa_calc_size == TRUE)
3477 	    {
3478 		nstate += 2;
3479 		break;
3480 	    }
3481 
3482 	    mopen = *p;
3483 	    switch (*p)
3484 	    {
3485 		case NFA_NOPEN: mclose = NFA_NCLOSE; break;
3486 #ifdef FEAT_SYN_HL
3487 		case NFA_ZOPEN: mclose = NFA_ZCLOSE; break;
3488 		case NFA_ZOPEN1: mclose = NFA_ZCLOSE1; break;
3489 		case NFA_ZOPEN2: mclose = NFA_ZCLOSE2; break;
3490 		case NFA_ZOPEN3: mclose = NFA_ZCLOSE3; break;
3491 		case NFA_ZOPEN4: mclose = NFA_ZCLOSE4; break;
3492 		case NFA_ZOPEN5: mclose = NFA_ZCLOSE5; break;
3493 		case NFA_ZOPEN6: mclose = NFA_ZCLOSE6; break;
3494 		case NFA_ZOPEN7: mclose = NFA_ZCLOSE7; break;
3495 		case NFA_ZOPEN8: mclose = NFA_ZCLOSE8; break;
3496 		case NFA_ZOPEN9: mclose = NFA_ZCLOSE9; break;
3497 #endif
3498 #ifdef FEAT_MBYTE
3499 		case NFA_COMPOSING: mclose = NFA_END_COMPOSING; break;
3500 #endif
3501 		default:
3502 		    /* NFA_MOPEN, NFA_MOPEN1 .. NFA_MOPEN9 */
3503 		    mclose = *p + NSUBEXP;
3504 		    break;
3505 	    }
3506 
3507 	    /* Allow "NFA_MOPEN" as a valid postfix representation for
3508 	     * the empty regexp "". In this case, the NFA will be
3509 	     * NFA_MOPEN -> NFA_MCLOSE. Note that this also allows
3510 	     * empty groups of parenthesis, and empty mbyte chars */
3511 	    if (stackp == stack)
3512 	    {
3513 		s = alloc_state(mopen, NULL, NULL);
3514 		if (s == NULL)
3515 		    goto theend;
3516 		s1 = alloc_state(mclose, NULL, NULL);
3517 		if (s1 == NULL)
3518 		    goto theend;
3519 		patch(list1(&s->out), s1);
3520 		PUSH(frag(s, list1(&s1->out)));
3521 		break;
3522 	    }
3523 
3524 	    /* At least one node was emitted before NFA_MOPEN, so
3525 	     * at least one node will be between NFA_MOPEN and NFA_MCLOSE */
3526 	    e = POP();
3527 	    s = alloc_state(mopen, e.start, NULL);   /* `(' */
3528 	    if (s == NULL)
3529 		goto theend;
3530 
3531 	    s1 = alloc_state(mclose, NULL, NULL);   /* `)' */
3532 	    if (s1 == NULL)
3533 		goto theend;
3534 	    patch(e.out, s1);
3535 
3536 #ifdef FEAT_MBYTE
3537 	    if (mopen == NFA_COMPOSING)
3538 		/* COMPOSING->out1 = END_COMPOSING */
3539 		patch(list1(&s->out1), s1);
3540 #endif
3541 
3542 	    PUSH(frag(s, list1(&s1->out)));
3543 	    break;
3544 
3545 	case NFA_BACKREF1:
3546 	case NFA_BACKREF2:
3547 	case NFA_BACKREF3:
3548 	case NFA_BACKREF4:
3549 	case NFA_BACKREF5:
3550 	case NFA_BACKREF6:
3551 	case NFA_BACKREF7:
3552 	case NFA_BACKREF8:
3553 	case NFA_BACKREF9:
3554 #ifdef FEAT_SYN_HL
3555 	case NFA_ZREF1:
3556 	case NFA_ZREF2:
3557 	case NFA_ZREF3:
3558 	case NFA_ZREF4:
3559 	case NFA_ZREF5:
3560 	case NFA_ZREF6:
3561 	case NFA_ZREF7:
3562 	case NFA_ZREF8:
3563 	case NFA_ZREF9:
3564 #endif
3565 	    if (nfa_calc_size == TRUE)
3566 	    {
3567 		nstate += 2;
3568 		break;
3569 	    }
3570 	    s = alloc_state(*p, NULL, NULL);
3571 	    if (s == NULL)
3572 		goto theend;
3573 	    s1 = alloc_state(NFA_SKIP, NULL, NULL);
3574 	    if (s1 == NULL)
3575 		goto theend;
3576 	    patch(list1(&s->out), s1);
3577 	    PUSH(frag(s, list1(&s1->out)));
3578 	    break;
3579 
3580 	case NFA_LNUM:
3581 	case NFA_LNUM_GT:
3582 	case NFA_LNUM_LT:
3583 	case NFA_VCOL:
3584 	case NFA_VCOL_GT:
3585 	case NFA_VCOL_LT:
3586 	case NFA_COL:
3587 	case NFA_COL_GT:
3588 	case NFA_COL_LT:
3589 	case NFA_MARK:
3590 	case NFA_MARK_GT:
3591 	case NFA_MARK_LT:
3592 	  {
3593 	    int n = *++p; /* lnum, col or mark name */
3594 
3595 	    if (nfa_calc_size == TRUE)
3596 	    {
3597 		nstate += 1;
3598 		break;
3599 	    }
3600 	    s = alloc_state(p[-1], NULL, NULL);
3601 	    if (s == NULL)
3602 		goto theend;
3603 	    s->val = n;
3604 	    PUSH(frag(s, list1(&s->out)));
3605 	    break;
3606 	  }
3607 
3608 	case NFA_ZSTART:
3609 	case NFA_ZEND:
3610 	default:
3611 	    /* Operands */
3612 	    if (nfa_calc_size == TRUE)
3613 	    {
3614 		nstate++;
3615 		break;
3616 	    }
3617 	    s = alloc_state(*p, NULL, NULL);
3618 	    if (s == NULL)
3619 		goto theend;
3620 	    PUSH(frag(s, list1(&s->out)));
3621 	    break;
3622 
3623 	} /* switch(*p) */
3624 
3625     } /* for(p = postfix; *p; ++p) */
3626 
3627     if (nfa_calc_size == TRUE)
3628     {
3629 	nstate++;
3630 	goto theend;	/* Return value when counting size is ignored anyway */
3631     }
3632 
3633     e = POP();
3634     if (stackp != stack)
3635     {
3636 	vim_free(stack);
3637 	EMSG_RET_NULL(_("E875: (NFA regexp) (While converting from postfix to NFA), too many states left on stack"));
3638     }
3639 
3640     if (istate >= nstate)
3641     {
3642 	vim_free(stack);
3643 	EMSG_RET_NULL(_("E876: (NFA regexp) Not enough space to store the whole NFA "));
3644     }
3645 
3646     matchstate = &state_ptr[istate++]; /* the match state */
3647     matchstate->c = NFA_MATCH;
3648     matchstate->out = matchstate->out1 = NULL;
3649     matchstate->id = 0;
3650 
3651     patch(e.out, matchstate);
3652     ret = e.start;
3653 
3654 theend:
3655     vim_free(stack);
3656     return ret;
3657 
3658 #undef POP1
3659 #undef PUSH1
3660 #undef POP2
3661 #undef PUSH2
3662 #undef POP
3663 #undef PUSH
3664 }
3665 
3666 /*
3667  * After building the NFA program, inspect it to add optimization hints.
3668  */
3669     static void
3670 nfa_postprocess(prog)
3671     nfa_regprog_T   *prog;
3672 {
3673     int i;
3674     int c;
3675 
3676     for (i = 0; i < prog->nstate; ++i)
3677     {
3678 	c = prog->state[i].c;
3679 	if (c == NFA_START_INVISIBLE
3680 		|| c == NFA_START_INVISIBLE_NEG
3681 		|| c == NFA_START_INVISIBLE_BEFORE
3682 		|| c == NFA_START_INVISIBLE_BEFORE_NEG)
3683 	{
3684 	    int directly;
3685 
3686 	    /* Do it directly when what follows is possibly the end of the
3687 	     * match. */
3688 	    if (match_follows(prog->state[i].out1->out, 0))
3689 		directly = TRUE;
3690 	    else
3691 	    {
3692 		int ch_invisible = failure_chance(prog->state[i].out, 0);
3693 		int ch_follows = failure_chance(prog->state[i].out1->out, 0);
3694 
3695 		/* Postpone when the invisible match is expensive or has a
3696 		 * lower chance of failing. */
3697 		if (c == NFA_START_INVISIBLE_BEFORE
3698 		     || c == NFA_START_INVISIBLE_BEFORE_NEG)
3699 		{
3700 		    /* "before" matches are very expensive when
3701 		     * unbounded, always prefer what follows then,
3702 		     * unless what follows will always match.
3703 		     * Otherwise strongly prefer what follows. */
3704 		    if (prog->state[i].val <= 0 && ch_follows > 0)
3705 			directly = FALSE;
3706 		    else
3707 			directly = ch_follows * 10 < ch_invisible;
3708 		}
3709 		else
3710 		{
3711 		    /* normal invisible, first do the one with the
3712 		     * highest failure chance */
3713 		    directly = ch_follows < ch_invisible;
3714 		}
3715 	    }
3716 	    if (directly)
3717 		/* switch to the _FIRST state */
3718 		++prog->state[i].c;
3719 	}
3720     }
3721 }
3722 
3723 /****************************************************************
3724  * NFA execution code.
3725  ****************************************************************/
3726 
3727 typedef struct
3728 {
3729     int	    in_use; /* number of subexpr with useful info */
3730 
3731     /* When REG_MULTI is TRUE list.multi is used, otherwise list.line. */
3732     union
3733     {
3734 	struct multipos
3735 	{
3736 	    linenr_T	start_lnum;
3737 	    linenr_T	end_lnum;
3738 	    colnr_T	start_col;
3739 	    colnr_T	end_col;
3740 	} multi[NSUBEXP];
3741 	struct linepos
3742 	{
3743 	    char_u	*start;
3744 	    char_u	*end;
3745 	} line[NSUBEXP];
3746     } list;
3747 } regsub_T;
3748 
3749 typedef struct
3750 {
3751     regsub_T	norm; /* \( .. \) matches */
3752 #ifdef FEAT_SYN_HL
3753     regsub_T	synt; /* \z( .. \) matches */
3754 #endif
3755 } regsubs_T;
3756 
3757 /* nfa_pim_T stores a Postponed Invisible Match. */
3758 typedef struct nfa_pim_S nfa_pim_T;
3759 struct nfa_pim_S
3760 {
3761     int		result;		/* NFA_PIM_*, see below */
3762     nfa_state_T	*state;		/* the invisible match start state */
3763     regsubs_T	subs;		/* submatch info, only party used */
3764     union
3765     {
3766 	lpos_T	pos;
3767 	char_u	*ptr;
3768     } end;			/* where the match must end */
3769 };
3770 
3771 /* Values for done in nfa_pim_T. */
3772 #define NFA_PIM_UNUSED   0	/* pim not used */
3773 #define NFA_PIM_TODO     1	/* pim not done yet */
3774 #define NFA_PIM_MATCH    2	/* pim executed, matches */
3775 #define NFA_PIM_NOMATCH  3	/* pim executed, no match */
3776 
3777 
3778 /* nfa_thread_T contains execution information of a NFA state */
3779 typedef struct
3780 {
3781     nfa_state_T	*state;
3782     int		count;
3783     nfa_pim_T	pim;		/* if pim.result != NFA_PIM_UNUSED: postponed
3784 				 * invisible match */
3785     regsubs_T	subs;		/* submatch info, only party used */
3786 } nfa_thread_T;
3787 
3788 /* nfa_list_T contains the alternative NFA execution states. */
3789 typedef struct
3790 {
3791     nfa_thread_T    *t;		/* allocated array of states */
3792     int		    n;		/* nr of states currently in "t" */
3793     int		    len;	/* max nr of states in "t" */
3794     int		    id;		/* ID of the list */
3795     int		    has_pim;	/* TRUE when any state has a PIM */
3796 } nfa_list_T;
3797 
3798 #ifdef ENABLE_LOG
3799 static void log_subsexpr __ARGS((regsubs_T *subs));
3800 static void log_subexpr __ARGS((regsub_T *sub));
3801 static char *pim_info __ARGS((nfa_pim_T *pim));
3802 
3803     static void
3804 log_subsexpr(subs)
3805     regsubs_T *subs;
3806 {
3807     log_subexpr(&subs->norm);
3808 # ifdef FEAT_SYN_HL
3809     if (nfa_has_zsubexpr)
3810 	log_subexpr(&subs->synt);
3811 # endif
3812 }
3813 
3814     static void
3815 log_subexpr(sub)
3816     regsub_T *sub;
3817 {
3818     int j;
3819 
3820     for (j = 0; j < sub->in_use; j++)
3821 	if (REG_MULTI)
3822 	    fprintf(log_fd, "*** group %d, start: c=%d, l=%d, end: c=%d, l=%d\n",
3823 		    j,
3824 		    sub->list.multi[j].start_col,
3825 		    (int)sub->list.multi[j].start_lnum,
3826 		    sub->list.multi[j].end_col,
3827 		    (int)sub->list.multi[j].end_lnum);
3828 	else
3829 	{
3830 	    char *s = (char *)sub->list.line[j].start;
3831 	    char *e = (char *)sub->list.line[j].end;
3832 
3833 	    fprintf(log_fd, "*** group %d, start: \"%s\", end: \"%s\"\n",
3834 		    j,
3835 		    s == NULL ? "NULL" : s,
3836 		    e == NULL ? "NULL" : e);
3837 	}
3838 }
3839 
3840     static char *
3841 pim_info(pim)
3842     nfa_pim_T *pim;
3843 {
3844     static char buf[30];
3845 
3846     if (pim == NULL || pim->result == NFA_PIM_UNUSED)
3847 	buf[0] = NUL;
3848     else
3849     {
3850 	sprintf(buf, " PIM col %d", REG_MULTI ? (int)pim->end.pos.col
3851 		: (int)(pim->end.ptr - reginput));
3852     }
3853     return buf;
3854 }
3855 
3856 #endif
3857 
3858 /* Used during execution: whether a match has been found. */
3859 static int nfa_match;
3860 #ifdef FEAT_RELTIME
3861 static proftime_T  *nfa_time_limit;
3862 static int         nfa_time_count;
3863 #endif
3864 
3865 static void copy_pim __ARGS((nfa_pim_T *to, nfa_pim_T *from));
3866 static void clear_sub __ARGS((regsub_T *sub));
3867 static void copy_sub __ARGS((regsub_T *to, regsub_T *from));
3868 static void copy_sub_off __ARGS((regsub_T *to, regsub_T *from));
3869 static void copy_ze_off __ARGS((regsub_T *to, regsub_T *from));
3870 static int sub_equal __ARGS((regsub_T *sub1, regsub_T *sub2));
3871 static int match_backref __ARGS((regsub_T *sub, int subidx, int *bytelen));
3872 static int has_state_with_pos __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim));
3873 static int pim_equal __ARGS((nfa_pim_T *one, nfa_pim_T *two));
3874 static int state_in_list __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs));
3875 static regsubs_T *addstate __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs_arg, nfa_pim_T *pim, int off));
3876 static void addstate_here __ARGS((nfa_list_T *l, nfa_state_T *state, regsubs_T *subs, nfa_pim_T *pim, int *ip));
3877 
3878 /*
3879  * Copy postponed invisible match info from "from" to "to".
3880  */
3881     static void
3882 copy_pim(to, from)
3883     nfa_pim_T *to;
3884     nfa_pim_T *from;
3885 {
3886     to->result = from->result;
3887     to->state = from->state;
3888     copy_sub(&to->subs.norm, &from->subs.norm);
3889 #ifdef FEAT_SYN_HL
3890     if (nfa_has_zsubexpr)
3891 	copy_sub(&to->subs.synt, &from->subs.synt);
3892 #endif
3893     to->end = from->end;
3894 }
3895 
3896     static void
3897 clear_sub(sub)
3898     regsub_T *sub;
3899 {
3900     if (REG_MULTI)
3901 	/* Use 0xff to set lnum to -1 */
3902 	vim_memset(sub->list.multi, 0xff,
3903 				      sizeof(struct multipos) * nfa_nsubexpr);
3904     else
3905 	vim_memset(sub->list.line, 0, sizeof(struct linepos) * nfa_nsubexpr);
3906     sub->in_use = 0;
3907 }
3908 
3909 /*
3910  * Copy the submatches from "from" to "to".
3911  */
3912     static void
3913 copy_sub(to, from)
3914     regsub_T	*to;
3915     regsub_T	*from;
3916 {
3917     to->in_use = from->in_use;
3918     if (from->in_use > 0)
3919     {
3920 	/* Copy the match start and end positions. */
3921 	if (REG_MULTI)
3922 	    mch_memmove(&to->list.multi[0],
3923 			&from->list.multi[0],
3924 			sizeof(struct multipos) * from->in_use);
3925 	else
3926 	    mch_memmove(&to->list.line[0],
3927 			&from->list.line[0],
3928 			sizeof(struct linepos) * from->in_use);
3929     }
3930 }
3931 
3932 /*
3933  * Like copy_sub() but exclude the main match.
3934  */
3935     static void
3936 copy_sub_off(to, from)
3937     regsub_T	*to;
3938     regsub_T	*from;
3939 {
3940     if (to->in_use < from->in_use)
3941 	to->in_use = from->in_use;
3942     if (from->in_use > 1)
3943     {
3944 	/* Copy the match start and end positions. */
3945 	if (REG_MULTI)
3946 	    mch_memmove(&to->list.multi[1],
3947 			&from->list.multi[1],
3948 			sizeof(struct multipos) * (from->in_use - 1));
3949 	else
3950 	    mch_memmove(&to->list.line[1],
3951 			&from->list.line[1],
3952 			sizeof(struct linepos) * (from->in_use - 1));
3953     }
3954 }
3955 
3956 /*
3957  * Like copy_sub() but only do the end of the main match if \ze is present.
3958  */
3959     static void
3960 copy_ze_off(to, from)
3961     regsub_T	*to;
3962     regsub_T	*from;
3963 {
3964     if (nfa_has_zend)
3965     {
3966 	if (REG_MULTI)
3967 	{
3968 	    if (from->list.multi[0].end_lnum >= 0)
3969             {
3970 		to->list.multi[0].end_lnum = from->list.multi[0].end_lnum;
3971 		to->list.multi[0].end_col = from->list.multi[0].end_col;
3972             }
3973 	}
3974 	else
3975 	{
3976 	    if (from->list.line[0].end != NULL)
3977 		to->list.line[0].end = from->list.line[0].end;
3978 	}
3979     }
3980 }
3981 
3982 /*
3983  * Return TRUE if "sub1" and "sub2" have the same start positions.
3984  * When using back-references also check the end position.
3985  */
3986     static int
3987 sub_equal(sub1, sub2)
3988     regsub_T	*sub1;
3989     regsub_T	*sub2;
3990 {
3991     int		i;
3992     int		todo;
3993     linenr_T	s1;
3994     linenr_T	s2;
3995     char_u	*sp1;
3996     char_u	*sp2;
3997 
3998     todo = sub1->in_use > sub2->in_use ? sub1->in_use : sub2->in_use;
3999     if (REG_MULTI)
4000     {
4001 	for (i = 0; i < todo; ++i)
4002 	{
4003 	    if (i < sub1->in_use)
4004 		s1 = sub1->list.multi[i].start_lnum;
4005 	    else
4006 		s1 = -1;
4007 	    if (i < sub2->in_use)
4008 		s2 = sub2->list.multi[i].start_lnum;
4009 	    else
4010 		s2 = -1;
4011 	    if (s1 != s2)
4012 		return FALSE;
4013 	    if (s1 != -1 && sub1->list.multi[i].start_col
4014 					     != sub2->list.multi[i].start_col)
4015 		return FALSE;
4016 
4017 	    if (nfa_has_backref)
4018 	    {
4019 		if (i < sub1->in_use)
4020 		    s1 = sub1->list.multi[i].end_lnum;
4021 		else
4022 		    s1 = -1;
4023 		if (i < sub2->in_use)
4024 		    s2 = sub2->list.multi[i].end_lnum;
4025 		else
4026 		    s2 = -1;
4027 		if (s1 != s2)
4028 		    return FALSE;
4029 		if (s1 != -1 && sub1->list.multi[i].end_col
4030 					       != sub2->list.multi[i].end_col)
4031 		return FALSE;
4032 	    }
4033 	}
4034     }
4035     else
4036     {
4037 	for (i = 0; i < todo; ++i)
4038 	{
4039 	    if (i < sub1->in_use)
4040 		sp1 = sub1->list.line[i].start;
4041 	    else
4042 		sp1 = NULL;
4043 	    if (i < sub2->in_use)
4044 		sp2 = sub2->list.line[i].start;
4045 	    else
4046 		sp2 = NULL;
4047 	    if (sp1 != sp2)
4048 		return FALSE;
4049 	    if (nfa_has_backref)
4050 	    {
4051 		if (i < sub1->in_use)
4052 		    sp1 = sub1->list.line[i].end;
4053 		else
4054 		    sp1 = NULL;
4055 		if (i < sub2->in_use)
4056 		    sp2 = sub2->list.line[i].end;
4057 		else
4058 		    sp2 = NULL;
4059 		if (sp1 != sp2)
4060 		    return FALSE;
4061 	    }
4062 	}
4063     }
4064 
4065     return TRUE;
4066 }
4067 
4068 #ifdef ENABLE_LOG
4069     static void
4070 report_state(char *action,
4071 	     regsub_T *sub,
4072 	     nfa_state_T *state,
4073 	     int lid,
4074 	     nfa_pim_T *pim)
4075 {
4076     int col;
4077 
4078     if (sub->in_use <= 0)
4079 	col = -1;
4080     else if (REG_MULTI)
4081 	col = sub->list.multi[0].start_col;
4082     else
4083 	col = (int)(sub->list.line[0].start - regline);
4084     nfa_set_code(state->c);
4085     fprintf(log_fd, "> %s state %d to list %d. char %d: %s (start col %d)%s\n",
4086 	    action, abs(state->id), lid, state->c, code, col,
4087 	    pim_info(pim));
4088 }
4089 #endif
4090 
4091 /*
4092  * Return TRUE if the same state is already in list "l" with the same
4093  * positions as "subs".
4094  */
4095     static int
4096 has_state_with_pos(l, state, subs, pim)
4097     nfa_list_T		*l;	/* runtime state list */
4098     nfa_state_T		*state;	/* state to update */
4099     regsubs_T		*subs;	/* pointers to subexpressions */
4100     nfa_pim_T		*pim;	/* postponed match or NULL */
4101 {
4102     nfa_thread_T	*thread;
4103     int			i;
4104 
4105     for (i = 0; i < l->n; ++i)
4106     {
4107 	thread = &l->t[i];
4108 	if (thread->state->id == state->id
4109 		&& sub_equal(&thread->subs.norm, &subs->norm)
4110 #ifdef FEAT_SYN_HL
4111 		&& (!nfa_has_zsubexpr
4112 				|| sub_equal(&thread->subs.synt, &subs->synt))
4113 #endif
4114 		&& pim_equal(&thread->pim, pim))
4115 	    return TRUE;
4116     }
4117     return FALSE;
4118 }
4119 
4120 /*
4121  * Return TRUE if "one" and "two" are equal.  That includes when both are not
4122  * set.
4123  */
4124     static int
4125 pim_equal(one, two)
4126     nfa_pim_T *one;
4127     nfa_pim_T *two;
4128 {
4129     int one_unused = (one == NULL || one->result == NFA_PIM_UNUSED);
4130     int two_unused = (two == NULL || two->result == NFA_PIM_UNUSED);
4131 
4132     if (one_unused)
4133 	/* one is unused: equal when two is also unused */
4134 	return two_unused;
4135     if (two_unused)
4136 	/* one is used and two is not: not equal */
4137 	return FALSE;
4138     /* compare the state id */
4139     if (one->state->id != two->state->id)
4140 	return FALSE;
4141     /* compare the position */
4142     if (REG_MULTI)
4143 	return one->end.pos.lnum == two->end.pos.lnum
4144 	    && one->end.pos.col == two->end.pos.col;
4145     return one->end.ptr == two->end.ptr;
4146 }
4147 
4148 /*
4149  * Return TRUE if "state" leads to a NFA_MATCH without advancing the input.
4150  */
4151     static int
4152 match_follows(startstate, depth)
4153     nfa_state_T *startstate;
4154     int		depth;
4155 {
4156     nfa_state_T	    *state = startstate;
4157 
4158     /* avoid too much recursion */
4159     if (depth > 10)
4160 	return FALSE;
4161 
4162     while (state != NULL)
4163     {
4164 	switch (state->c)
4165 	{
4166 	    case NFA_MATCH:
4167 	    case NFA_MCLOSE:
4168 	    case NFA_END_INVISIBLE:
4169 	    case NFA_END_INVISIBLE_NEG:
4170 	    case NFA_END_PATTERN:
4171 		return TRUE;
4172 
4173 	    case NFA_SPLIT:
4174 		return match_follows(state->out, depth + 1)
4175 				     || match_follows(state->out1, depth + 1);
4176 
4177 	    case NFA_START_INVISIBLE:
4178 	    case NFA_START_INVISIBLE_FIRST:
4179 	    case NFA_START_INVISIBLE_BEFORE:
4180 	    case NFA_START_INVISIBLE_BEFORE_FIRST:
4181 	    case NFA_START_INVISIBLE_NEG:
4182 	    case NFA_START_INVISIBLE_NEG_FIRST:
4183 	    case NFA_START_INVISIBLE_BEFORE_NEG:
4184 	    case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
4185 	    case NFA_COMPOSING:
4186 		/* skip ahead to next state */
4187 		state = state->out1->out;
4188 		continue;
4189 
4190 	    case NFA_ANY:
4191 	    case NFA_ANY_COMPOSING:
4192 	    case NFA_IDENT:
4193 	    case NFA_SIDENT:
4194 	    case NFA_KWORD:
4195 	    case NFA_SKWORD:
4196 	    case NFA_FNAME:
4197 	    case NFA_SFNAME:
4198 	    case NFA_PRINT:
4199 	    case NFA_SPRINT:
4200 	    case NFA_WHITE:
4201 	    case NFA_NWHITE:
4202 	    case NFA_DIGIT:
4203 	    case NFA_NDIGIT:
4204 	    case NFA_HEX:
4205 	    case NFA_NHEX:
4206 	    case NFA_OCTAL:
4207 	    case NFA_NOCTAL:
4208 	    case NFA_WORD:
4209 	    case NFA_NWORD:
4210 	    case NFA_HEAD:
4211 	    case NFA_NHEAD:
4212 	    case NFA_ALPHA:
4213 	    case NFA_NALPHA:
4214 	    case NFA_LOWER:
4215 	    case NFA_NLOWER:
4216 	    case NFA_UPPER:
4217 	    case NFA_NUPPER:
4218 	    case NFA_LOWER_IC:
4219 	    case NFA_NLOWER_IC:
4220 	    case NFA_UPPER_IC:
4221 	    case NFA_NUPPER_IC:
4222 	    case NFA_START_COLL:
4223 	    case NFA_START_NEG_COLL:
4224 	    case NFA_NEWL:
4225 		/* state will advance input */
4226 		return FALSE;
4227 
4228 	    default:
4229 		if (state->c > 0)
4230 		    /* state will advance input */
4231 		    return FALSE;
4232 
4233 		/* Others: zero-width or possibly zero-width, might still find
4234 		 * a match at the same position, keep looking. */
4235 		break;
4236 	}
4237 	state = state->out;
4238     }
4239     return FALSE;
4240 }
4241 
4242 
4243 /*
4244  * Return TRUE if "state" is already in list "l".
4245  */
4246     static int
4247 state_in_list(l, state, subs)
4248     nfa_list_T		*l;	/* runtime state list */
4249     nfa_state_T		*state;	/* state to update */
4250     regsubs_T		*subs;	/* pointers to subexpressions */
4251 {
4252     if (state->lastlist[nfa_ll_index] == l->id)
4253     {
4254 	if (!nfa_has_backref || has_state_with_pos(l, state, subs, NULL))
4255 	    return TRUE;
4256     }
4257     return FALSE;
4258 }
4259 
4260 /*
4261  * Add "state" and possibly what follows to state list ".".
4262  * Returns "subs_arg", possibly copied into temp_subs.
4263  */
4264     static regsubs_T *
4265 addstate(l, state, subs_arg, pim, off)
4266     nfa_list_T		*l;	    /* runtime state list */
4267     nfa_state_T		*state;	    /* state to update */
4268     regsubs_T		*subs_arg;  /* pointers to subexpressions */
4269     nfa_pim_T		*pim;	    /* postponed look-behind match */
4270     int			off;	    /* byte offset, when -1 go to next line */
4271 {
4272     int			subidx;
4273     nfa_thread_T	*thread;
4274     lpos_T		save_lpos;
4275     int			save_in_use;
4276     char_u		*save_ptr;
4277     int			i;
4278     regsub_T		*sub;
4279     regsubs_T		*subs = subs_arg;
4280     static regsubs_T	temp_subs;
4281 #ifdef ENABLE_LOG
4282     int			did_print = FALSE;
4283 #endif
4284 
4285     switch (state->c)
4286     {
4287 	case NFA_NCLOSE:
4288 	case NFA_MCLOSE:
4289 	case NFA_MCLOSE1:
4290 	case NFA_MCLOSE2:
4291 	case NFA_MCLOSE3:
4292 	case NFA_MCLOSE4:
4293 	case NFA_MCLOSE5:
4294 	case NFA_MCLOSE6:
4295 	case NFA_MCLOSE7:
4296 	case NFA_MCLOSE8:
4297 	case NFA_MCLOSE9:
4298 #ifdef FEAT_SYN_HL
4299 	case NFA_ZCLOSE:
4300 	case NFA_ZCLOSE1:
4301 	case NFA_ZCLOSE2:
4302 	case NFA_ZCLOSE3:
4303 	case NFA_ZCLOSE4:
4304 	case NFA_ZCLOSE5:
4305 	case NFA_ZCLOSE6:
4306 	case NFA_ZCLOSE7:
4307 	case NFA_ZCLOSE8:
4308 	case NFA_ZCLOSE9:
4309 #endif
4310 	case NFA_MOPEN:
4311 	case NFA_ZEND:
4312 	case NFA_SPLIT:
4313 	case NFA_EMPTY:
4314 	    /* These nodes are not added themselves but their "out" and/or
4315 	     * "out1" may be added below.  */
4316 	    break;
4317 
4318 	case NFA_BOL:
4319 	case NFA_BOF:
4320 	    /* "^" won't match past end-of-line, don't bother trying.
4321 	     * Except when at the end of the line, or when we are going to the
4322 	     * next line for a look-behind match. */
4323 	    if (reginput > regline
4324 		    && *reginput != NUL
4325 		    && (nfa_endp == NULL
4326 			|| !REG_MULTI
4327 			|| reglnum == nfa_endp->se_u.pos.lnum))
4328 		goto skip_add;
4329 	    /* FALLTHROUGH */
4330 
4331 	case NFA_MOPEN1:
4332 	case NFA_MOPEN2:
4333 	case NFA_MOPEN3:
4334 	case NFA_MOPEN4:
4335 	case NFA_MOPEN5:
4336 	case NFA_MOPEN6:
4337 	case NFA_MOPEN7:
4338 	case NFA_MOPEN8:
4339 	case NFA_MOPEN9:
4340 #ifdef FEAT_SYN_HL
4341 	case NFA_ZOPEN:
4342 	case NFA_ZOPEN1:
4343 	case NFA_ZOPEN2:
4344 	case NFA_ZOPEN3:
4345 	case NFA_ZOPEN4:
4346 	case NFA_ZOPEN5:
4347 	case NFA_ZOPEN6:
4348 	case NFA_ZOPEN7:
4349 	case NFA_ZOPEN8:
4350 	case NFA_ZOPEN9:
4351 #endif
4352 	case NFA_NOPEN:
4353 	case NFA_ZSTART:
4354 	    /* These nodes need to be added so that we can bail out when it
4355 	     * was added to this list before at the same position to avoid an
4356 	     * endless loop for "\(\)*" */
4357 
4358 	default:
4359 	    if (state->lastlist[nfa_ll_index] == l->id && state->c != NFA_SKIP)
4360 	    {
4361 		/* This state is already in the list, don't add it again,
4362 		 * unless it is an MOPEN that is used for a backreference or
4363 		 * when there is a PIM. For NFA_MATCH check the position,
4364 		 * lower position is preferred. */
4365 		if (!nfa_has_backref && pim == NULL && !l->has_pim
4366 						     && state->c != NFA_MATCH)
4367 		{
4368 skip_add:
4369 #ifdef ENABLE_LOG
4370 		    nfa_set_code(state->c);
4371 		    fprintf(log_fd, "> Not adding state %d to list %d. char %d: %s\n",
4372 			    abs(state->id), l->id, state->c, code);
4373 #endif
4374 		    return subs;
4375 		}
4376 
4377 		/* Do not add the state again when it exists with the same
4378 		 * positions. */
4379 		if (has_state_with_pos(l, state, subs, pim))
4380 		    goto skip_add;
4381 	    }
4382 
4383 	    /* When there are backreferences or PIMs the number of states may
4384 	     * be (a lot) bigger than anticipated. */
4385 	    if (l->n == l->len)
4386 	    {
4387 		int newlen = l->len * 3 / 2 + 50;
4388 
4389 		if (subs != &temp_subs)
4390 		{
4391 		    /* "subs" may point into the current array, need to make a
4392 		     * copy before it becomes invalid. */
4393 		    copy_sub(&temp_subs.norm, &subs->norm);
4394 #ifdef FEAT_SYN_HL
4395 		    if (nfa_has_zsubexpr)
4396 			copy_sub(&temp_subs.synt, &subs->synt);
4397 #endif
4398 		    subs = &temp_subs;
4399 		}
4400 
4401 		/* TODO: check for vim_realloc() returning NULL. */
4402 		l->t = vim_realloc(l->t, newlen * sizeof(nfa_thread_T));
4403 		l->len = newlen;
4404 	    }
4405 
4406 	    /* add the state to the list */
4407 	    state->lastlist[nfa_ll_index] = l->id;
4408 	    thread = &l->t[l->n++];
4409 	    thread->state = state;
4410 	    if (pim == NULL)
4411 		thread->pim.result = NFA_PIM_UNUSED;
4412 	    else
4413 	    {
4414 		copy_pim(&thread->pim, pim);
4415 		l->has_pim = TRUE;
4416 	    }
4417 	    copy_sub(&thread->subs.norm, &subs->norm);
4418 #ifdef FEAT_SYN_HL
4419 	    if (nfa_has_zsubexpr)
4420 		copy_sub(&thread->subs.synt, &subs->synt);
4421 #endif
4422 #ifdef ENABLE_LOG
4423 	    report_state("Adding", &thread->subs.norm, state, l->id, pim);
4424 	    did_print = TRUE;
4425 #endif
4426     }
4427 
4428 #ifdef ENABLE_LOG
4429     if (!did_print)
4430 	report_state("Processing", &subs->norm, state, l->id, pim);
4431 #endif
4432     switch (state->c)
4433     {
4434 	case NFA_MATCH:
4435 	    break;
4436 
4437 	case NFA_SPLIT:
4438 	    /* order matters here */
4439 	    subs = addstate(l, state->out, subs, pim, off);
4440 	    subs = addstate(l, state->out1, subs, pim, off);
4441 	    break;
4442 
4443 	case NFA_EMPTY:
4444 	case NFA_NOPEN:
4445 	case NFA_NCLOSE:
4446 	    subs = addstate(l, state->out, subs, pim, off);
4447 	    break;
4448 
4449 	case NFA_MOPEN:
4450 	case NFA_MOPEN1:
4451 	case NFA_MOPEN2:
4452 	case NFA_MOPEN3:
4453 	case NFA_MOPEN4:
4454 	case NFA_MOPEN5:
4455 	case NFA_MOPEN6:
4456 	case NFA_MOPEN7:
4457 	case NFA_MOPEN8:
4458 	case NFA_MOPEN9:
4459 #ifdef FEAT_SYN_HL
4460 	case NFA_ZOPEN:
4461 	case NFA_ZOPEN1:
4462 	case NFA_ZOPEN2:
4463 	case NFA_ZOPEN3:
4464 	case NFA_ZOPEN4:
4465 	case NFA_ZOPEN5:
4466 	case NFA_ZOPEN6:
4467 	case NFA_ZOPEN7:
4468 	case NFA_ZOPEN8:
4469 	case NFA_ZOPEN9:
4470 #endif
4471 	case NFA_ZSTART:
4472 	    if (state->c == NFA_ZSTART)
4473 	    {
4474 		subidx = 0;
4475 		sub = &subs->norm;
4476 	    }
4477 #ifdef FEAT_SYN_HL
4478 	    else if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9)
4479 	    {
4480 		subidx = state->c - NFA_ZOPEN;
4481 		sub = &subs->synt;
4482 	    }
4483 #endif
4484 	    else
4485 	    {
4486 		subidx = state->c - NFA_MOPEN;
4487 		sub = &subs->norm;
4488 	    }
4489 
4490 	    /* avoid compiler warnings */
4491 	    save_ptr = NULL;
4492 	    save_lpos.lnum = 0;
4493 	    save_lpos.col = 0;
4494 
4495 	    /* Set the position (with "off" added) in the subexpression.  Save
4496 	     * and restore it when it was in use.  Otherwise fill any gap. */
4497 	    if (REG_MULTI)
4498 	    {
4499 		if (subidx < sub->in_use)
4500 		{
4501 		    save_lpos.lnum = sub->list.multi[subidx].start_lnum;
4502 		    save_lpos.col = sub->list.multi[subidx].start_col;
4503 		    save_in_use = -1;
4504 		}
4505 		else
4506 		{
4507 		    save_in_use = sub->in_use;
4508 		    for (i = sub->in_use; i < subidx; ++i)
4509 		    {
4510 			sub->list.multi[i].start_lnum = -1;
4511 			sub->list.multi[i].end_lnum = -1;
4512 		    }
4513 		    sub->in_use = subidx + 1;
4514 		}
4515 		if (off == -1)
4516 		{
4517 		    sub->list.multi[subidx].start_lnum = reglnum + 1;
4518 		    sub->list.multi[subidx].start_col = 0;
4519 		}
4520 		else
4521 		{
4522 		    sub->list.multi[subidx].start_lnum = reglnum;
4523 		    sub->list.multi[subidx].start_col =
4524 					  (colnr_T)(reginput - regline + off);
4525 		}
4526 		sub->list.multi[subidx].end_lnum = -1;
4527 	    }
4528 	    else
4529 	    {
4530 		if (subidx < sub->in_use)
4531 		{
4532 		    save_ptr = sub->list.line[subidx].start;
4533 		    save_in_use = -1;
4534 		}
4535 		else
4536 		{
4537 		    save_in_use = sub->in_use;
4538 		    for (i = sub->in_use; i < subidx; ++i)
4539 		    {
4540 			sub->list.line[i].start = NULL;
4541 			sub->list.line[i].end = NULL;
4542 		    }
4543 		    sub->in_use = subidx + 1;
4544 		}
4545 		sub->list.line[subidx].start = reginput + off;
4546 	    }
4547 
4548 	    subs = addstate(l, state->out, subs, pim, off);
4549 	    /* "subs" may have changed, need to set "sub" again */
4550 #ifdef FEAT_SYN_HL
4551 	    if (state->c >= NFA_ZOPEN && state->c <= NFA_ZOPEN9)
4552 		sub = &subs->synt;
4553 	    else
4554 #endif
4555 		sub = &subs->norm;
4556 
4557 	    if (save_in_use == -1)
4558 	    {
4559 		if (REG_MULTI)
4560                 {
4561 		    sub->list.multi[subidx].start_lnum = save_lpos.lnum;
4562 		    sub->list.multi[subidx].start_col = save_lpos.col;
4563                 }
4564 		else
4565 		    sub->list.line[subidx].start = save_ptr;
4566 	    }
4567 	    else
4568 		sub->in_use = save_in_use;
4569 	    break;
4570 
4571 	case NFA_MCLOSE:
4572 	    if (nfa_has_zend && (REG_MULTI
4573 			? subs->norm.list.multi[0].end_lnum >= 0
4574 			: subs->norm.list.line[0].end != NULL))
4575 	    {
4576 		/* Do not overwrite the position set by \ze. */
4577 		subs = addstate(l, state->out, subs, pim, off);
4578 		break;
4579 	    }
4580 	case NFA_MCLOSE1:
4581 	case NFA_MCLOSE2:
4582 	case NFA_MCLOSE3:
4583 	case NFA_MCLOSE4:
4584 	case NFA_MCLOSE5:
4585 	case NFA_MCLOSE6:
4586 	case NFA_MCLOSE7:
4587 	case NFA_MCLOSE8:
4588 	case NFA_MCLOSE9:
4589 #ifdef FEAT_SYN_HL
4590 	case NFA_ZCLOSE:
4591 	case NFA_ZCLOSE1:
4592 	case NFA_ZCLOSE2:
4593 	case NFA_ZCLOSE3:
4594 	case NFA_ZCLOSE4:
4595 	case NFA_ZCLOSE5:
4596 	case NFA_ZCLOSE6:
4597 	case NFA_ZCLOSE7:
4598 	case NFA_ZCLOSE8:
4599 	case NFA_ZCLOSE9:
4600 #endif
4601 	case NFA_ZEND:
4602 	    if (state->c == NFA_ZEND)
4603 	    {
4604 		subidx = 0;
4605 		sub = &subs->norm;
4606 	    }
4607 #ifdef FEAT_SYN_HL
4608 	    else if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9)
4609 	    {
4610 		subidx = state->c - NFA_ZCLOSE;
4611 		sub = &subs->synt;
4612 	    }
4613 #endif
4614 	    else
4615 	    {
4616 		subidx = state->c - NFA_MCLOSE;
4617 		sub = &subs->norm;
4618 	    }
4619 
4620 	    /* We don't fill in gaps here, there must have been an MOPEN that
4621 	     * has done that. */
4622 	    save_in_use = sub->in_use;
4623 	    if (sub->in_use <= subidx)
4624 		sub->in_use = subidx + 1;
4625 	    if (REG_MULTI)
4626 	    {
4627 		save_lpos.lnum = sub->list.multi[subidx].end_lnum;
4628 		save_lpos.col = sub->list.multi[subidx].end_col;
4629 		if (off == -1)
4630 		{
4631 		    sub->list.multi[subidx].end_lnum = reglnum + 1;
4632 		    sub->list.multi[subidx].end_col = 0;
4633 		}
4634 		else
4635 		{
4636 		    sub->list.multi[subidx].end_lnum = reglnum;
4637 		    sub->list.multi[subidx].end_col =
4638 					  (colnr_T)(reginput - regline + off);
4639 		}
4640 		/* avoid compiler warnings */
4641 		save_ptr = NULL;
4642 	    }
4643 	    else
4644 	    {
4645 		save_ptr = sub->list.line[subidx].end;
4646 		sub->list.line[subidx].end = reginput + off;
4647 		/* avoid compiler warnings */
4648 		save_lpos.lnum = 0;
4649 		save_lpos.col = 0;
4650 	    }
4651 
4652 	    subs = addstate(l, state->out, subs, pim, off);
4653 	    /* "subs" may have changed, need to set "sub" again */
4654 #ifdef FEAT_SYN_HL
4655 	    if (state->c >= NFA_ZCLOSE && state->c <= NFA_ZCLOSE9)
4656 		sub = &subs->synt;
4657 	    else
4658 #endif
4659 		sub = &subs->norm;
4660 
4661 	    if (REG_MULTI)
4662             {
4663 		sub->list.multi[subidx].end_lnum = save_lpos.lnum;
4664 		sub->list.multi[subidx].end_col = save_lpos.col;
4665             }
4666 	    else
4667 		sub->list.line[subidx].end = save_ptr;
4668 	    sub->in_use = save_in_use;
4669 	    break;
4670     }
4671     return subs;
4672 }
4673 
4674 /*
4675  * Like addstate(), but the new state(s) are put at position "*ip".
4676  * Used for zero-width matches, next state to use is the added one.
4677  * This makes sure the order of states to be tried does not change, which
4678  * matters for alternatives.
4679  */
4680     static void
4681 addstate_here(l, state, subs, pim, ip)
4682     nfa_list_T		*l;	/* runtime state list */
4683     nfa_state_T		*state;	/* state to update */
4684     regsubs_T		*subs;	/* pointers to subexpressions */
4685     nfa_pim_T		*pim;   /* postponed look-behind match */
4686     int			*ip;
4687 {
4688     int tlen = l->n;
4689     int count;
4690     int listidx = *ip;
4691 
4692     /* first add the state(s) at the end, so that we know how many there are */
4693     addstate(l, state, subs, pim, 0);
4694 
4695     /* when "*ip" was at the end of the list, nothing to do */
4696     if (listidx + 1 == tlen)
4697 	return;
4698 
4699     /* re-order to put the new state at the current position */
4700     count = l->n - tlen;
4701     if (count == 0)
4702 	return; /* no state got added */
4703     if (count == 1)
4704     {
4705 	/* overwrite the current state */
4706 	l->t[listidx] = l->t[l->n - 1];
4707     }
4708     else if (count > 1)
4709     {
4710 	if (l->n + count - 1 >= l->len)
4711 	{
4712 	    /* not enough space to move the new states, reallocate the list
4713 	     * and move the states to the right position */
4714 	    nfa_thread_T *newl;
4715 
4716 	    l->len = l->len * 3 / 2 + 50;
4717 	    newl = (nfa_thread_T *)alloc(l->len * sizeof(nfa_thread_T));
4718 	    if (newl == NULL)
4719 		return;
4720 	    mch_memmove(&(newl[0]),
4721 		    &(l->t[0]),
4722 		    sizeof(nfa_thread_T) * listidx);
4723 	    mch_memmove(&(newl[listidx]),
4724 		    &(l->t[l->n - count]),
4725 		    sizeof(nfa_thread_T) * count);
4726 	    mch_memmove(&(newl[listidx + count]),
4727 		    &(l->t[listidx + 1]),
4728 		    sizeof(nfa_thread_T) * (l->n - count - listidx - 1));
4729 	    vim_free(l->t);
4730 	    l->t = newl;
4731 	}
4732 	else
4733 	{
4734 	    /* make space for new states, then move them from the
4735 	     * end to the current position */
4736 	    mch_memmove(&(l->t[listidx + count]),
4737 		    &(l->t[listidx + 1]),
4738 		    sizeof(nfa_thread_T) * (l->n - listidx - 1));
4739 	    mch_memmove(&(l->t[listidx]),
4740 		    &(l->t[l->n - 1]),
4741 		    sizeof(nfa_thread_T) * count);
4742 	}
4743     }
4744     --l->n;
4745     *ip = listidx - 1;
4746 }
4747 
4748 /*
4749  * Check character class "class" against current character c.
4750  */
4751     static int
4752 check_char_class(class, c)
4753     int		class;
4754     int		c;
4755 {
4756     switch (class)
4757     {
4758 	case NFA_CLASS_ALNUM:
4759 	    if (c >= 1 && c <= 255 && isalnum(c))
4760 		return OK;
4761 	    break;
4762 	case NFA_CLASS_ALPHA:
4763 	    if (c >= 1 && c <= 255 && isalpha(c))
4764 		return OK;
4765 	    break;
4766 	case NFA_CLASS_BLANK:
4767 	    if (c == ' ' || c == '\t')
4768 		return OK;
4769 	    break;
4770 	case NFA_CLASS_CNTRL:
4771 	    if (c >= 1 && c <= 255 && iscntrl(c))
4772 		return OK;
4773 	    break;
4774 	case NFA_CLASS_DIGIT:
4775 	    if (VIM_ISDIGIT(c))
4776 		return OK;
4777 	    break;
4778 	case NFA_CLASS_GRAPH:
4779 	    if (c >= 1 && c <= 255 && isgraph(c))
4780 		return OK;
4781 	    break;
4782 	case NFA_CLASS_LOWER:
4783 	    if (MB_ISLOWER(c))
4784 		return OK;
4785 	    break;
4786 	case NFA_CLASS_PRINT:
4787 	    if (vim_isprintc(c))
4788 		return OK;
4789 	    break;
4790 	case NFA_CLASS_PUNCT:
4791 	    if (c >= 1 && c <= 255 && ispunct(c))
4792 		return OK;
4793 	    break;
4794 	case NFA_CLASS_SPACE:
4795 	    if ((c >= 9 && c <= 13) || (c == ' '))
4796 		return OK;
4797 	    break;
4798 	case NFA_CLASS_UPPER:
4799 	    if (MB_ISUPPER(c))
4800 		return OK;
4801 	    break;
4802 	case NFA_CLASS_XDIGIT:
4803 	    if (vim_isxdigit(c))
4804 		return OK;
4805 	    break;
4806 	case NFA_CLASS_TAB:
4807 	    if (c == '\t')
4808 		return OK;
4809 	    break;
4810 	case NFA_CLASS_RETURN:
4811 	    if (c == '\r')
4812 		return OK;
4813 	    break;
4814 	case NFA_CLASS_BACKSPACE:
4815 	    if (c == '\b')
4816 		return OK;
4817 	    break;
4818 	case NFA_CLASS_ESCAPE:
4819 	    if (c == '\033')
4820 		return OK;
4821 	    break;
4822 
4823 	default:
4824 	    /* should not be here :P */
4825 	    EMSGN(_(e_ill_char_class), class);
4826 	    return FAIL;
4827     }
4828     return FAIL;
4829 }
4830 
4831 /*
4832  * Check for a match with subexpression "subidx".
4833  * Return TRUE if it matches.
4834  */
4835     static int
4836 match_backref(sub, subidx, bytelen)
4837     regsub_T	*sub;	    /* pointers to subexpressions */
4838     int		subidx;
4839     int		*bytelen;   /* out: length of match in bytes */
4840 {
4841     int		len;
4842 
4843     if (sub->in_use <= subidx)
4844     {
4845 retempty:
4846 	/* backref was not set, match an empty string */
4847 	*bytelen = 0;
4848 	return TRUE;
4849     }
4850 
4851     if (REG_MULTI)
4852     {
4853 	if (sub->list.multi[subidx].start_lnum < 0
4854 				       || sub->list.multi[subidx].end_lnum < 0)
4855 	    goto retempty;
4856 	if (sub->list.multi[subidx].start_lnum == reglnum
4857 			       && sub->list.multi[subidx].end_lnum == reglnum)
4858 	{
4859 	    len = sub->list.multi[subidx].end_col
4860 					  - sub->list.multi[subidx].start_col;
4861 	    if (cstrncmp(regline + sub->list.multi[subidx].start_col,
4862 							 reginput, &len) == 0)
4863 	    {
4864 		*bytelen = len;
4865 		return TRUE;
4866 	    }
4867 	}
4868 	else
4869 	{
4870 	    if (match_with_backref(
4871 			sub->list.multi[subidx].start_lnum,
4872 			sub->list.multi[subidx].start_col,
4873 			sub->list.multi[subidx].end_lnum,
4874 			sub->list.multi[subidx].end_col,
4875 			bytelen) == RA_MATCH)
4876 		return TRUE;
4877 	}
4878     }
4879     else
4880     {
4881 	if (sub->list.line[subidx].start == NULL
4882 					|| sub->list.line[subidx].end == NULL)
4883 	    goto retempty;
4884 	len = (int)(sub->list.line[subidx].end - sub->list.line[subidx].start);
4885 	if (cstrncmp(sub->list.line[subidx].start, reginput, &len) == 0)
4886 	{
4887 	    *bytelen = len;
4888 	    return TRUE;
4889 	}
4890     }
4891     return FALSE;
4892 }
4893 
4894 #ifdef FEAT_SYN_HL
4895 
4896 static int match_zref __ARGS((int subidx, int *bytelen));
4897 
4898 /*
4899  * Check for a match with \z subexpression "subidx".
4900  * Return TRUE if it matches.
4901  */
4902     static int
4903 match_zref(subidx, bytelen)
4904     int		subidx;
4905     int		*bytelen;   /* out: length of match in bytes */
4906 {
4907     int		len;
4908 
4909     cleanup_zsubexpr();
4910     if (re_extmatch_in == NULL || re_extmatch_in->matches[subidx] == NULL)
4911     {
4912 	/* backref was not set, match an empty string */
4913 	*bytelen = 0;
4914 	return TRUE;
4915     }
4916 
4917     len = (int)STRLEN(re_extmatch_in->matches[subidx]);
4918     if (cstrncmp(re_extmatch_in->matches[subidx], reginput, &len) == 0)
4919     {
4920 	*bytelen = len;
4921 	return TRUE;
4922     }
4923     return FALSE;
4924 }
4925 #endif
4926 
4927 /*
4928  * Save list IDs for all NFA states of "prog" into "list".
4929  * Also reset the IDs to zero.
4930  * Only used for the recursive value lastlist[1].
4931  */
4932     static void
4933 nfa_save_listids(prog, list)
4934     nfa_regprog_T   *prog;
4935     int		    *list;
4936 {
4937     int		    i;
4938     nfa_state_T	    *p;
4939 
4940     /* Order in the list is reverse, it's a bit faster that way. */
4941     p = &prog->state[0];
4942     for (i = prog->nstate; --i >= 0; )
4943     {
4944 	list[i] = p->lastlist[1];
4945 	p->lastlist[1] = 0;
4946 	++p;
4947     }
4948 }
4949 
4950 /*
4951  * Restore list IDs from "list" to all NFA states.
4952  */
4953     static void
4954 nfa_restore_listids(prog, list)
4955     nfa_regprog_T   *prog;
4956     int		    *list;
4957 {
4958     int		    i;
4959     nfa_state_T	    *p;
4960 
4961     p = &prog->state[0];
4962     for (i = prog->nstate; --i >= 0; )
4963     {
4964 	p->lastlist[1] = list[i];
4965 	++p;
4966     }
4967 }
4968 
4969     static int
4970 nfa_re_num_cmp(val, op, pos)
4971     long_u	val;
4972     int		op;
4973     long_u	pos;
4974 {
4975     if (op == 1) return pos > val;
4976     if (op == 2) return pos < val;
4977     return val == pos;
4978 }
4979 
4980 static int recursive_regmatch __ARGS((nfa_state_T *state, nfa_pim_T *pim, nfa_regprog_T *prog, regsubs_T *submatch, regsubs_T *m, int **listids));
4981 static int nfa_regmatch __ARGS((nfa_regprog_T *prog, nfa_state_T *start, regsubs_T *submatch, regsubs_T *m));
4982 
4983 /*
4984  * Recursively call nfa_regmatch()
4985  * "pim" is NULL or contains info about a Postponed Invisible Match (start
4986  * position).
4987  */
4988     static int
4989 recursive_regmatch(state, pim, prog, submatch, m, listids)
4990     nfa_state_T	    *state;
4991     nfa_pim_T	    *pim;
4992     nfa_regprog_T   *prog;
4993     regsubs_T	    *submatch;
4994     regsubs_T	    *m;
4995     int		    **listids;
4996 {
4997     int		save_reginput_col = (int)(reginput - regline);
4998     int		save_reglnum = reglnum;
4999     int		save_nfa_match = nfa_match;
5000     int		save_nfa_listid = nfa_listid;
5001     save_se_T   *save_nfa_endp = nfa_endp;
5002     save_se_T   endpos;
5003     save_se_T   *endposp = NULL;
5004     int		result;
5005     int		need_restore = FALSE;
5006 
5007     if (pim != NULL)
5008     {
5009 	/* start at the position where the postponed match was */
5010 	if (REG_MULTI)
5011 	    reginput = regline + pim->end.pos.col;
5012 	else
5013 	    reginput = pim->end.ptr;
5014     }
5015 
5016     if (state->c == NFA_START_INVISIBLE_BEFORE
5017         || state->c == NFA_START_INVISIBLE_BEFORE_FIRST
5018         || state->c == NFA_START_INVISIBLE_BEFORE_NEG
5019         || state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)
5020     {
5021 	/* The recursive match must end at the current position. When "pim" is
5022 	 * not NULL it specifies the current position. */
5023 	endposp = &endpos;
5024 	if (REG_MULTI)
5025 	{
5026 	    if (pim == NULL)
5027 	    {
5028 		endpos.se_u.pos.col = (int)(reginput - regline);
5029 		endpos.se_u.pos.lnum = reglnum;
5030 	    }
5031 	    else
5032 		endpos.se_u.pos = pim->end.pos;
5033 	}
5034 	else
5035 	{
5036 	    if (pim == NULL)
5037 		endpos.se_u.ptr = reginput;
5038 	    else
5039 		endpos.se_u.ptr = pim->end.ptr;
5040 	}
5041 
5042 	/* Go back the specified number of bytes, or as far as the
5043 	 * start of the previous line, to try matching "\@<=" or
5044 	 * not matching "\@<!". This is very inefficient, limit the number of
5045 	 * bytes if possible. */
5046 	if (state->val <= 0)
5047 	{
5048 	    if (REG_MULTI)
5049 	    {
5050 		regline = reg_getline(--reglnum);
5051 		if (regline == NULL)
5052 		    /* can't go before the first line */
5053 		    regline = reg_getline(++reglnum);
5054 	    }
5055 	    reginput = regline;
5056 	}
5057 	else
5058 	{
5059 	    if (REG_MULTI && (int)(reginput - regline) < state->val)
5060 	    {
5061 		/* Not enough bytes in this line, go to end of
5062 		 * previous line. */
5063 		regline = reg_getline(--reglnum);
5064 		if (regline == NULL)
5065 		{
5066 		    /* can't go before the first line */
5067 		    regline = reg_getline(++reglnum);
5068 		    reginput = regline;
5069 		}
5070 		else
5071 		    reginput = regline + STRLEN(regline);
5072 	    }
5073 	    if ((int)(reginput - regline) >= state->val)
5074 	    {
5075 		reginput -= state->val;
5076 #ifdef FEAT_MBYTE
5077 		if (has_mbyte)
5078 		    reginput -= mb_head_off(regline, reginput);
5079 #endif
5080 	    }
5081 	    else
5082 		reginput = regline;
5083 	}
5084     }
5085 
5086 #ifdef ENABLE_LOG
5087     if (log_fd != stderr)
5088 	fclose(log_fd);
5089     log_fd = NULL;
5090 #endif
5091     /* Have to clear the lastlist field of the NFA nodes, so that
5092      * nfa_regmatch() and addstate() can run properly after recursion. */
5093     if (nfa_ll_index == 1)
5094     {
5095 	/* Already calling nfa_regmatch() recursively.  Save the lastlist[1]
5096 	 * values and clear them. */
5097 	if (*listids == NULL)
5098 	{
5099 	    *listids = (int *)lalloc(sizeof(int) * nstate, TRUE);
5100 	    if (*listids == NULL)
5101 	    {
5102 		EMSG(_("E878: (NFA) Could not allocate memory for branch traversal!"));
5103 		return 0;
5104 	    }
5105 	}
5106 	nfa_save_listids(prog, *listids);
5107 	need_restore = TRUE;
5108 	/* any value of nfa_listid will do */
5109     }
5110     else
5111     {
5112 	/* First recursive nfa_regmatch() call, switch to the second lastlist
5113 	 * entry.  Make sure nfa_listid is different from a previous recursive
5114 	 * call, because some states may still have this ID. */
5115 	++nfa_ll_index;
5116 	if (nfa_listid <= nfa_alt_listid)
5117 	    nfa_listid = nfa_alt_listid;
5118     }
5119 
5120     /* Call nfa_regmatch() to check if the current concat matches at this
5121      * position. The concat ends with the node NFA_END_INVISIBLE */
5122     nfa_endp = endposp;
5123     result = nfa_regmatch(prog, state->out, submatch, m);
5124 
5125     if (need_restore)
5126 	nfa_restore_listids(prog, *listids);
5127     else
5128     {
5129 	--nfa_ll_index;
5130 	nfa_alt_listid = nfa_listid;
5131     }
5132 
5133     /* restore position in input text */
5134     reglnum = save_reglnum;
5135     if (REG_MULTI)
5136 	regline = reg_getline(reglnum);
5137     reginput = regline + save_reginput_col;
5138     nfa_match = save_nfa_match;
5139     nfa_endp = save_nfa_endp;
5140     nfa_listid = save_nfa_listid;
5141 
5142 #ifdef ENABLE_LOG
5143     log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
5144     if (log_fd != NULL)
5145     {
5146 	fprintf(log_fd, "****************************\n");
5147 	fprintf(log_fd, "FINISHED RUNNING nfa_regmatch() recursively\n");
5148 	fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE");
5149 	fprintf(log_fd, "****************************\n");
5150     }
5151     else
5152     {
5153 	EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
5154 	log_fd = stderr;
5155     }
5156 #endif
5157 
5158     return result;
5159 }
5160 
5161 static int skip_to_start __ARGS((int c, colnr_T *colp));
5162 static long find_match_text __ARGS((colnr_T startcol, int regstart, char_u *match_text));
5163 
5164 /*
5165  * Estimate the chance of a match with "state" failing.
5166  * empty match: 0
5167  * NFA_ANY: 1
5168  * specific character: 99
5169  */
5170     static int
5171 failure_chance(state, depth)
5172     nfa_state_T *state;
5173     int		depth;
5174 {
5175     int c = state->c;
5176     int l, r;
5177 
5178     /* detect looping */
5179     if (depth > 4)
5180 	return 1;
5181 
5182     switch (c)
5183     {
5184 	case NFA_SPLIT:
5185 	    if (state->out->c == NFA_SPLIT || state->out1->c == NFA_SPLIT)
5186 		/* avoid recursive stuff */
5187 		return 1;
5188 	    /* two alternatives, use the lowest failure chance */
5189 	    l = failure_chance(state->out, depth + 1);
5190 	    r = failure_chance(state->out1, depth + 1);
5191 	    return l < r ? l : r;
5192 
5193 	case NFA_ANY:
5194 	    /* matches anything, unlikely to fail */
5195 	    return 1;
5196 
5197 	case NFA_MATCH:
5198 	case NFA_MCLOSE:
5199 	case NFA_ANY_COMPOSING:
5200 	    /* empty match works always */
5201 	    return 0;
5202 
5203 	case NFA_START_INVISIBLE:
5204 	case NFA_START_INVISIBLE_FIRST:
5205 	case NFA_START_INVISIBLE_NEG:
5206 	case NFA_START_INVISIBLE_NEG_FIRST:
5207 	case NFA_START_INVISIBLE_BEFORE:
5208 	case NFA_START_INVISIBLE_BEFORE_FIRST:
5209 	case NFA_START_INVISIBLE_BEFORE_NEG:
5210 	case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
5211 	case NFA_START_PATTERN:
5212 	    /* recursive regmatch is expensive, use low failure chance */
5213 	    return 5;
5214 
5215 	case NFA_BOL:
5216 	case NFA_EOL:
5217 	case NFA_BOF:
5218 	case NFA_EOF:
5219 	case NFA_NEWL:
5220 	    return 99;
5221 
5222 	case NFA_BOW:
5223 	case NFA_EOW:
5224 	    return 90;
5225 
5226 	case NFA_MOPEN:
5227 	case NFA_MOPEN1:
5228 	case NFA_MOPEN2:
5229 	case NFA_MOPEN3:
5230 	case NFA_MOPEN4:
5231 	case NFA_MOPEN5:
5232 	case NFA_MOPEN6:
5233 	case NFA_MOPEN7:
5234 	case NFA_MOPEN8:
5235 	case NFA_MOPEN9:
5236 #ifdef FEAT_SYN_HL
5237 	case NFA_ZOPEN:
5238 	case NFA_ZOPEN1:
5239 	case NFA_ZOPEN2:
5240 	case NFA_ZOPEN3:
5241 	case NFA_ZOPEN4:
5242 	case NFA_ZOPEN5:
5243 	case NFA_ZOPEN6:
5244 	case NFA_ZOPEN7:
5245 	case NFA_ZOPEN8:
5246 	case NFA_ZOPEN9:
5247 	case NFA_ZCLOSE:
5248 	case NFA_ZCLOSE1:
5249 	case NFA_ZCLOSE2:
5250 	case NFA_ZCLOSE3:
5251 	case NFA_ZCLOSE4:
5252 	case NFA_ZCLOSE5:
5253 	case NFA_ZCLOSE6:
5254 	case NFA_ZCLOSE7:
5255 	case NFA_ZCLOSE8:
5256 	case NFA_ZCLOSE9:
5257 #endif
5258 	case NFA_NOPEN:
5259 	case NFA_MCLOSE1:
5260 	case NFA_MCLOSE2:
5261 	case NFA_MCLOSE3:
5262 	case NFA_MCLOSE4:
5263 	case NFA_MCLOSE5:
5264 	case NFA_MCLOSE6:
5265 	case NFA_MCLOSE7:
5266 	case NFA_MCLOSE8:
5267 	case NFA_MCLOSE9:
5268 	case NFA_NCLOSE:
5269 	    return failure_chance(state->out, depth + 1);
5270 
5271 	case NFA_BACKREF1:
5272 	case NFA_BACKREF2:
5273 	case NFA_BACKREF3:
5274 	case NFA_BACKREF4:
5275 	case NFA_BACKREF5:
5276 	case NFA_BACKREF6:
5277 	case NFA_BACKREF7:
5278 	case NFA_BACKREF8:
5279 	case NFA_BACKREF9:
5280 #ifdef FEAT_SYN_HL
5281 	case NFA_ZREF1:
5282 	case NFA_ZREF2:
5283 	case NFA_ZREF3:
5284 	case NFA_ZREF4:
5285 	case NFA_ZREF5:
5286 	case NFA_ZREF6:
5287 	case NFA_ZREF7:
5288 	case NFA_ZREF8:
5289 	case NFA_ZREF9:
5290 #endif
5291 	    /* backreferences don't match in many places */
5292 	    return 94;
5293 
5294 	case NFA_LNUM_GT:
5295 	case NFA_LNUM_LT:
5296 	case NFA_COL_GT:
5297 	case NFA_COL_LT:
5298 	case NFA_VCOL_GT:
5299 	case NFA_VCOL_LT:
5300 	case NFA_MARK_GT:
5301 	case NFA_MARK_LT:
5302 	case NFA_VISUAL:
5303 	    /* before/after positions don't match very often */
5304 	    return 85;
5305 
5306 	case NFA_LNUM:
5307 	    return 90;
5308 
5309 	case NFA_CURSOR:
5310 	case NFA_COL:
5311 	case NFA_VCOL:
5312 	case NFA_MARK:
5313 	    /* specific positions rarely match */
5314 	    return 98;
5315 
5316 	case NFA_COMPOSING:
5317 	    return 95;
5318 
5319 	default:
5320 	    if (c > 0)
5321 		/* character match fails often */
5322 		return 95;
5323     }
5324 
5325     /* something else, includes character classes */
5326     return 50;
5327 }
5328 
5329 /*
5330  * Skip until the char "c" we know a match must start with.
5331  */
5332     static int
5333 skip_to_start(c, colp)
5334     int		c;
5335     colnr_T	*colp;
5336 {
5337     char_u *s;
5338 
5339     /* Used often, do some work to avoid call overhead. */
5340     if (!ireg_ic
5341 #ifdef FEAT_MBYTE
5342 		&& !has_mbyte
5343 #endif
5344 		)
5345 	s = vim_strbyte(regline + *colp, c);
5346     else
5347 	s = cstrchr(regline + *colp, c);
5348     if (s == NULL)
5349 	return FAIL;
5350     *colp = (int)(s - regline);
5351     return OK;
5352 }
5353 
5354 /*
5355  * Check for a match with match_text.
5356  * Called after skip_to_start() has found regstart.
5357  * Returns zero for no match, 1 for a match.
5358  */
5359     static long
5360 find_match_text(startcol, regstart, match_text)
5361     colnr_T startcol;
5362     int	    regstart;
5363     char_u  *match_text;
5364 {
5365     colnr_T col = startcol;
5366     int	    c1, c2;
5367     int	    len1, len2;
5368     int	    match;
5369 
5370     for (;;)
5371     {
5372 	match = TRUE;
5373 	len2 = MB_CHAR2LEN(regstart); /* skip regstart */
5374 	for (len1 = 0; match_text[len1] != NUL; len1 += MB_CHAR2LEN(c1))
5375 	{
5376 	    c1 = PTR2CHAR(match_text + len1);
5377 	    c2 = PTR2CHAR(regline + col + len2);
5378 	    if (c1 != c2 && (!ireg_ic || MB_TOLOWER(c1) != MB_TOLOWER(c2)))
5379 	    {
5380 		match = FALSE;
5381 		break;
5382 	    }
5383 	    len2 += MB_CHAR2LEN(c2);
5384 	}
5385 	if (match
5386 #ifdef FEAT_MBYTE
5387 		/* check that no composing char follows */
5388 		&& !(enc_utf8
5389 			   && utf_iscomposing(PTR2CHAR(regline + col + len2)))
5390 #endif
5391 		)
5392 	{
5393 	    cleanup_subexpr();
5394 	    if (REG_MULTI)
5395 	    {
5396 		reg_startpos[0].lnum = reglnum;
5397 		reg_startpos[0].col = col;
5398 		reg_endpos[0].lnum = reglnum;
5399 		reg_endpos[0].col = col + len2;
5400 	    }
5401 	    else
5402 	    {
5403 		reg_startp[0] = regline + col;
5404 		reg_endp[0] = regline + col + len2;
5405 	    }
5406 	    return 1L;
5407 	}
5408 
5409 	/* Try finding regstart after the current match. */
5410 	col += MB_CHAR2LEN(regstart); /* skip regstart */
5411 	if (skip_to_start(regstart, &col) == FAIL)
5412 	    break;
5413     }
5414     return 0L;
5415 }
5416 
5417 /*
5418  * Main matching routine.
5419  *
5420  * Run NFA to determine whether it matches reginput.
5421  *
5422  * When "nfa_endp" is not NULL it is a required end-of-match position.
5423  *
5424  * Return TRUE if there is a match, FALSE otherwise.
5425  * When there is a match "submatch" contains the positions.
5426  * Note: Caller must ensure that: start != NULL.
5427  */
5428     static int
5429 nfa_regmatch(prog, start, submatch, m)
5430     nfa_regprog_T	*prog;
5431     nfa_state_T		*start;
5432     regsubs_T		*submatch;
5433     regsubs_T		*m;
5434 {
5435     int		result;
5436     size_t	size = 0;
5437     int		flag = 0;
5438     int		go_to_nextline = FALSE;
5439     nfa_thread_T *t;
5440     nfa_list_T	list[2];
5441     int		listidx;
5442     nfa_list_T	*thislist;
5443     nfa_list_T	*nextlist;
5444     int		*listids = NULL;
5445     nfa_state_T *add_state;
5446     int		add_here;
5447     int		add_count;
5448     int		add_off = 0;
5449     int		toplevel = start->c == NFA_MOPEN;
5450 #ifdef NFA_REGEXP_DEBUG_LOG
5451     FILE	*debug = fopen(NFA_REGEXP_DEBUG_LOG, "a");
5452 
5453     if (debug == NULL)
5454     {
5455 	EMSG2(_("(NFA) COULD NOT OPEN %s !"), NFA_REGEXP_DEBUG_LOG);
5456 	return FALSE;
5457     }
5458 #endif
5459     /* Some patterns may take a long time to match, especially when using
5460      * recursive_regmatch(). Allow interrupting them with CTRL-C. */
5461     fast_breakcheck();
5462     if (got_int)
5463 	return FALSE;
5464 #ifdef FEAT_RELTIME
5465     if (nfa_time_limit != NULL && profile_passed_limit(nfa_time_limit))
5466 	return FALSE;
5467 #endif
5468 
5469     nfa_match = FALSE;
5470 
5471     /* Allocate memory for the lists of nodes. */
5472     size = (nstate + 1) * sizeof(nfa_thread_T);
5473 
5474     list[0].t = (nfa_thread_T *)lalloc(size, TRUE);
5475     list[0].len = nstate + 1;
5476     list[1].t = (nfa_thread_T *)lalloc(size, TRUE);
5477     list[1].len = nstate + 1;
5478     if (list[0].t == NULL || list[1].t == NULL)
5479 	goto theend;
5480 
5481 #ifdef ENABLE_LOG
5482     log_fd = fopen(NFA_REGEXP_RUN_LOG, "a");
5483     if (log_fd != NULL)
5484     {
5485 	fprintf(log_fd, "**********************************\n");
5486 	nfa_set_code(start->c);
5487 	fprintf(log_fd, " RUNNING nfa_regmatch() starting with state %d, code %s\n",
5488 	abs(start->id), code);
5489 	fprintf(log_fd, "**********************************\n");
5490     }
5491     else
5492     {
5493 	EMSG(_("Could not open temporary log file for writing, displaying on stderr ... "));
5494 	log_fd = stderr;
5495     }
5496 #endif
5497 
5498     thislist = &list[0];
5499     thislist->n = 0;
5500     thislist->has_pim = FALSE;
5501     nextlist = &list[1];
5502     nextlist->n = 0;
5503     nextlist->has_pim = FALSE;
5504 #ifdef ENABLE_LOG
5505     fprintf(log_fd, "(---) STARTSTATE first\n");
5506 #endif
5507     thislist->id = nfa_listid + 1;
5508 
5509     /* Inline optimized code for addstate(thislist, start, m, 0) if we know
5510      * it's the first MOPEN. */
5511     if (toplevel)
5512     {
5513 	if (REG_MULTI)
5514 	{
5515 	    m->norm.list.multi[0].start_lnum = reglnum;
5516 	    m->norm.list.multi[0].start_col = (colnr_T)(reginput - regline);
5517 	}
5518 	else
5519 	    m->norm.list.line[0].start = reginput;
5520 	m->norm.in_use = 1;
5521 	addstate(thislist, start->out, m, NULL, 0);
5522     }
5523     else
5524 	addstate(thislist, start, m, NULL, 0);
5525 
5526 #define	ADD_STATE_IF_MATCH(state)			\
5527     if (result) {					\
5528 	add_state = state->out;				\
5529 	add_off = clen;					\
5530     }
5531 
5532     /*
5533      * Run for each character.
5534      */
5535     for (;;)
5536     {
5537 	int	curc;
5538 	int	clen;
5539 
5540 #ifdef FEAT_MBYTE
5541 	if (has_mbyte)
5542 	{
5543 	    curc = (*mb_ptr2char)(reginput);
5544 	    clen = (*mb_ptr2len)(reginput);
5545 	}
5546 	else
5547 #endif
5548 	{
5549 	    curc = *reginput;
5550 	    clen = 1;
5551 	}
5552 	if (curc == NUL)
5553 	{
5554 	    clen = 0;
5555 	    go_to_nextline = FALSE;
5556 	}
5557 
5558 	/* swap lists */
5559 	thislist = &list[flag];
5560 	nextlist = &list[flag ^= 1];
5561 	nextlist->n = 0;	    /* clear nextlist */
5562 	nextlist->has_pim = FALSE;
5563 	++nfa_listid;
5564 	if (prog->re_engine == AUTOMATIC_ENGINE && nfa_listid >= NFA_MAX_STATES)
5565 	{
5566 	    /* too many states, retry with old engine */
5567 	    nfa_match = NFA_TOO_EXPENSIVE;
5568 	    goto theend;
5569 	}
5570 
5571 	thislist->id = nfa_listid;
5572 	nextlist->id = nfa_listid + 1;
5573 
5574 #ifdef ENABLE_LOG
5575 	fprintf(log_fd, "------------------------------------------\n");
5576 	fprintf(log_fd, ">>> Reginput is \"%s\"\n", reginput);
5577 	fprintf(log_fd, ">>> Advanced one character ... Current char is %c (code %d) \n", curc, (int)curc);
5578 	fprintf(log_fd, ">>> Thislist has %d states available: ", thislist->n);
5579 	{
5580 	    int i;
5581 
5582 	    for (i = 0; i < thislist->n; i++)
5583 		fprintf(log_fd, "%d  ", abs(thislist->t[i].state->id));
5584 	}
5585 	fprintf(log_fd, "\n");
5586 #endif
5587 
5588 #ifdef NFA_REGEXP_DEBUG_LOG
5589 	fprintf(debug, "\n-------------------\n");
5590 #endif
5591 	/*
5592 	 * If the state lists are empty we can stop.
5593 	 */
5594 	if (thislist->n == 0)
5595 	    break;
5596 
5597 	/* compute nextlist */
5598 	for (listidx = 0; listidx < thislist->n; ++listidx)
5599 	{
5600 	    t = &thislist->t[listidx];
5601 
5602 #ifdef NFA_REGEXP_DEBUG_LOG
5603 	    nfa_set_code(t->state->c);
5604 	    fprintf(debug, "%s, ", code);
5605 #endif
5606 #ifdef ENABLE_LOG
5607 	    {
5608 		int col;
5609 
5610 		if (t->subs.norm.in_use <= 0)
5611 		    col = -1;
5612 		else if (REG_MULTI)
5613 		    col = t->subs.norm.list.multi[0].start_col;
5614 		else
5615 		    col = (int)(t->subs.norm.list.line[0].start - regline);
5616 		nfa_set_code(t->state->c);
5617 		fprintf(log_fd, "(%d) char %d %s (start col %d)%s ... \n",
5618 			abs(t->state->id), (int)t->state->c, code, col,
5619 			pim_info(&t->pim));
5620 	    }
5621 #endif
5622 
5623 	    /*
5624 	     * Handle the possible codes of the current state.
5625 	     * The most important is NFA_MATCH.
5626 	     */
5627 	    add_state = NULL;
5628 	    add_here = FALSE;
5629 	    add_count = 0;
5630 	    switch (t->state->c)
5631 	    {
5632 	    case NFA_MATCH:
5633 	      {
5634 #ifdef FEAT_MBYTE
5635 		/* If the match ends before a composing characters and
5636 		 * ireg_icombine is not set, that is not really a match. */
5637 		if (enc_utf8 && !ireg_icombine && utf_iscomposing(curc))
5638 		    break;
5639 #endif
5640 		nfa_match = TRUE;
5641 		copy_sub(&submatch->norm, &t->subs.norm);
5642 #ifdef FEAT_SYN_HL
5643 		if (nfa_has_zsubexpr)
5644 		    copy_sub(&submatch->synt, &t->subs.synt);
5645 #endif
5646 #ifdef ENABLE_LOG
5647 		log_subsexpr(&t->subs);
5648 #endif
5649 		/* Found the left-most longest match, do not look at any other
5650 		 * states at this position.  When the list of states is going
5651 		 * to be empty quit without advancing, so that "reginput" is
5652 		 * correct. */
5653 		if (nextlist->n == 0)
5654 		    clen = 0;
5655 		goto nextchar;
5656 	      }
5657 
5658 	    case NFA_END_INVISIBLE:
5659 	    case NFA_END_INVISIBLE_NEG:
5660 	    case NFA_END_PATTERN:
5661 		/*
5662 		 * This is only encountered after a NFA_START_INVISIBLE or
5663 		 * NFA_START_INVISIBLE_BEFORE node.
5664 		 * They surround a zero-width group, used with "\@=", "\&",
5665 		 * "\@!", "\@<=" and "\@<!".
5666 		 * If we got here, it means that the current "invisible" group
5667 		 * finished successfully, so return control to the parent
5668 		 * nfa_regmatch().  For a look-behind match only when it ends
5669 		 * in the position in "nfa_endp".
5670 		 * Submatches are stored in *m, and used in the parent call.
5671 		 */
5672 #ifdef ENABLE_LOG
5673 		if (nfa_endp != NULL)
5674 		{
5675 		    if (REG_MULTI)
5676 			fprintf(log_fd, "Current lnum: %d, endp lnum: %d; current col: %d, endp col: %d\n",
5677 				(int)reglnum,
5678 				(int)nfa_endp->se_u.pos.lnum,
5679 				(int)(reginput - regline),
5680 				nfa_endp->se_u.pos.col);
5681 		    else
5682 			fprintf(log_fd, "Current col: %d, endp col: %d\n",
5683 				(int)(reginput - regline),
5684 				(int)(nfa_endp->se_u.ptr - reginput));
5685 		}
5686 #endif
5687 		/* If "nfa_endp" is set it's only a match if it ends at
5688 		 * "nfa_endp" */
5689 		if (nfa_endp != NULL && (REG_MULTI
5690 			? (reglnum != nfa_endp->se_u.pos.lnum
5691 			    || (int)(reginput - regline)
5692 						!= nfa_endp->se_u.pos.col)
5693 			: reginput != nfa_endp->se_u.ptr))
5694 		    break;
5695 
5696 		/* do not set submatches for \@! */
5697 		if (t->state->c != NFA_END_INVISIBLE_NEG)
5698 		{
5699 		    copy_sub(&m->norm, &t->subs.norm);
5700 #ifdef FEAT_SYN_HL
5701 		    if (nfa_has_zsubexpr)
5702 			copy_sub(&m->synt, &t->subs.synt);
5703 #endif
5704 		}
5705 #ifdef ENABLE_LOG
5706 		fprintf(log_fd, "Match found:\n");
5707 		log_subsexpr(m);
5708 #endif
5709 		nfa_match = TRUE;
5710 		/* See comment above at "goto nextchar". */
5711 		if (nextlist->n == 0)
5712 		    clen = 0;
5713 		goto nextchar;
5714 
5715 	    case NFA_START_INVISIBLE:
5716 	    case NFA_START_INVISIBLE_FIRST:
5717 	    case NFA_START_INVISIBLE_NEG:
5718 	    case NFA_START_INVISIBLE_NEG_FIRST:
5719 	    case NFA_START_INVISIBLE_BEFORE:
5720 	    case NFA_START_INVISIBLE_BEFORE_FIRST:
5721 	    case NFA_START_INVISIBLE_BEFORE_NEG:
5722 	    case NFA_START_INVISIBLE_BEFORE_NEG_FIRST:
5723 		{
5724 #ifdef ENABLE_LOG
5725 		    fprintf(log_fd, "Failure chance invisible: %d, what follows: %d\n",
5726 			    failure_chance(t->state->out, 0),
5727 			    failure_chance(t->state->out1->out, 0));
5728 #endif
5729 		    /* Do it directly if there already is a PIM or when
5730 		     * nfa_postprocess() detected it will work better. */
5731 		    if (t->pim.result != NFA_PIM_UNUSED
5732 			 || t->state->c == NFA_START_INVISIBLE_FIRST
5733 			 || t->state->c == NFA_START_INVISIBLE_NEG_FIRST
5734 			 || t->state->c == NFA_START_INVISIBLE_BEFORE_FIRST
5735 			 || t->state->c == NFA_START_INVISIBLE_BEFORE_NEG_FIRST)
5736 		    {
5737 			int in_use = m->norm.in_use;
5738 
5739 			/* Copy submatch info for the recursive call, opposite
5740 			 * of what happens on success below. */
5741 			copy_sub_off(&m->norm, &t->subs.norm);
5742 #ifdef FEAT_SYN_HL
5743 			if (nfa_has_zsubexpr)
5744 			    copy_sub_off(&m->synt, &t->subs.synt);
5745 #endif
5746 
5747 			/*
5748 			 * First try matching the invisible match, then what
5749 			 * follows.
5750 			 */
5751 			result = recursive_regmatch(t->state, NULL, prog,
5752 						       submatch, m, &listids);
5753 			if (result == NFA_TOO_EXPENSIVE)
5754 			{
5755 			    nfa_match = result;
5756 			    goto theend;
5757 			}
5758 
5759 			/* for \@! and \@<! it is a match when the result is
5760 			 * FALSE */
5761 			if (result != (t->state->c == NFA_START_INVISIBLE_NEG
5762 			       || t->state->c == NFA_START_INVISIBLE_NEG_FIRST
5763 			       || t->state->c
5764 					   == NFA_START_INVISIBLE_BEFORE_NEG
5765 			       || t->state->c
5766 				     == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
5767 			{
5768 			    /* Copy submatch info from the recursive call */
5769 			    copy_sub_off(&t->subs.norm, &m->norm);
5770 #ifdef FEAT_SYN_HL
5771 			    if (nfa_has_zsubexpr)
5772 				copy_sub_off(&t->subs.synt, &m->synt);
5773 #endif
5774 			    /* If the pattern has \ze and it matched in the
5775 			     * sub pattern, use it. */
5776 			    copy_ze_off(&t->subs.norm, &m->norm);
5777 
5778 			    /* t->state->out1 is the corresponding
5779 			     * END_INVISIBLE node; Add its out to the current
5780 			     * list (zero-width match). */
5781 			    add_here = TRUE;
5782 			    add_state = t->state->out1->out;
5783 			}
5784 			m->norm.in_use = in_use;
5785 		    }
5786 		    else
5787 		    {
5788 			nfa_pim_T pim;
5789 
5790 			/*
5791 			 * First try matching what follows.  Only if a match
5792 			 * is found verify the invisible match matches.  Add a
5793 			 * nfa_pim_T to the following states, it contains info
5794 			 * about the invisible match.
5795 			 */
5796 			pim.state = t->state;
5797 			pim.result = NFA_PIM_TODO;
5798 			pim.subs.norm.in_use = 0;
5799 #ifdef FEAT_SYN_HL
5800 			pim.subs.synt.in_use = 0;
5801 #endif
5802 			if (REG_MULTI)
5803 			{
5804 			    pim.end.pos.col = (int)(reginput - regline);
5805 			    pim.end.pos.lnum = reglnum;
5806 			}
5807 			else
5808 			    pim.end.ptr = reginput;
5809 
5810 			/* t->state->out1 is the corresponding END_INVISIBLE
5811 			 * node; Add its out to the current list (zero-width
5812 			 * match). */
5813 			addstate_here(thislist, t->state->out1->out, &t->subs,
5814 							       &pim, &listidx);
5815 		    }
5816 		}
5817 		break;
5818 
5819 	    case NFA_START_PATTERN:
5820 	      {
5821 		nfa_state_T *skip = NULL;
5822 #ifdef ENABLE_LOG
5823 		int	    skip_lid = 0;
5824 #endif
5825 
5826 		/* There is no point in trying to match the pattern if the
5827 		 * output state is not going to be added to the list. */
5828 		if (state_in_list(nextlist, t->state->out1->out, &t->subs))
5829 		{
5830 		    skip = t->state->out1->out;
5831 #ifdef ENABLE_LOG
5832 		    skip_lid = nextlist->id;
5833 #endif
5834 		}
5835 		else if (state_in_list(nextlist,
5836 					  t->state->out1->out->out, &t->subs))
5837 		{
5838 		    skip = t->state->out1->out->out;
5839 #ifdef ENABLE_LOG
5840 		    skip_lid = nextlist->id;
5841 #endif
5842 		}
5843 		else if (state_in_list(thislist,
5844 					  t->state->out1->out->out, &t->subs))
5845 		{
5846 		    skip = t->state->out1->out->out;
5847 #ifdef ENABLE_LOG
5848 		    skip_lid = thislist->id;
5849 #endif
5850 		}
5851 		if (skip != NULL)
5852 		{
5853 #ifdef ENABLE_LOG
5854 		    nfa_set_code(skip->c);
5855 		    fprintf(log_fd, "> Not trying to match pattern, output state %d is already in list %d. char %d: %s\n",
5856 			    abs(skip->id), skip_lid, skip->c, code);
5857 #endif
5858 		    break;
5859 		}
5860 		/* Copy submatch info to the recursive call, opposite of what
5861 		 * happens afterwards. */
5862 		copy_sub_off(&m->norm, &t->subs.norm);
5863 #ifdef FEAT_SYN_HL
5864 		if (nfa_has_zsubexpr)
5865 		    copy_sub_off(&m->synt, &t->subs.synt);
5866 #endif
5867 
5868 		/* First try matching the pattern. */
5869 		result = recursive_regmatch(t->state, NULL, prog,
5870 						       submatch, m, &listids);
5871 		if (result == NFA_TOO_EXPENSIVE)
5872 		{
5873 		    nfa_match = result;
5874 		    goto theend;
5875 		}
5876 		if (result)
5877 		{
5878 		    int bytelen;
5879 
5880 #ifdef ENABLE_LOG
5881 		    fprintf(log_fd, "NFA_START_PATTERN matches:\n");
5882 		    log_subsexpr(m);
5883 #endif
5884 		    /* Copy submatch info from the recursive call */
5885 		    copy_sub_off(&t->subs.norm, &m->norm);
5886 #ifdef FEAT_SYN_HL
5887 		    if (nfa_has_zsubexpr)
5888 			copy_sub_off(&t->subs.synt, &m->synt);
5889 #endif
5890 		    /* Now we need to skip over the matched text and then
5891 		     * continue with what follows. */
5892 		    if (REG_MULTI)
5893 			/* TODO: multi-line match */
5894 			bytelen = m->norm.list.multi[0].end_col
5895 						  - (int)(reginput - regline);
5896 		    else
5897 			bytelen = (int)(m->norm.list.line[0].end - reginput);
5898 
5899 #ifdef ENABLE_LOG
5900 		    fprintf(log_fd, "NFA_START_PATTERN length: %d\n", bytelen);
5901 #endif
5902 		    if (bytelen == 0)
5903 		    {
5904 			/* empty match, output of corresponding
5905 			 * NFA_END_PATTERN/NFA_SKIP to be used at current
5906 			 * position */
5907 			add_here = TRUE;
5908 			add_state = t->state->out1->out->out;
5909 		    }
5910 		    else if (bytelen <= clen)
5911 		    {
5912 			/* match current character, output of corresponding
5913 			 * NFA_END_PATTERN to be used at next position. */
5914 			add_state = t->state->out1->out->out;
5915 			add_off = clen;
5916 		    }
5917 		    else
5918 		    {
5919 			/* skip over the matched characters, set character
5920 			 * count in NFA_SKIP */
5921 			add_state = t->state->out1->out;
5922 			add_off = bytelen;
5923 			add_count = bytelen - clen;
5924 		    }
5925 		}
5926 		break;
5927 	      }
5928 
5929 	    case NFA_BOL:
5930 		if (reginput == regline)
5931 		{
5932 		    add_here = TRUE;
5933 		    add_state = t->state->out;
5934 		}
5935 		break;
5936 
5937 	    case NFA_EOL:
5938 		if (curc == NUL)
5939 		{
5940 		    add_here = TRUE;
5941 		    add_state = t->state->out;
5942 		}
5943 		break;
5944 
5945 	    case NFA_BOW:
5946 		result = TRUE;
5947 
5948 		if (curc == NUL)
5949 		    result = FALSE;
5950 #ifdef FEAT_MBYTE
5951 		else if (has_mbyte)
5952 		{
5953 		    int this_class;
5954 
5955 		    /* Get class of current and previous char (if it exists). */
5956 		    this_class = mb_get_class_buf(reginput, reg_buf);
5957 		    if (this_class <= 1)
5958 			result = FALSE;
5959 		    else if (reg_prev_class() == this_class)
5960 			result = FALSE;
5961 		}
5962 #endif
5963 		else if (!vim_iswordc_buf(curc, reg_buf)
5964 			   || (reginput > regline
5965 				   && vim_iswordc_buf(reginput[-1], reg_buf)))
5966 		    result = FALSE;
5967 		if (result)
5968 		{
5969 		    add_here = TRUE;
5970 		    add_state = t->state->out;
5971 		}
5972 		break;
5973 
5974 	    case NFA_EOW:
5975 		result = TRUE;
5976 		if (reginput == regline)
5977 		    result = FALSE;
5978 #ifdef FEAT_MBYTE
5979 		else if (has_mbyte)
5980 		{
5981 		    int this_class, prev_class;
5982 
5983 		    /* Get class of current and previous char (if it exists). */
5984 		    this_class = mb_get_class_buf(reginput, reg_buf);
5985 		    prev_class = reg_prev_class();
5986 		    if (this_class == prev_class
5987 					|| prev_class == 0 || prev_class == 1)
5988 			result = FALSE;
5989 		}
5990 #endif
5991 		else if (!vim_iswordc_buf(reginput[-1], reg_buf)
5992 			|| (reginput[0] != NUL
5993 					   && vim_iswordc_buf(curc, reg_buf)))
5994 		    result = FALSE;
5995 		if (result)
5996 		{
5997 		    add_here = TRUE;
5998 		    add_state = t->state->out;
5999 		}
6000 		break;
6001 
6002 	    case NFA_BOF:
6003 		if (reglnum == 0 && reginput == regline
6004 					&& (!REG_MULTI || reg_firstlnum == 1))
6005 		{
6006 		    add_here = TRUE;
6007 		    add_state = t->state->out;
6008 		}
6009 		break;
6010 
6011 	    case NFA_EOF:
6012 		if (reglnum == reg_maxline && curc == NUL)
6013 		{
6014 		    add_here = TRUE;
6015 		    add_state = t->state->out;
6016 		}
6017 		break;
6018 
6019 #ifdef FEAT_MBYTE
6020 	    case NFA_COMPOSING:
6021 	    {
6022 		int	    mc = curc;
6023 		int	    len = 0;
6024 		nfa_state_T *end;
6025 		nfa_state_T *sta;
6026 		int	    cchars[MAX_MCO];
6027 		int	    ccount = 0;
6028 		int	    j;
6029 
6030 		sta = t->state->out;
6031 		len = 0;
6032 		if (utf_iscomposing(sta->c))
6033 		{
6034 		    /* Only match composing character(s), ignore base
6035 		     * character.  Used for ".{composing}" and "{composing}"
6036 		     * (no preceding character). */
6037 		    len += mb_char2len(mc);
6038 		}
6039 		if (ireg_icombine && len == 0)
6040 		{
6041 		    /* If \Z was present, then ignore composing characters.
6042 		     * When ignoring the base character this always matches. */
6043 		    if (len == 0 && sta->c != curc)
6044 			result = FAIL;
6045 		    else
6046 			result = OK;
6047 		    while (sta->c != NFA_END_COMPOSING)
6048 			sta = sta->out;
6049 		}
6050 
6051 		/* Check base character matches first, unless ignored. */
6052 		else if (len > 0 || mc == sta->c)
6053 		{
6054 		    if (len == 0)
6055 		    {
6056 			len += mb_char2len(mc);
6057 			sta = sta->out;
6058 		    }
6059 
6060 		    /* We don't care about the order of composing characters.
6061 		     * Get them into cchars[] first. */
6062 		    while (len < clen)
6063 		    {
6064 			mc = mb_ptr2char(reginput + len);
6065 			cchars[ccount++] = mc;
6066 			len += mb_char2len(mc);
6067 			if (ccount == MAX_MCO)
6068 			    break;
6069 		    }
6070 
6071 		    /* Check that each composing char in the pattern matches a
6072 		     * composing char in the text.  We do not check if all
6073 		     * composing chars are matched. */
6074 		    result = OK;
6075 		    while (sta->c != NFA_END_COMPOSING)
6076 		    {
6077 			for (j = 0; j < ccount; ++j)
6078 			    if (cchars[j] == sta->c)
6079 				break;
6080 			if (j == ccount)
6081 			{
6082 			    result = FAIL;
6083 			    break;
6084 			}
6085 			sta = sta->out;
6086 		    }
6087 		}
6088 		else
6089 		    result = FAIL;
6090 
6091 		end = t->state->out1;	    /* NFA_END_COMPOSING */
6092 		ADD_STATE_IF_MATCH(end);
6093 		break;
6094 	    }
6095 #endif
6096 
6097 	    case NFA_NEWL:
6098 		if (curc == NUL && !reg_line_lbr && REG_MULTI
6099 						    && reglnum <= reg_maxline)
6100 		{
6101 		    go_to_nextline = TRUE;
6102 		    /* Pass -1 for the offset, which means taking the position
6103 		     * at the start of the next line. */
6104 		    add_state = t->state->out;
6105 		    add_off = -1;
6106 		}
6107 		else if (curc == '\n' && reg_line_lbr)
6108 		{
6109 		    /* match \n as if it is an ordinary character */
6110 		    add_state = t->state->out;
6111 		    add_off = 1;
6112 		}
6113 		break;
6114 
6115 	    case NFA_START_COLL:
6116 	    case NFA_START_NEG_COLL:
6117 	      {
6118 		/* What follows is a list of characters, until NFA_END_COLL.
6119 		 * One of them must match or none of them must match. */
6120 		nfa_state_T	*state;
6121 		int		result_if_matched;
6122 		int		c1, c2;
6123 
6124 		/* Never match EOL. If it's part of the collection it is added
6125 		 * as a separate state with an OR. */
6126 		if (curc == NUL)
6127 		    break;
6128 
6129 		state = t->state->out;
6130 		result_if_matched = (t->state->c == NFA_START_COLL);
6131 		for (;;)
6132 		{
6133 		    if (state->c == NFA_END_COLL)
6134 		    {
6135 			result = !result_if_matched;
6136 			break;
6137 		    }
6138 		    if (state->c == NFA_RANGE_MIN)
6139 		    {
6140 			c1 = state->val;
6141 			state = state->out; /* advance to NFA_RANGE_MAX */
6142 			c2 = state->val;
6143 #ifdef ENABLE_LOG
6144 			fprintf(log_fd, "NFA_RANGE_MIN curc=%d c1=%d c2=%d\n",
6145 				curc, c1, c2);
6146 #endif
6147 			if (curc >= c1 && curc <= c2)
6148 			{
6149 			    result = result_if_matched;
6150 			    break;
6151 			}
6152 			if (ireg_ic)
6153 			{
6154 			    int curc_low = MB_TOLOWER(curc);
6155 			    int done = FALSE;
6156 
6157 			    for ( ; c1 <= c2; ++c1)
6158 				if (MB_TOLOWER(c1) == curc_low)
6159 				{
6160 				    result = result_if_matched;
6161 				    done = TRUE;
6162 				    break;
6163 				}
6164 			    if (done)
6165 				break;
6166 			}
6167 		    }
6168 		    else if (state->c < 0 ? check_char_class(state->c, curc)
6169 			        : (curc == state->c
6170 				   || (ireg_ic && MB_TOLOWER(curc)
6171 						    == MB_TOLOWER(state->c))))
6172 		    {
6173 			result = result_if_matched;
6174 			break;
6175 		    }
6176 		    state = state->out;
6177 		}
6178 		if (result)
6179 		{
6180 		    /* next state is in out of the NFA_END_COLL, out1 of
6181 		     * START points to the END state */
6182 		    add_state = t->state->out1->out;
6183 		    add_off = clen;
6184 		}
6185 		break;
6186 	      }
6187 
6188 	    case NFA_ANY:
6189 		/* Any char except '\0', (end of input) does not match. */
6190 		if (curc > 0)
6191 		{
6192 		    add_state = t->state->out;
6193 		    add_off = clen;
6194 		}
6195 		break;
6196 
6197 	    case NFA_ANY_COMPOSING:
6198 		/* On a composing character skip over it.  Otherwise do
6199 		 * nothing.  Always matches. */
6200 #ifdef FEAT_MBYTE
6201 		if (enc_utf8 && utf_iscomposing(curc))
6202 		{
6203 		    add_off = clen;
6204 		}
6205 		else
6206 #endif
6207 		{
6208 		    add_here = TRUE;
6209 		    add_off = 0;
6210 		}
6211 		add_state = t->state->out;
6212 		break;
6213 
6214 	    /*
6215 	     * Character classes like \a for alpha, \d for digit etc.
6216 	     */
6217 	    case NFA_IDENT:	/*  \i	*/
6218 		result = vim_isIDc(curc);
6219 		ADD_STATE_IF_MATCH(t->state);
6220 		break;
6221 
6222 	    case NFA_SIDENT:	/*  \I	*/
6223 		result = !VIM_ISDIGIT(curc) && vim_isIDc(curc);
6224 		ADD_STATE_IF_MATCH(t->state);
6225 		break;
6226 
6227 	    case NFA_KWORD:	/*  \k	*/
6228 		result = vim_iswordp_buf(reginput, reg_buf);
6229 		ADD_STATE_IF_MATCH(t->state);
6230 		break;
6231 
6232 	    case NFA_SKWORD:	/*  \K	*/
6233 		result = !VIM_ISDIGIT(curc)
6234 					&& vim_iswordp_buf(reginput, reg_buf);
6235 		ADD_STATE_IF_MATCH(t->state);
6236 		break;
6237 
6238 	    case NFA_FNAME:	/*  \f	*/
6239 		result = vim_isfilec(curc);
6240 		ADD_STATE_IF_MATCH(t->state);
6241 		break;
6242 
6243 	    case NFA_SFNAME:	/*  \F	*/
6244 		result = !VIM_ISDIGIT(curc) && vim_isfilec(curc);
6245 		ADD_STATE_IF_MATCH(t->state);
6246 		break;
6247 
6248 	    case NFA_PRINT:	/*  \p	*/
6249 		result = vim_isprintc(PTR2CHAR(reginput));
6250 		ADD_STATE_IF_MATCH(t->state);
6251 		break;
6252 
6253 	    case NFA_SPRINT:	/*  \P	*/
6254 		result = !VIM_ISDIGIT(curc) && vim_isprintc(PTR2CHAR(reginput));
6255 		ADD_STATE_IF_MATCH(t->state);
6256 		break;
6257 
6258 	    case NFA_WHITE:	/*  \s	*/
6259 		result = vim_iswhite(curc);
6260 		ADD_STATE_IF_MATCH(t->state);
6261 		break;
6262 
6263 	    case NFA_NWHITE:	/*  \S	*/
6264 		result = curc != NUL && !vim_iswhite(curc);
6265 		ADD_STATE_IF_MATCH(t->state);
6266 		break;
6267 
6268 	    case NFA_DIGIT:	/*  \d	*/
6269 		result = ri_digit(curc);
6270 		ADD_STATE_IF_MATCH(t->state);
6271 		break;
6272 
6273 	    case NFA_NDIGIT:	/*  \D	*/
6274 		result = curc != NUL && !ri_digit(curc);
6275 		ADD_STATE_IF_MATCH(t->state);
6276 		break;
6277 
6278 	    case NFA_HEX:	/*  \x	*/
6279 		result = ri_hex(curc);
6280 		ADD_STATE_IF_MATCH(t->state);
6281 		break;
6282 
6283 	    case NFA_NHEX:	/*  \X	*/
6284 		result = curc != NUL && !ri_hex(curc);
6285 		ADD_STATE_IF_MATCH(t->state);
6286 		break;
6287 
6288 	    case NFA_OCTAL:	/*  \o	*/
6289 		result = ri_octal(curc);
6290 		ADD_STATE_IF_MATCH(t->state);
6291 		break;
6292 
6293 	    case NFA_NOCTAL:	/*  \O	*/
6294 		result = curc != NUL && !ri_octal(curc);
6295 		ADD_STATE_IF_MATCH(t->state);
6296 		break;
6297 
6298 	    case NFA_WORD:	/*  \w	*/
6299 		result = ri_word(curc);
6300 		ADD_STATE_IF_MATCH(t->state);
6301 		break;
6302 
6303 	    case NFA_NWORD:	/*  \W	*/
6304 		result = curc != NUL && !ri_word(curc);
6305 		ADD_STATE_IF_MATCH(t->state);
6306 		break;
6307 
6308 	    case NFA_HEAD:	/*  \h	*/
6309 		result = ri_head(curc);
6310 		ADD_STATE_IF_MATCH(t->state);
6311 		break;
6312 
6313 	    case NFA_NHEAD:	/*  \H	*/
6314 		result = curc != NUL && !ri_head(curc);
6315 		ADD_STATE_IF_MATCH(t->state);
6316 		break;
6317 
6318 	    case NFA_ALPHA:	/*  \a	*/
6319 		result = ri_alpha(curc);
6320 		ADD_STATE_IF_MATCH(t->state);
6321 		break;
6322 
6323 	    case NFA_NALPHA:	/*  \A	*/
6324 		result = curc != NUL && !ri_alpha(curc);
6325 		ADD_STATE_IF_MATCH(t->state);
6326 		break;
6327 
6328 	    case NFA_LOWER:	/*  \l	*/
6329 		result = ri_lower(curc);
6330 		ADD_STATE_IF_MATCH(t->state);
6331 		break;
6332 
6333 	    case NFA_NLOWER:	/*  \L	*/
6334 		result = curc != NUL && !ri_lower(curc);
6335 		ADD_STATE_IF_MATCH(t->state);
6336 		break;
6337 
6338 	    case NFA_UPPER:	/*  \u	*/
6339 		result = ri_upper(curc);
6340 		ADD_STATE_IF_MATCH(t->state);
6341 		break;
6342 
6343 	    case NFA_NUPPER:	/* \U	*/
6344 		result = curc != NUL && !ri_upper(curc);
6345 		ADD_STATE_IF_MATCH(t->state);
6346 		break;
6347 
6348 	    case NFA_LOWER_IC:	/* [a-z] */
6349 		result = ri_lower(curc) || (ireg_ic && ri_upper(curc));
6350 		ADD_STATE_IF_MATCH(t->state);
6351 		break;
6352 
6353 	    case NFA_NLOWER_IC:	/* [^a-z] */
6354 		result = curc != NUL
6355 			  && !(ri_lower(curc) || (ireg_ic && ri_upper(curc)));
6356 		ADD_STATE_IF_MATCH(t->state);
6357 		break;
6358 
6359 	    case NFA_UPPER_IC:	/* [A-Z] */
6360 		result = ri_upper(curc) || (ireg_ic && ri_lower(curc));
6361 		ADD_STATE_IF_MATCH(t->state);
6362 		break;
6363 
6364 	    case NFA_NUPPER_IC:	/* ^[A-Z] */
6365 		result = curc != NUL
6366 			  && !(ri_upper(curc) || (ireg_ic && ri_lower(curc)));
6367 		ADD_STATE_IF_MATCH(t->state);
6368 		break;
6369 
6370 	    case NFA_BACKREF1:
6371 	    case NFA_BACKREF2:
6372 	    case NFA_BACKREF3:
6373 	    case NFA_BACKREF4:
6374 	    case NFA_BACKREF5:
6375 	    case NFA_BACKREF6:
6376 	    case NFA_BACKREF7:
6377 	    case NFA_BACKREF8:
6378 	    case NFA_BACKREF9:
6379 #ifdef FEAT_SYN_HL
6380 	    case NFA_ZREF1:
6381 	    case NFA_ZREF2:
6382 	    case NFA_ZREF3:
6383 	    case NFA_ZREF4:
6384 	    case NFA_ZREF5:
6385 	    case NFA_ZREF6:
6386 	    case NFA_ZREF7:
6387 	    case NFA_ZREF8:
6388 	    case NFA_ZREF9:
6389 #endif
6390 		/* \1 .. \9  \z1 .. \z9 */
6391 	      {
6392 		int subidx;
6393 		int bytelen;
6394 
6395 		if (t->state->c <= NFA_BACKREF9)
6396 		{
6397 		    subidx = t->state->c - NFA_BACKREF1 + 1;
6398 		    result = match_backref(&t->subs.norm, subidx, &bytelen);
6399 		}
6400 #ifdef FEAT_SYN_HL
6401 		else
6402 		{
6403 		    subidx = t->state->c - NFA_ZREF1 + 1;
6404 		    result = match_zref(subidx, &bytelen);
6405 		}
6406 #endif
6407 
6408 		if (result)
6409 		{
6410 		    if (bytelen == 0)
6411 		    {
6412 			/* empty match always works, output of NFA_SKIP to be
6413 			 * used next */
6414 			add_here = TRUE;
6415 			add_state = t->state->out->out;
6416 		    }
6417 		    else if (bytelen <= clen)
6418 		    {
6419 			/* match current character, jump ahead to out of
6420 			 * NFA_SKIP */
6421 			add_state = t->state->out->out;
6422 			add_off = clen;
6423 		    }
6424 		    else
6425 		    {
6426 			/* skip over the matched characters, set character
6427 			 * count in NFA_SKIP */
6428 			add_state = t->state->out;
6429 			add_off = bytelen;
6430 			add_count = bytelen - clen;
6431 		    }
6432 		}
6433 		break;
6434 	      }
6435 	    case NFA_SKIP:
6436 	      /* character of previous matching \1 .. \9  or \@> */
6437 	      if (t->count - clen <= 0)
6438 	      {
6439 		  /* end of match, go to what follows */
6440 		  add_state = t->state->out;
6441 		  add_off = clen;
6442 	      }
6443 	      else
6444 	      {
6445 		  /* add state again with decremented count */
6446 		  add_state = t->state;
6447 		  add_off = 0;
6448 		  add_count = t->count - clen;
6449 	      }
6450 	      break;
6451 
6452 	    case NFA_LNUM:
6453 	    case NFA_LNUM_GT:
6454 	    case NFA_LNUM_LT:
6455 		result = (REG_MULTI &&
6456 			nfa_re_num_cmp(t->state->val, t->state->c - NFA_LNUM,
6457 			    (long_u)(reglnum + reg_firstlnum)));
6458 		if (result)
6459 		{
6460 		    add_here = TRUE;
6461 		    add_state = t->state->out;
6462 		}
6463 		break;
6464 
6465 	    case NFA_COL:
6466 	    case NFA_COL_GT:
6467 	    case NFA_COL_LT:
6468 		result = nfa_re_num_cmp(t->state->val, t->state->c - NFA_COL,
6469 			(long_u)(reginput - regline) + 1);
6470 		if (result)
6471 		{
6472 		    add_here = TRUE;
6473 		    add_state = t->state->out;
6474 		}
6475 		break;
6476 
6477 	    case NFA_VCOL:
6478 	    case NFA_VCOL_GT:
6479 	    case NFA_VCOL_LT:
6480 		{
6481 		    int     op = t->state->c - NFA_VCOL;
6482 		    colnr_T col = (colnr_T)(reginput - regline);
6483 		    win_T   *wp = reg_win == NULL ? curwin : reg_win;
6484 
6485 		    /* Bail out quickly when there can't be a match, avoid the
6486 		     * overhead of win_linetabsize() on long lines. */
6487 		    if (op != 1 && col > t->state->val
6488 #ifdef FEAT_MBYTE
6489 			    * (has_mbyte ? MB_MAXBYTES : 1)
6490 #endif
6491 			    )
6492 			break;
6493 		    result = FALSE;
6494 		    if (op == 1 && col - 1 > t->state->val && col > 100)
6495 		    {
6496 			int ts = wp->w_buffer->b_p_ts;
6497 
6498 			/* Guess that a character won't use more columns than
6499 			 * 'tabstop', with a minimum of 4. */
6500 			if (ts < 4)
6501 			    ts = 4;
6502 			result = col > t->state->val * ts;
6503 		    }
6504 		    if (!result)
6505 			result = nfa_re_num_cmp(t->state->val, op,
6506 				(long_u)win_linetabsize(wp, regline, col) + 1);
6507 		    if (result)
6508 		    {
6509 			add_here = TRUE;
6510 			add_state = t->state->out;
6511 		    }
6512 		}
6513 		break;
6514 
6515 	    case NFA_MARK:
6516 	    case NFA_MARK_GT:
6517 	    case NFA_MARK_LT:
6518 	      {
6519 		pos_T	*pos = getmark_buf(reg_buf, t->state->val, FALSE);
6520 
6521 		/* Compare the mark position to the match position. */
6522 		result = (pos != NULL		     /* mark doesn't exist */
6523 			&& pos->lnum > 0    /* mark isn't set in reg_buf */
6524 			&& (pos->lnum == reglnum + reg_firstlnum
6525 				? (pos->col == (colnr_T)(reginput - regline)
6526 				    ? t->state->c == NFA_MARK
6527 				    : (pos->col < (colnr_T)(reginput - regline)
6528 					? t->state->c == NFA_MARK_GT
6529 					: t->state->c == NFA_MARK_LT))
6530 				: (pos->lnum < reglnum + reg_firstlnum
6531 				    ? t->state->c == NFA_MARK_GT
6532 				    : t->state->c == NFA_MARK_LT)));
6533 		if (result)
6534 		{
6535 		    add_here = TRUE;
6536 		    add_state = t->state->out;
6537 		}
6538 		break;
6539 	      }
6540 
6541 	    case NFA_CURSOR:
6542 		result = (reg_win != NULL
6543 			&& (reglnum + reg_firstlnum == reg_win->w_cursor.lnum)
6544 			&& ((colnr_T)(reginput - regline)
6545 						   == reg_win->w_cursor.col));
6546 		if (result)
6547 		{
6548 		    add_here = TRUE;
6549 		    add_state = t->state->out;
6550 		}
6551 		break;
6552 
6553 	    case NFA_VISUAL:
6554 		result = reg_match_visual();
6555 		if (result)
6556 		{
6557 		    add_here = TRUE;
6558 		    add_state = t->state->out;
6559 		}
6560 		break;
6561 
6562 	    case NFA_MOPEN1:
6563 	    case NFA_MOPEN2:
6564 	    case NFA_MOPEN3:
6565 	    case NFA_MOPEN4:
6566 	    case NFA_MOPEN5:
6567 	    case NFA_MOPEN6:
6568 	    case NFA_MOPEN7:
6569 	    case NFA_MOPEN8:
6570 	    case NFA_MOPEN9:
6571 #ifdef FEAT_SYN_HL
6572 	    case NFA_ZOPEN:
6573 	    case NFA_ZOPEN1:
6574 	    case NFA_ZOPEN2:
6575 	    case NFA_ZOPEN3:
6576 	    case NFA_ZOPEN4:
6577 	    case NFA_ZOPEN5:
6578 	    case NFA_ZOPEN6:
6579 	    case NFA_ZOPEN7:
6580 	    case NFA_ZOPEN8:
6581 	    case NFA_ZOPEN9:
6582 #endif
6583 	    case NFA_NOPEN:
6584 	    case NFA_ZSTART:
6585 		/* These states are only added to be able to bail out when
6586 		 * they are added again, nothing is to be done. */
6587 		break;
6588 
6589 	    default:	/* regular character */
6590 	      {
6591 		int c = t->state->c;
6592 
6593 #ifdef DEBUG
6594 		if (c < 0)
6595 		    EMSGN("INTERNAL: Negative state char: %ld", c);
6596 #endif
6597 		result = (c == curc);
6598 
6599 		if (!result && ireg_ic)
6600 		    result = MB_TOLOWER(c) == MB_TOLOWER(curc);
6601 #ifdef FEAT_MBYTE
6602 		/* If ireg_icombine is not set only skip over the character
6603 		 * itself.  When it is set skip over composing characters. */
6604 		if (result && enc_utf8 && !ireg_icombine)
6605 		    clen = utf_ptr2len(reginput);
6606 #endif
6607 		ADD_STATE_IF_MATCH(t->state);
6608 		break;
6609 	      }
6610 
6611 	    } /* switch (t->state->c) */
6612 
6613 	    if (add_state != NULL)
6614 	    {
6615 		nfa_pim_T *pim;
6616 		nfa_pim_T pim_copy;
6617 
6618 		if (t->pim.result == NFA_PIM_UNUSED)
6619 		    pim = NULL;
6620 		else
6621 		    pim = &t->pim;
6622 
6623 		/* Handle the postponed invisible match if the match might end
6624 		 * without advancing and before the end of the line. */
6625 		if (pim != NULL && (clen == 0 || match_follows(add_state, 0)))
6626 		{
6627 		    if (pim->result == NFA_PIM_TODO)
6628 		    {
6629 #ifdef ENABLE_LOG
6630 			fprintf(log_fd, "\n");
6631 			fprintf(log_fd, "==================================\n");
6632 			fprintf(log_fd, "Postponed recursive nfa_regmatch()\n");
6633 			fprintf(log_fd, "\n");
6634 #endif
6635 			result = recursive_regmatch(pim->state, pim,
6636 						 prog, submatch, m, &listids);
6637 			pim->result = result ? NFA_PIM_MATCH : NFA_PIM_NOMATCH;
6638 			/* for \@! and \@<! it is a match when the result is
6639 			 * FALSE */
6640 			if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
6641 			     || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST
6642 			     || pim->state->c
6643 					   == NFA_START_INVISIBLE_BEFORE_NEG
6644 			     || pim->state->c
6645 				     == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
6646 			{
6647 			    /* Copy submatch info from the recursive call */
6648 			    copy_sub_off(&pim->subs.norm, &m->norm);
6649 #ifdef FEAT_SYN_HL
6650 			    if (nfa_has_zsubexpr)
6651 				copy_sub_off(&pim->subs.synt, &m->synt);
6652 #endif
6653 			}
6654 		    }
6655 		    else
6656 		    {
6657 			result = (pim->result == NFA_PIM_MATCH);
6658 #ifdef ENABLE_LOG
6659 			fprintf(log_fd, "\n");
6660 			fprintf(log_fd, "Using previous recursive nfa_regmatch() result, result == %d\n", pim->result);
6661 			fprintf(log_fd, "MATCH = %s\n", result == TRUE ? "OK" : "FALSE");
6662 			fprintf(log_fd, "\n");
6663 #endif
6664 		    }
6665 
6666 		    /* for \@! and \@<! it is a match when result is FALSE */
6667 		    if (result != (pim->state->c == NFA_START_INVISIBLE_NEG
6668 			     || pim->state->c == NFA_START_INVISIBLE_NEG_FIRST
6669 			     || pim->state->c
6670 					   == NFA_START_INVISIBLE_BEFORE_NEG
6671 			     || pim->state->c
6672 				     == NFA_START_INVISIBLE_BEFORE_NEG_FIRST))
6673 		    {
6674 			/* Copy submatch info from the recursive call */
6675 			copy_sub_off(&t->subs.norm, &pim->subs.norm);
6676 #ifdef FEAT_SYN_HL
6677 			if (nfa_has_zsubexpr)
6678 			    copy_sub_off(&t->subs.synt, &pim->subs.synt);
6679 #endif
6680 		    }
6681 		    else
6682 			/* look-behind match failed, don't add the state */
6683 			continue;
6684 
6685 		    /* Postponed invisible match was handled, don't add it to
6686 		     * following states. */
6687 		    pim = NULL;
6688 		}
6689 
6690 		/* If "pim" points into l->t it will become invalid when
6691 		 * adding the state causes the list to be reallocated.  Make a
6692 		 * local copy to avoid that. */
6693 		if (pim == &t->pim)
6694 		{
6695 		    copy_pim(&pim_copy, pim);
6696 		    pim = &pim_copy;
6697 		}
6698 
6699 		if (add_here)
6700 		    addstate_here(thislist, add_state, &t->subs, pim, &listidx);
6701 		else
6702 		{
6703 		    addstate(nextlist, add_state, &t->subs, pim, add_off);
6704 		    if (add_count > 0)
6705 			nextlist->t[nextlist->n - 1].count = add_count;
6706 		}
6707 	    }
6708 
6709 	} /* for (thislist = thislist; thislist->state; thislist++) */
6710 
6711 	/* Look for the start of a match in the current position by adding the
6712 	 * start state to the list of states.
6713 	 * The first found match is the leftmost one, thus the order of states
6714 	 * matters!
6715 	 * Do not add the start state in recursive calls of nfa_regmatch(),
6716 	 * because recursive calls should only start in the first position.
6717 	 * Unless "nfa_endp" is not NULL, then we match the end position.
6718 	 * Also don't start a match past the first line. */
6719 	if (nfa_match == FALSE
6720 		&& ((toplevel
6721 			&& reglnum == 0
6722 			&& clen != 0
6723 			&& (ireg_maxcol == 0
6724 			    || (colnr_T)(reginput - regline) < ireg_maxcol))
6725 		    || (nfa_endp != NULL
6726 			&& (REG_MULTI
6727 			    ? (reglnum < nfa_endp->se_u.pos.lnum
6728 			       || (reglnum == nfa_endp->se_u.pos.lnum
6729 			           && (int)(reginput - regline)
6730 						    < nfa_endp->se_u.pos.col))
6731 			    : reginput < nfa_endp->se_u.ptr))))
6732 	{
6733 #ifdef ENABLE_LOG
6734 	    fprintf(log_fd, "(---) STARTSTATE\n");
6735 #endif
6736 	    /* Inline optimized code for addstate() if we know the state is
6737 	     * the first MOPEN. */
6738 	    if (toplevel)
6739 	    {
6740 		int add = TRUE;
6741 		int c;
6742 
6743 		if (prog->regstart != NUL && clen != 0)
6744 		{
6745 		    if (nextlist->n == 0)
6746 		    {
6747 			colnr_T col = (colnr_T)(reginput - regline) + clen;
6748 
6749 			/* Nextlist is empty, we can skip ahead to the
6750 			 * character that must appear at the start. */
6751 			if (skip_to_start(prog->regstart, &col) == FAIL)
6752 			    break;
6753 #ifdef ENABLE_LOG
6754 			fprintf(log_fd, "  Skipping ahead %d bytes to regstart\n",
6755 				col - ((colnr_T)(reginput - regline) + clen));
6756 #endif
6757 			reginput = regline + col - clen;
6758 		    }
6759 		    else
6760 		    {
6761 			/* Checking if the required start character matches is
6762 			 * cheaper than adding a state that won't match. */
6763 			c = PTR2CHAR(reginput + clen);
6764 			if (c != prog->regstart && (!ireg_ic || MB_TOLOWER(c)
6765 					       != MB_TOLOWER(prog->regstart)))
6766 			{
6767 #ifdef ENABLE_LOG
6768 			    fprintf(log_fd, "  Skipping start state, regstart does not match\n");
6769 #endif
6770 			    add = FALSE;
6771 			}
6772 		    }
6773 		}
6774 
6775 		if (add)
6776 		{
6777 		    if (REG_MULTI)
6778 			m->norm.list.multi[0].start_col =
6779 					 (colnr_T)(reginput - regline) + clen;
6780 		    else
6781 			m->norm.list.line[0].start = reginput + clen;
6782 		    addstate(nextlist, start->out, m, NULL, clen);
6783 		}
6784 	    }
6785 	    else
6786 		addstate(nextlist, start, m, NULL, clen);
6787 	}
6788 
6789 #ifdef ENABLE_LOG
6790 	fprintf(log_fd, ">>> Thislist had %d states available: ", thislist->n);
6791 	{
6792 	    int i;
6793 
6794 	    for (i = 0; i < thislist->n; i++)
6795 		fprintf(log_fd, "%d  ", abs(thislist->t[i].state->id));
6796 	}
6797 	fprintf(log_fd, "\n");
6798 #endif
6799 
6800 nextchar:
6801 	/* Advance to the next character, or advance to the next line, or
6802 	 * finish. */
6803 	if (clen != 0)
6804 	    reginput += clen;
6805 	else if (go_to_nextline || (nfa_endp != NULL && REG_MULTI
6806 					&& reglnum < nfa_endp->se_u.pos.lnum))
6807 	    reg_nextline();
6808 	else
6809 	    break;
6810 
6811 	/* Allow interrupting with CTRL-C. */
6812 	line_breakcheck();
6813 	if (got_int)
6814 	    break;
6815 #ifdef FEAT_RELTIME
6816 	/* Check for timeout once in a twenty times to avoid overhead. */
6817 	if (nfa_time_limit != NULL && ++nfa_time_count == 20)
6818 	{
6819 	    nfa_time_count = 0;
6820 	    if (profile_passed_limit(nfa_time_limit))
6821 		break;
6822 	}
6823 #endif
6824     }
6825 
6826 #ifdef ENABLE_LOG
6827     if (log_fd != stderr)
6828 	fclose(log_fd);
6829     log_fd = NULL;
6830 #endif
6831 
6832 theend:
6833     /* Free memory */
6834     vim_free(list[0].t);
6835     vim_free(list[1].t);
6836     vim_free(listids);
6837 #undef ADD_STATE_IF_MATCH
6838 #ifdef NFA_REGEXP_DEBUG_LOG
6839     fclose(debug);
6840 #endif
6841 
6842     return nfa_match;
6843 }
6844 
6845 /*
6846  * Try match of "prog" with at regline["col"].
6847  * Returns <= 0 for failure, number of lines contained in the match otherwise.
6848  */
6849     static long
6850 nfa_regtry(prog, col, tm)
6851     nfa_regprog_T   *prog;
6852     colnr_T	    col;
6853     proftime_T	    *tm UNUSED;	/* timeout limit or NULL */
6854 {
6855     int		i;
6856     regsubs_T	subs, m;
6857     nfa_state_T	*start = prog->start;
6858     int		result;
6859 #ifdef ENABLE_LOG
6860     FILE	*f;
6861 #endif
6862 
6863     reginput = regline + col;
6864 #ifdef FEAT_RELTIME
6865     nfa_time_limit = tm;
6866     nfa_time_count = 0;
6867 #endif
6868 
6869 #ifdef ENABLE_LOG
6870     f = fopen(NFA_REGEXP_RUN_LOG, "a");
6871     if (f != NULL)
6872     {
6873 	fprintf(f, "\n\n\t=======================================================\n");
6874 #ifdef DEBUG
6875 	fprintf(f, "\tRegexp is \"%s\"\n", nfa_regengine.expr);
6876 #endif
6877 	fprintf(f, "\tInput text is \"%s\" \n", reginput);
6878 	fprintf(f, "\t=======================================================\n\n");
6879 	nfa_print_state(f, start);
6880 	fprintf(f, "\n\n");
6881 	fclose(f);
6882     }
6883     else
6884 	EMSG(_("Could not open temporary log file for writing "));
6885 #endif
6886 
6887     clear_sub(&subs.norm);
6888     clear_sub(&m.norm);
6889 #ifdef FEAT_SYN_HL
6890     clear_sub(&subs.synt);
6891     clear_sub(&m.synt);
6892 #endif
6893 
6894     result = nfa_regmatch(prog, start, &subs, &m);
6895     if (result == FALSE)
6896 	return 0;
6897     else if (result == NFA_TOO_EXPENSIVE)
6898 	return result;
6899 
6900     cleanup_subexpr();
6901     if (REG_MULTI)
6902     {
6903 	for (i = 0; i < subs.norm.in_use; i++)
6904 	{
6905 	    reg_startpos[i].lnum = subs.norm.list.multi[i].start_lnum;
6906 	    reg_startpos[i].col = subs.norm.list.multi[i].start_col;
6907 
6908 	    reg_endpos[i].lnum = subs.norm.list.multi[i].end_lnum;
6909 	    reg_endpos[i].col = subs.norm.list.multi[i].end_col;
6910 	}
6911 
6912 	if (reg_startpos[0].lnum < 0)
6913 	{
6914 	    reg_startpos[0].lnum = 0;
6915 	    reg_startpos[0].col = col;
6916 	}
6917 	if (reg_endpos[0].lnum < 0)
6918 	{
6919 	    /* pattern has a \ze but it didn't match, use current end */
6920 	    reg_endpos[0].lnum = reglnum;
6921 	    reg_endpos[0].col = (int)(reginput - regline);
6922 	}
6923 	else
6924 	    /* Use line number of "\ze". */
6925 	    reglnum = reg_endpos[0].lnum;
6926     }
6927     else
6928     {
6929 	for (i = 0; i < subs.norm.in_use; i++)
6930 	{
6931 	    reg_startp[i] = subs.norm.list.line[i].start;
6932 	    reg_endp[i] = subs.norm.list.line[i].end;
6933 	}
6934 
6935 	if (reg_startp[0] == NULL)
6936 	    reg_startp[0] = regline + col;
6937 	if (reg_endp[0] == NULL)
6938 	    reg_endp[0] = reginput;
6939     }
6940 
6941 #ifdef FEAT_SYN_HL
6942     /* Package any found \z(...\) matches for export. Default is none. */
6943     unref_extmatch(re_extmatch_out);
6944     re_extmatch_out = NULL;
6945 
6946     if (prog->reghasz == REX_SET)
6947     {
6948 	cleanup_zsubexpr();
6949 	re_extmatch_out = make_extmatch();
6950 	/* Loop over \z1, \z2, etc.  There is no \z0. */
6951 	for (i = 1; i < subs.synt.in_use; i++)
6952 	{
6953 	    if (REG_MULTI)
6954 	    {
6955 		struct multipos *mpos = &subs.synt.list.multi[i];
6956 
6957 		/* Only accept single line matches that are valid. */
6958 		if (mpos->start_lnum >= 0
6959 			&& mpos->start_lnum == mpos->end_lnum
6960 			&& mpos->end_col >= mpos->start_col)
6961 		    re_extmatch_out->matches[i] =
6962 			vim_strnsave(reg_getline(mpos->start_lnum)
6963 							    + mpos->start_col,
6964 					     mpos->end_col - mpos->start_col);
6965 	    }
6966 	    else
6967 	    {
6968 		struct linepos *lpos = &subs.synt.list.line[i];
6969 
6970 		if (lpos->start != NULL && lpos->end != NULL)
6971 		    re_extmatch_out->matches[i] =
6972 			    vim_strnsave(lpos->start,
6973 					      (int)(lpos->end - lpos->start));
6974 	    }
6975 	}
6976     }
6977 #endif
6978 
6979     return 1 + reglnum;
6980 }
6981 
6982 /*
6983  * Match a regexp against a string ("line" points to the string) or multiple
6984  * lines ("line" is NULL, use reg_getline()).
6985  *
6986  * Returns <= 0 for failure, number of lines contained in the match otherwise.
6987  */
6988     static long
6989 nfa_regexec_both(line, startcol, tm)
6990     char_u	*line;
6991     colnr_T	startcol;	/* column to start looking for match */
6992     proftime_T	*tm;		/* timeout limit or NULL */
6993 {
6994     nfa_regprog_T   *prog;
6995     long	    retval = 0L;
6996     int		    i;
6997     colnr_T	    col = startcol;
6998 
6999     if (REG_MULTI)
7000     {
7001 	prog = (nfa_regprog_T *)reg_mmatch->regprog;
7002 	line = reg_getline((linenr_T)0);    /* relative to the cursor */
7003 	reg_startpos = reg_mmatch->startpos;
7004 	reg_endpos = reg_mmatch->endpos;
7005     }
7006     else
7007     {
7008 	prog = (nfa_regprog_T *)reg_match->regprog;
7009 	reg_startp = reg_match->startp;
7010 	reg_endp = reg_match->endp;
7011     }
7012 
7013     /* Be paranoid... */
7014     if (prog == NULL || line == NULL)
7015     {
7016 	EMSG(_(e_null));
7017 	goto theend;
7018     }
7019 
7020     /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
7021     if (prog->regflags & RF_ICASE)
7022 	ireg_ic = TRUE;
7023     else if (prog->regflags & RF_NOICASE)
7024 	ireg_ic = FALSE;
7025 
7026 #ifdef FEAT_MBYTE
7027     /* If pattern contains "\Z" overrule value of ireg_icombine */
7028     if (prog->regflags & RF_ICOMBINE)
7029 	ireg_icombine = TRUE;
7030 #endif
7031 
7032     regline = line;
7033     reglnum = 0;    /* relative to line */
7034 
7035     nfa_has_zend = prog->has_zend;
7036     nfa_has_backref = prog->has_backref;
7037     nfa_nsubexpr = prog->nsubexp;
7038     nfa_listid = 1;
7039     nfa_alt_listid = 2;
7040     nfa_regengine.expr = prog->pattern;
7041 
7042     if (prog->reganch && col > 0)
7043 	return 0L;
7044 
7045     need_clear_subexpr = TRUE;
7046 #ifdef FEAT_SYN_HL
7047     /* Clear the external match subpointers if necessary. */
7048     if (prog->reghasz == REX_SET)
7049     {
7050 	nfa_has_zsubexpr = TRUE;
7051 	need_clear_zsubexpr = TRUE;
7052     }
7053     else
7054 	nfa_has_zsubexpr = FALSE;
7055 #endif
7056 
7057     if (prog->regstart != NUL)
7058     {
7059 	/* Skip ahead until a character we know the match must start with.
7060 	 * When there is none there is no match. */
7061 	if (skip_to_start(prog->regstart, &col) == FAIL)
7062 	    return 0L;
7063 
7064 	/* If match_text is set it contains the full text that must match.
7065 	 * Nothing else to try. Doesn't handle combining chars well. */
7066 	if (prog->match_text != NULL
7067 #ifdef FEAT_MBYTE
7068 		    && !ireg_icombine
7069 #endif
7070 		)
7071 	    return find_match_text(col, prog->regstart, prog->match_text);
7072     }
7073 
7074     /* If the start column is past the maximum column: no need to try. */
7075     if (ireg_maxcol > 0 && col >= ireg_maxcol)
7076 	goto theend;
7077 
7078     nstate = prog->nstate;
7079     for (i = 0; i < nstate; ++i)
7080     {
7081 	prog->state[i].id = i;
7082 	prog->state[i].lastlist[0] = 0;
7083 	prog->state[i].lastlist[1] = 0;
7084     }
7085 
7086     retval = nfa_regtry(prog, col, tm);
7087 
7088     nfa_regengine.expr = NULL;
7089 
7090 theend:
7091     return retval;
7092 }
7093 
7094 /*
7095  * Compile a regular expression into internal code for the NFA matcher.
7096  * Returns the program in allocated space.  Returns NULL for an error.
7097  */
7098     static regprog_T *
7099 nfa_regcomp(expr, re_flags)
7100     char_u	*expr;
7101     int		re_flags;
7102 {
7103     nfa_regprog_T	*prog = NULL;
7104     size_t		prog_size;
7105     int			*postfix;
7106 
7107     if (expr == NULL)
7108 	return NULL;
7109 
7110     nfa_regengine.expr = expr;
7111     nfa_re_flags = re_flags;
7112 
7113     init_class_tab();
7114 
7115     if (nfa_regcomp_start(expr, re_flags) == FAIL)
7116 	return NULL;
7117 
7118     /* Build postfix form of the regexp. Needed to build the NFA
7119      * (and count its size). */
7120     postfix = re2post();
7121     if (postfix == NULL)
7122     {
7123 	/* TODO: only give this error for debugging? */
7124 	if (post_ptr >= post_end)
7125 	    EMSGN("Internal error: estimated max number of states insufficient: %ld", post_end - post_start);
7126 	goto fail;	    /* Cascaded (syntax?) error */
7127     }
7128 
7129     /*
7130      * In order to build the NFA, we parse the input regexp twice:
7131      * 1. first pass to count size (so we can allocate space)
7132      * 2. second to emit code
7133      */
7134 #ifdef ENABLE_LOG
7135     {
7136 	FILE *f = fopen(NFA_REGEXP_RUN_LOG, "a");
7137 
7138 	if (f != NULL)
7139 	{
7140 	    fprintf(f, "\n*****************************\n\n\n\n\tCompiling regexp \"%s\" ... hold on !\n", expr);
7141 	    fclose(f);
7142 	}
7143     }
7144 #endif
7145 
7146     /*
7147      * PASS 1
7148      * Count number of NFA states in "nstate". Do not build the NFA.
7149      */
7150     post2nfa(postfix, post_ptr, TRUE);
7151 
7152     /* allocate the regprog with space for the compiled regexp */
7153     prog_size = sizeof(nfa_regprog_T) + sizeof(nfa_state_T) * (nstate - 1);
7154     prog = (nfa_regprog_T *)lalloc(prog_size, TRUE);
7155     if (prog == NULL)
7156 	goto fail;
7157     state_ptr = prog->state;
7158 
7159     /*
7160      * PASS 2
7161      * Build the NFA
7162      */
7163     prog->start = post2nfa(postfix, post_ptr, FALSE);
7164     if (prog->start == NULL)
7165 	goto fail;
7166 
7167     prog->regflags = regflags;
7168     prog->engine = &nfa_regengine;
7169     prog->nstate = nstate;
7170     prog->has_zend = nfa_has_zend;
7171     prog->has_backref = nfa_has_backref;
7172     prog->nsubexp = regnpar;
7173 
7174     nfa_postprocess(prog);
7175 
7176     prog->reganch = nfa_get_reganch(prog->start, 0);
7177     prog->regstart = nfa_get_regstart(prog->start, 0);
7178     prog->match_text = nfa_get_match_text(prog->start);
7179 
7180 #ifdef ENABLE_LOG
7181     nfa_postfix_dump(expr, OK);
7182     nfa_dump(prog);
7183 #endif
7184 #ifdef FEAT_SYN_HL
7185     /* Remember whether this pattern has any \z specials in it. */
7186     prog->reghasz = re_has_z;
7187 #endif
7188     prog->pattern = vim_strsave(expr);
7189     nfa_regengine.expr = NULL;
7190 
7191 out:
7192     vim_free(post_start);
7193     post_start = post_ptr = post_end = NULL;
7194     state_ptr = NULL;
7195     return (regprog_T *)prog;
7196 
7197 fail:
7198     vim_free(prog);
7199     prog = NULL;
7200 #ifdef ENABLE_LOG
7201     nfa_postfix_dump(expr, FAIL);
7202 #endif
7203     nfa_regengine.expr = NULL;
7204     goto out;
7205 }
7206 
7207 /*
7208  * Free a compiled regexp program, returned by nfa_regcomp().
7209  */
7210     static void
7211 nfa_regfree(prog)
7212     regprog_T   *prog;
7213 {
7214     if (prog != NULL)
7215     {
7216 	vim_free(((nfa_regprog_T *)prog)->match_text);
7217 	vim_free(((nfa_regprog_T *)prog)->pattern);
7218 	vim_free(prog);
7219     }
7220 }
7221 
7222 /*
7223  * Match a regexp against a string.
7224  * "rmp->regprog" is a compiled regexp as returned by nfa_regcomp().
7225  * Uses curbuf for line count and 'iskeyword'.
7226  * If "line_lbr" is TRUE consider a "\n" in "line" to be a line break.
7227  *
7228  * Returns <= 0 for failure, number of lines contained in the match otherwise.
7229  */
7230     static int
7231 nfa_regexec_nl(rmp, line, col, line_lbr)
7232     regmatch_T	*rmp;
7233     char_u	*line;	/* string to match against */
7234     colnr_T	col;	/* column to start looking for match */
7235     int		line_lbr;
7236 {
7237     reg_match = rmp;
7238     reg_mmatch = NULL;
7239     reg_maxline = 0;
7240     reg_line_lbr = line_lbr;
7241     reg_buf = curbuf;
7242     reg_win = NULL;
7243     ireg_ic = rmp->rm_ic;
7244 #ifdef FEAT_MBYTE
7245     ireg_icombine = FALSE;
7246 #endif
7247     ireg_maxcol = 0;
7248     return nfa_regexec_both(line, col, NULL);
7249 }
7250 
7251 
7252 /*
7253  * Match a regexp against multiple lines.
7254  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
7255  * Uses curbuf for line count and 'iskeyword'.
7256  *
7257  * Return <= 0 if there is no match.  Return number of lines contained in the
7258  * match otherwise.
7259  *
7260  * Note: the body is the same as bt_regexec() except for nfa_regexec_both()
7261  *
7262  * ! Also NOTE : match may actually be in another line. e.g.:
7263  * when r.e. is \nc, cursor is at 'a' and the text buffer looks like
7264  *
7265  * +-------------------------+
7266  * |a                        |
7267  * |b                        |
7268  * |c                        |
7269  * |                         |
7270  * +-------------------------+
7271  *
7272  * then nfa_regexec_multi() returns 3. while the original
7273  * vim_regexec_multi() returns 0 and a second call at line 2 will return 2.
7274  *
7275  * FIXME if this behavior is not compatible.
7276  */
7277     static long
7278 nfa_regexec_multi(rmp, win, buf, lnum, col, tm)
7279     regmmatch_T	*rmp;
7280     win_T	*win;		/* window in which to search or NULL */
7281     buf_T	*buf;		/* buffer in which to search */
7282     linenr_T	lnum;		/* nr of line to start looking for match */
7283     colnr_T	col;		/* column to start looking for match */
7284     proftime_T	*tm;		/* timeout limit or NULL */
7285 {
7286     reg_match = NULL;
7287     reg_mmatch = rmp;
7288     reg_buf = buf;
7289     reg_win = win;
7290     reg_firstlnum = lnum;
7291     reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
7292     reg_line_lbr = FALSE;
7293     ireg_ic = rmp->rmm_ic;
7294 #ifdef FEAT_MBYTE
7295     ireg_icombine = FALSE;
7296 #endif
7297     ireg_maxcol = rmp->rmm_maxcol;
7298 
7299     return nfa_regexec_both(NULL, col, tm);
7300 }
7301 
7302 #ifdef DEBUG
7303 # undef ENABLE_LOG
7304 #endif
7305