xref: /vim-8.2.3635/src/regexp.c (revision 2bf24176)
1 /* vi:set ts=8 sts=4 sw=4:
2  *
3  * Handling of regular expressions: vim_regcomp(), vim_regexec(), vim_regsub()
4  *
5  * NOTICE:
6  *
7  * This is NOT the original regular expression code as written by Henry
8  * Spencer.  This code has been modified specifically for use with the VIM
9  * editor, and should not be used separately from Vim.  If you want a good
10  * regular expression library, get the original code.  The copyright notice
11  * that follows is from the original.
12  *
13  * END NOTICE
14  *
15  *	Copyright (c) 1986 by University of Toronto.
16  *	Written by Henry Spencer.  Not derived from licensed software.
17  *
18  *	Permission is granted to anyone to use this software for any
19  *	purpose on any computer system, and to redistribute it freely,
20  *	subject to the following restrictions:
21  *
22  *	1. The author is not responsible for the consequences of use of
23  *		this software, no matter how awful, even if they arise
24  *		from defects in it.
25  *
26  *	2. The origin of this software must not be misrepresented, either
27  *		by explicit claim or by omission.
28  *
29  *	3. Altered versions must be plainly marked as such, and must not
30  *		be misrepresented as being the original software.
31  *
32  * Beware that some of this code is subtly aware of the way operator
33  * precedence is structured in regular expressions.  Serious changes in
34  * regular-expression syntax might require a total rethink.
35  *
36  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
37  * Webb, Ciaran McCreesh and Bram Moolenaar.
38  * Named character class support added by Walter Briscoe (1998 Jul 01)
39  */
40 
41 /* Uncomment the first if you do not want to see debugging logs or files
42  * related to regular expressions, even when compiling with -DDEBUG.
43  * Uncomment the second to get the regexp debugging. */
44 /* #undef DEBUG */
45 /* #define DEBUG */
46 
47 #include "vim.h"
48 
49 #ifdef DEBUG
50 /* show/save debugging data when BT engine is used */
51 # define BT_REGEXP_DUMP
52 /* save the debugging data to a file instead of displaying it */
53 # define BT_REGEXP_LOG
54 # define BT_REGEXP_DEBUG_LOG
55 # define BT_REGEXP_DEBUG_LOG_NAME	"bt_regexp_debug.log"
56 #endif
57 
58 /*
59  * The "internal use only" fields in regexp.h are present to pass info from
60  * compile to execute that permits the execute phase to run lots faster on
61  * simple cases.  They are:
62  *
63  * regstart	char that must begin a match; NUL if none obvious; Can be a
64  *		multi-byte character.
65  * reganch	is the match anchored (at beginning-of-line only)?
66  * regmust	string (pointer into program) that match must include, or NULL
67  * regmlen	length of regmust string
68  * regflags	RF_ values or'ed together
69  *
70  * Regstart and reganch permit very fast decisions on suitable starting points
71  * for a match, cutting down the work a lot.  Regmust permits fast rejection
72  * of lines that cannot possibly match.  The regmust tests are costly enough
73  * that vim_regcomp() supplies a regmust only if the r.e. contains something
74  * potentially expensive (at present, the only such thing detected is * or +
75  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
76  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
77  * computing it anyway.
78  */
79 
80 /*
81  * Structure for regexp "program".  This is essentially a linear encoding
82  * of a nondeterministic finite-state machine (aka syntax charts or
83  * "railroad normal form" in parsing technology).  Each node is an opcode
84  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
 * all nodes except BRANCH and BRACE_COMPLEX implement concatenation; a "next"
 * pointer with a BRANCH on both ends of it is connecting two alternatives.
 * (Here we have one of the subtle syntax dependencies:	an individual BRANCH
 * (as opposed to a collection of them) is never concatenated with anything
 * because of operator precedence).  The "next" pointer of a BRACE_COMPLEX
90  * node points to the node after the stuff to be repeated.
91  * The operand of some types of node is a literal string; for others, it is a
92  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
93  * is the first node of the branch.
94  * (NB this is *not* a tree structure: the tail of the branch connects to the
95  * thing following the set of BRANCHes.)
96  *
97  * pattern	is coded like:
98  *
99  *			  +-----------------+
100  *			  |		    V
101  * <aa>\|<bb>	BRANCH <aa> BRANCH <bb> --> END
102  *		     |	    ^	 |	    ^
103  *		     +------+	 +----------+
104  *
105  *
106  *		       +------------------+
107  *		       V		  |
108  * <aa>*	BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
109  *		     |	    |		    ^			   ^
110  *		     |	    +---------------+			   |
111  *		     +---------------------------------------------+
112  *
113  *
114  *		       +----------------------+
115  *		       V		      |
116  * <aa>\+	BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
117  *		     |		     |		 ^			^
118  *		     |		     +-----------+			|
119  *		     +--------------------------------------------------+
120  *
121  *
122  *					+-------------------------+
123  *					V			  |
124  * <aa>\{}	BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
125  *		     |				    |		     ^
126  *		     |				    +----------------+
127  *		     +-----------------------------------------------+
128  *
129  *
130  * <aa>\@!<bb>	BRANCH NOMATCH <aa> --> END  <bb> --> END
131  *		     |	     |		      ^       ^
132  *		     |	     +----------------+       |
133  *		     +--------------------------------+
134  *
135  *						      +---------+
136  *						      |		V
137  * \z[abc]	BRANCH BRANCH  a  BRANCH  b  BRANCH  c	BRANCH	NOTHING --> END
138  *		     |	    |	       |	  |	^		    ^
139  *		     |	    |	       |	  +-----+		    |
140  *		     |	    |	       +----------------+		    |
141  *		     |	    +---------------------------+		    |
142  *		     +------------------------------------------------------+
143  *
144  * They all start with a BRANCH for "\|" alternatives, even when there is only
145  * one alternative.
146  */
147 
148 /*
149  * The opcodes are:
150  */
151 
152 /* definition	number		   opnd?    meaning */
153 #define END		0	/*	End of program or NOMATCH operand. */
154 #define BOL		1	/*	Match "" at beginning of line. */
155 #define EOL		2	/*	Match "" at end of line. */
156 #define BRANCH		3	/* node Match this alternative, or the
157 				 *	next... */
158 #define BACK		4	/*	Match "", "next" ptr points backward. */
159 #define EXACTLY		5	/* str	Match this string. */
160 #define NOTHING		6	/*	Match empty string. */
161 #define STAR		7	/* node Match this (simple) thing 0 or more
162 				 *	times. */
163 #define PLUS		8	/* node Match this (simple) thing 1 or more
164 				 *	times. */
165 #define MATCH		9	/* node match the operand zero-width */
166 #define NOMATCH		10	/* node check for no match with operand */
167 #define BEHIND		11	/* node look behind for a match with operand */
168 #define NOBEHIND	12	/* node look behind for no match with operand */
169 #define SUBPAT		13	/* node match the operand here */
170 #define BRACE_SIMPLE	14	/* node Match this (simple) thing between m and
171 				 *	n times (\{m,n\}). */
172 #define BOW		15	/*	Match "" after [^a-zA-Z0-9_] */
173 #define EOW		16	/*	Match "" at    [^a-zA-Z0-9_] */
174 #define BRACE_LIMITS	17	/* nr nr  define the min & max for BRACE_SIMPLE
175 				 *	and BRACE_COMPLEX. */
176 #define NEWL		18	/*	Match line-break */
177 #define BHPOS		19	/*	End position for BEHIND or NOBEHIND */
178 
179 
/*
 * Character classes: 20-48 are the normal variants, 50-78 are the same
 * classes that additionally match a line-break (normal opcode + ADD_NL).
 */
#define ADD_NL		30
/* The sum is parenthesized so that FIRST_NL keeps its value when it is used
 * inside a larger expression (e.g. "FIRST_NL * 2" or "x - FIRST_NL"). */
#define FIRST_NL	(ANY + ADD_NL)
#define ANY		20	/*	Match any one character. */
#define ANYOF		21	/* str	Match any character in this string. */
#define ANYBUT		22	/* str	Match any character not in this
				 *	string. */
#define IDENT		23	/*	Match identifier char */
#define SIDENT		24	/*	Match identifier char but no digit */
#define KWORD		25	/*	Match keyword char */
#define SKWORD		26	/*	Match word char but no digit */
#define FNAME		27	/*	Match file name char */
#define SFNAME		28	/*	Match file name char but no digit */
#define PRINT		29	/*	Match printable char */
#define SPRINT		30	/*	Match printable char but no digit */
#define WHITE		31	/*	Match whitespace char */
#define NWHITE		32	/*	Match non-whitespace char */
#define DIGIT		33	/*	Match digit char */
#define NDIGIT		34	/*	Match non-digit char */
#define HEX		35	/*	Match hex char */
#define NHEX		36	/*	Match non-hex char */
#define OCTAL		37	/*	Match octal char */
#define NOCTAL		38	/*	Match non-octal char */
#define WORD		39	/*	Match word char */
#define NWORD		40	/*	Match non-word char */
#define HEAD		41	/*	Match head char */
#define NHEAD		42	/*	Match non-head char */
#define ALPHA		43	/*	Match alpha char */
#define NALPHA		44	/*	Match non-alpha char */
#define LOWER		45	/*	Match lowercase char */
#define NLOWER		46	/*	Match non-lowercase char */
#define UPPER		47	/*	Match uppercase char */
#define NUPPER		48	/*	Match non-uppercase char */
/* Parenthesized for the same reason as FIRST_NL. */
#define LAST_NL		(NUPPER + ADD_NL)
/* Non-zero when opcode "op" lies in the FIRST_NL..LAST_NL range, i.e. it is
 * one of the character classes that also match a line-break. */
#define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
215 
216 #define MOPEN		80  /* -89	 Mark this point in input as start of
217 				 *	 \( subexpr.  MOPEN + 0 marks start of
218 				 *	 match. */
219 #define MCLOSE		90  /* -99	 Analogous to MOPEN.  MCLOSE + 0 marks
220 				 *	 end of match. */
221 #define BACKREF		100 /* -109 node Match same string again \1-\9 */
222 
223 #ifdef FEAT_SYN_HL
224 # define ZOPEN		110 /* -119	 Mark this point in input as start of
225 				 *	 \z( subexpr. */
226 # define ZCLOSE		120 /* -129	 Analogous to ZOPEN. */
227 # define ZREF		130 /* -139 node Match external submatch \z1-\z9 */
228 #endif
229 
230 #define BRACE_COMPLEX	140 /* -149 node Match nodes between m & n times */
231 
232 #define NOPEN		150	/*	Mark this point in input as start of
233 					\%( subexpr. */
234 #define NCLOSE		151	/*	Analogous to NOPEN. */
235 
236 #define MULTIBYTECODE	200	/* mbc	Match one multi-byte character */
237 #define RE_BOF		201	/*	Match "" at beginning of file. */
238 #define RE_EOF		202	/*	Match "" at end of file. */
239 #define CURSOR		203	/*	Match location of cursor. */
240 
241 #define RE_LNUM		204	/* nr cmp  Match line number */
242 #define RE_COL		205	/* nr cmp  Match column number */
243 #define RE_VCOL		206	/* nr cmp  Match virtual column number */
244 
245 #define RE_MARK		207	/* mark cmp  Match mark position */
246 #define RE_VISUAL	208	/*	Match Visual area */
247 #define RE_COMPOSING	209	/* any composing characters */
248 
249 /*
250  * Magic characters have a special meaning, they don't match literally.
251  * Magic characters are negative.  This separates them from literal characters
252  * (possibly multi-byte).  Only ASCII characters can be Magic.
253  */
/* Turn character "x" into its Magic counterpart by subtracting 256. */
#define Magic(x)	((int)(x) - 256)
/* Reverse of Magic(): add the 256 back to get the plain character. */
#define un_Magic(x)	((x) + 256)
/* Non-zero when "x" is negative, which is how a Magic value is recognized. */
#define is_Magic(x)	((x) < 0)
257 
258 static int no_Magic __ARGS((int x));
259 static int toggle_Magic __ARGS((int x));
260 
261     static int
262 no_Magic(x)
263     int		x;
264 {
265     if (is_Magic(x))
266 	return un_Magic(x);
267     return x;
268 }
269 
270     static int
271 toggle_Magic(x)
272     int		x;
273 {
274     if (is_Magic(x))
275 	return un_Magic(x);
276     return Magic(x);
277 }
278 
279 /*
280  * The first byte of the regexp internal "program" is actually this magic
281  * number; the start node begins in the second byte.  It's used to catch the
282  * most severe mutilation of the program by the caller.
283  */
284 
285 #define REGMAGIC	0234
286 
287 /*
288  * Opcode notes:
289  *
290  * BRANCH	The set of branches constituting a single choice are hooked
291  *		together with their "next" pointers, since precedence prevents
292  *		anything being concatenated to any individual branch.  The
293  *		"next" pointer of the last BRANCH in a choice points to the
294  *		thing following the whole choice.  This is also where the
295  *		final "next" pointer of each individual branch points; each
296  *		branch starts with the operand node of a BRANCH node.
297  *
298  * BACK		Normal "next" pointers all implicitly point forward; BACK
299  *		exists to make loop structures possible.
300  *
301  * STAR,PLUS	'=', and complex '*' and '+', are implemented as circular
302  *		BRANCH structures using BACK.  Simple cases (one character
303  *		per match) are implemented with STAR and PLUS for speed
304  *		and to minimize recursive plunges.
305  *
306  * BRACE_LIMITS	This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
307  *		node, and defines the min and max limits to be used for that
308  *		node.
309  *
310  * MOPEN,MCLOSE	...are numbered at compile time.
311  * ZOPEN,ZCLOSE	...ditto
312  */
313 
314 /*
315  * A node is one char of opcode followed by two chars of "next" pointer.
316  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
317  * value is a positive offset from the opcode of the node containing it.
318  * An operand, if any, simply follows the node.  (Note that much of the
319  * code generation knows about this implicit relationship.)
320  *
321  * Using two bytes for the "next" pointer is vast overkill for most things,
322  * but allows patterns to get big without disasters.
323  */
/* Opcode of the node at "p": its first byte, as an int. */
#define OP(p)		((int)*(p))
/* The "next" offset of the node at "p", assembled from the two bytes that
 * follow the opcode, high order byte first. */
#define NEXT(p)		(((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
/* Address of the operand: the byte right after the opcode and the two
 * "next" bytes. */
#define OPERAND(p)	((p) + 3)
/* Obtain an operand that was stored as four bytes, MSB first. */
#define OPERAND_MIN(p)	(((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
			+ ((long)(p)[5] << 8) + (long)(p)[6])
/* Obtain a second operand stored as four bytes (the four bytes that follow
 * the ones read by OPERAND_MIN). */
#define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
/* Obtain a second single-byte operand stored after a four bytes operand. */
#define OPERAND_CMP(p)	(p)[7]
334 
335 /*
336  * Utility definitions.
337  */
338 #define UCHARAT(p)	((int)*(char_u *)(p))
339 
340 /* Used for an error (down from) vim_regcomp(): give the error message, set
341  * rc_did_emsg and return NULL */
342 #define EMSG_RET_NULL(m) return (EMSG(m), rc_did_emsg = TRUE, (void *)NULL)
343 #define EMSG_RET_FAIL(m) return (EMSG(m), rc_did_emsg = TRUE, FAIL)
344 #define EMSG2_RET_NULL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, (void *)NULL)
345 #define EMSG2_RET_FAIL(m, c) return (EMSG2((m), (c) ? "" : "\\"), rc_did_emsg = TRUE, FAIL)
346 #define EMSG_ONE_RET_NULL EMSG2_RET_NULL(_("E369: invalid item in %s%%[]"), reg_magic == MAGIC_ALL)
347 
348 #define MAX_LIMIT	(32767L << 16L)
349 
350 static int re_multi_type __ARGS((int));
351 static int cstrncmp __ARGS((char_u *s1, char_u *s2, int *n));
352 static char_u *cstrchr __ARGS((char_u *, int));
353 
354 #ifdef BT_REGEXP_DUMP
355 static void	regdump __ARGS((char_u *, bt_regprog_T *));
356 #endif
357 #ifdef DEBUG
358 static char_u	*regprop __ARGS((char_u *));
359 #endif
360 
361 static int re_mult_next __ARGS((char *what));
362 
363 static char_u e_missingbracket[] = N_("E769: Missing ] after %s[");
364 static char_u e_unmatchedpp[] = N_("E53: Unmatched %s%%(");
365 static char_u e_unmatchedp[] = N_("E54: Unmatched %s(");
366 static char_u e_unmatchedpar[] = N_("E55: Unmatched %s)");
367 #ifdef FEAT_SYN_HL
368 static char_u e_z_not_allowed[] = N_("E66: \\z( not allowed here");
369 static char_u e_z1_not_allowed[] = N_("E67: \\z1 et al. not allowed here");
370 #endif
371 static char_u e_missing_sb[] = N_("E69: Missing ] after %s%%[");
372 static char_u e_empty_sb[]  = N_("E70: Empty %s%%[]");
373 #define NOT_MULTI	0
374 #define MULTI_ONE	1
375 #define MULTI_MULT	2
376 /*
377  * Return NOT_MULTI if c is not a "multi" operator.
378  * Return MULTI_ONE if c is a single "multi" operator.
379  * Return MULTI_MULT if c is a multi "multi" operator.
380  */
381     static int
382 re_multi_type(c)
383     int c;
384 {
385     if (c == Magic('@') || c == Magic('=') || c == Magic('?'))
386 	return MULTI_ONE;
387     if (c == Magic('*') || c == Magic('+') || c == Magic('{'))
388 	return MULTI_MULT;
389     return NOT_MULTI;
390 }
391 
392 /*
393  * Flags to be passed up and down.
394  */
395 #define HASWIDTH	0x1	/* Known never to match null string. */
396 #define SIMPLE		0x2	/* Simple enough to be STAR/PLUS operand. */
397 #define SPSTART		0x4	/* Starts with * or +. */
398 #define HASNL		0x8	/* Contains some \n. */
399 #define HASLOOKBH	0x10	/* Contains "\@<=" or "\@<!". */
400 #define WORST		0	/* Worst case. */
401 
402 /*
403  * When regcode is set to this value, code is not emitted and size is computed
404  * instead.
405  */
406 #define JUST_CALC_SIZE	((char_u *) -1)
407 
408 static char_u		*reg_prev_sub = NULL;
409 
410 /*
411  * REGEXP_INRANGE contains all characters which are always special in a []
412  * range after '\'.
413  * REGEXP_ABBR contains all characters which act as abbreviations after '\'.
414  * These are:
415  *  \n	- New line (NL).
416  *  \r	- Carriage Return (CR).
417  *  \t	- Tab (TAB).
418  *  \e	- Escape (ESC).
419  *  \b	- Backspace (Ctrl_H).
420  *  \d  - Character code in decimal, eg \d123
 *  \o	- Character code in octal, eg \o40
422  *  \x	- Character code in hex, eg \x4a
423  *  \u	- Multibyte character code, eg \u20ac
424  *  \U	- Long multibyte character code, eg \U12345678
425  */
426 static char_u REGEXP_INRANGE[] = "]^-n\\";
427 static char_u REGEXP_ABBR[] = "nrtebdoxuU";
428 
429 static int	backslash_trans __ARGS((int c));
430 static int	get_char_class __ARGS((char_u **pp));
431 static int	get_equi_class __ARGS((char_u **pp));
432 static void	reg_equi_class __ARGS((int c));
433 static int	get_coll_element __ARGS((char_u **pp));
434 static char_u	*skip_anyof __ARGS((char_u *p));
435 static void	init_class_tab __ARGS((void));
436 
437 /*
438  * Translate '\x' to its control character, except "\n", which is Magic.
439  */
440     static int
441 backslash_trans(c)
442     int		c;
443 {
444     switch (c)
445     {
446 	case 'r':   return CAR;
447 	case 't':   return TAB;
448 	case 'e':   return ESC;
449 	case 'b':   return BS;
450     }
451     return c;
452 }
453 
454 /*
455  * Check for a character class name "[:name:]".  "pp" points to the '['.
456  * Returns one of the CLASS_ items. CLASS_NONE means that no item was
457  * recognized.  Otherwise "pp" is advanced to after the item.
458  */
459     static int
460 get_char_class(pp)
461     char_u	**pp;
462 {
463     static const char *(class_names[]) =
464     {
465 	"alnum:]",
466 #define CLASS_ALNUM 0
467 	"alpha:]",
468 #define CLASS_ALPHA 1
469 	"blank:]",
470 #define CLASS_BLANK 2
471 	"cntrl:]",
472 #define CLASS_CNTRL 3
473 	"digit:]",
474 #define CLASS_DIGIT 4
475 	"graph:]",
476 #define CLASS_GRAPH 5
477 	"lower:]",
478 #define CLASS_LOWER 6
479 	"print:]",
480 #define CLASS_PRINT 7
481 	"punct:]",
482 #define CLASS_PUNCT 8
483 	"space:]",
484 #define CLASS_SPACE 9
485 	"upper:]",
486 #define CLASS_UPPER 10
487 	"xdigit:]",
488 #define CLASS_XDIGIT 11
489 	"tab:]",
490 #define CLASS_TAB 12
491 	"return:]",
492 #define CLASS_RETURN 13
493 	"backspace:]",
494 #define CLASS_BACKSPACE 14
495 	"escape:]",
496 #define CLASS_ESCAPE 15
497     };
498 #define CLASS_NONE 99
499     int i;
500 
501     if ((*pp)[1] == ':')
502     {
503 	for (i = 0; i < (int)(sizeof(class_names) / sizeof(*class_names)); ++i)
504 	    if (STRNCMP(*pp + 2, class_names[i], STRLEN(class_names[i])) == 0)
505 	    {
506 		*pp += STRLEN(class_names[i]) + 2;
507 		return i;
508 	    }
509     }
510     return CLASS_NONE;
511 }
512 
513 /*
514  * Specific version of character class functions.
515  * Using a table to keep this fast.
516  */
517 static short	class_tab[256];
518 
519 #define	    RI_DIGIT	0x01
520 #define	    RI_HEX	0x02
521 #define	    RI_OCTAL	0x04
522 #define	    RI_WORD	0x08
523 #define	    RI_HEAD	0x10
524 #define	    RI_ALPHA	0x20
525 #define	    RI_LOWER	0x40
526 #define	    RI_UPPER	0x80
527 #define	    RI_WHITE	0x100
528 
529     static void
530 init_class_tab()
531 {
532     int		i;
533     static int	done = FALSE;
534 
535     if (done)
536 	return;
537 
538     for (i = 0; i < 256; ++i)
539     {
540 	if (i >= '0' && i <= '7')
541 	    class_tab[i] = RI_DIGIT + RI_HEX + RI_OCTAL + RI_WORD;
542 	else if (i >= '8' && i <= '9')
543 	    class_tab[i] = RI_DIGIT + RI_HEX + RI_WORD;
544 	else if (i >= 'a' && i <= 'f')
545 	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
546 #ifdef EBCDIC
547 	else if ((i >= 'g' && i <= 'i') || (i >= 'j' && i <= 'r')
548 						    || (i >= 's' && i <= 'z'))
549 #else
550 	else if (i >= 'g' && i <= 'z')
551 #endif
552 	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_LOWER;
553 	else if (i >= 'A' && i <= 'F')
554 	    class_tab[i] = RI_HEX + RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
555 #ifdef EBCDIC
556 	else if ((i >= 'G' && i <= 'I') || ( i >= 'J' && i <= 'R')
557 						    || (i >= 'S' && i <= 'Z'))
558 #else
559 	else if (i >= 'G' && i <= 'Z')
560 #endif
561 	    class_tab[i] = RI_WORD + RI_HEAD + RI_ALPHA + RI_UPPER;
562 	else if (i == '_')
563 	    class_tab[i] = RI_WORD + RI_HEAD;
564 	else
565 	    class_tab[i] = 0;
566     }
567     class_tab[' '] |= RI_WHITE;
568     class_tab['\t'] |= RI_WHITE;
569     done = TRUE;
570 }
571 
/*
 * Character class tests that look up "c" in class_tab[].  Each one yields
 * non-zero when "c" has the corresponding RI_ flag set.
 *
 * With FEAT_MBYTE "c" may be a character value of 0x100 or more, which has
 * no entry in the 256-element table; such a value never matches.  Note that
 * these variants evaluate "c" twice, so the argument must not have side
 * effects.  The argument is parenthesized so that an expression such as
 * "a ? b : c" can be passed safely.
 */
#ifdef FEAT_MBYTE
# define ri_digit(c)	((c) < 0x100 && (class_tab[(c)] & RI_DIGIT))
# define ri_hex(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEX))
# define ri_octal(c)	((c) < 0x100 && (class_tab[(c)] & RI_OCTAL))
# define ri_word(c)	((c) < 0x100 && (class_tab[(c)] & RI_WORD))
# define ri_head(c)	((c) < 0x100 && (class_tab[(c)] & RI_HEAD))
# define ri_alpha(c)	((c) < 0x100 && (class_tab[(c)] & RI_ALPHA))
# define ri_lower(c)	((c) < 0x100 && (class_tab[(c)] & RI_LOWER))
# define ri_upper(c)	((c) < 0x100 && (class_tab[(c)] & RI_UPPER))
# define ri_white(c)	((c) < 0x100 && (class_tab[(c)] & RI_WHITE))
#else
# define ri_digit(c)	(class_tab[(c)] & RI_DIGIT)
# define ri_hex(c)	(class_tab[(c)] & RI_HEX)
# define ri_octal(c)	(class_tab[(c)] & RI_OCTAL)
# define ri_word(c)	(class_tab[(c)] & RI_WORD)
# define ri_head(c)	(class_tab[(c)] & RI_HEAD)
# define ri_alpha(c)	(class_tab[(c)] & RI_ALPHA)
# define ri_lower(c)	(class_tab[(c)] & RI_LOWER)
# define ri_upper(c)	(class_tab[(c)] & RI_UPPER)
# define ri_white(c)	(class_tab[(c)] & RI_WHITE)
#endif
593 
594 /* flags for regflags */
595 #define RF_ICASE    1	/* ignore case */
596 #define RF_NOICASE  2	/* don't ignore case */
597 #define RF_HASNL    4	/* can match a NL */
598 #define RF_ICOMBINE 8	/* ignore combining characters */
599 #define RF_LOOKBH   16	/* uses "\@<=" or "\@<!" */
600 
601 /*
602  * Global work variables for vim_regcomp().
603  */
604 
605 static char_u	*regparse;	/* Input-scan pointer. */
606 static int	prevchr_len;	/* byte length of previous char */
607 static int	num_complex_braces; /* Complex \{...} count */
608 static int	regnpar;	/* () count. */
609 #ifdef FEAT_SYN_HL
610 static int	regnzpar;	/* \z() count. */
611 static int	re_has_z;	/* \z item detected */
612 #endif
613 static char_u	*regcode;	/* Code-emit pointer, or JUST_CALC_SIZE */
614 static long	regsize;	/* Code size. */
615 static int	reg_toolong;	/* TRUE when offset out of range */
616 static char_u	had_endbrace[NSUBEXP];	/* flags, TRUE if end of () found */
617 static unsigned	regflags;	/* RF_ flags for prog */
618 static long	brace_min[10];	/* Minimums for complex brace repeats */
619 static long	brace_max[10];	/* Maximums for complex brace repeats */
620 static int	brace_count[10]; /* Current counts for complex brace repeats */
621 #if defined(FEAT_SYN_HL) || defined(PROTO)
622 static int	had_eol;	/* TRUE when EOL found by vim_regcomp() */
623 #endif
624 static int	one_exactly = FALSE;	/* only do one char for EXACTLY */
625 
626 static int	reg_magic;	/* magicness of the pattern: */
627 #define MAGIC_NONE	1	/* "\V" very unmagic */
628 #define MAGIC_OFF	2	/* "\M" or 'magic' off */
629 #define MAGIC_ON	3	/* "\m" or 'magic' */
630 #define MAGIC_ALL	4	/* "\v" very magic */
631 
632 static int	reg_string;	/* matching with a string instead of a buffer
633 				   line */
634 static int	reg_strict;	/* "[abc" is illegal */
635 
636 /*
637  * META contains all characters that may be magic, except '^' and '$'.
638  */
639 
640 #ifdef EBCDIC
641 static char_u META[] = "%&()*+.123456789<=>?@ACDFHIKLMOPSUVWX[_acdfhiklmnopsuvwxz{|~";
642 #else
643 /* META[] is used often enough to justify turning it into a table. */
644 static char_u META_flags[] = {
645     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
646     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
647 /*		   %  &     (  )  *  +	      .    */
648     0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
649 /*     1  2  3	4  5  6  7  8  9	<  =  >  ? */
650     0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1,
651 /*  @  A     C	D     F     H  I     K	L  M	 O */
652     1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1,
653 /*  P	     S	   U  V  W  X	  Z  [		 _ */
654     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1,
655 /*     a     c	d     f     h  i     k	l  m  n  o */
656     0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1,
657 /*  p	     s	   u  v  w  x	  z  {	|     ~    */
658     1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1
659 };
660 #endif
661 
662 static int	curchr;		/* currently parsed character */
663 /* Previous character.  Note: prevchr is sometimes -1 when we are not at the
664  * start, eg in /[ ^I]^ the pattern was never found even if it existed,
665  * because ^ was taken to be magic -- webb */
666 static int	prevchr;
667 static int	prevprevchr;	/* previous-previous character */
668 static int	nextchr;	/* used for ungetchr() */
669 
670 /* arguments for reg() */
671 #define REG_NOPAREN	0	/* toplevel reg() */
672 #define REG_PAREN	1	/* \(\) */
673 #define REG_ZPAREN	2	/* \z(\) */
674 #define REG_NPAREN	3	/* \%(\) */
675 
/*
 * Parser state record, the argument type of save_parse_state() and
 * restore_parse_state().  Most members carry the name of a file-level parser
 * variable; presumably each holds a copy of that variable (the save/restore
 * functions are outside this part of the file -- confirm there).
 */
typedef struct
{
     char_u	*regparse;	/* input-scan pointer */
     int	prevchr_len;	/* byte length of previous char */
     int	curchr;		/* currently parsed character */
     int	prevchr;	/* previous character */
     int	prevprevchr;	/* previous-previous character */
     int	nextchr;	/* used for ungetchr() */
     int	at_start;	/* NOTE(review): matching variable not visible here */
     int	prev_at_start;	/* NOTE(review): matching variable not visible here */
     int	regnpar;	/* () count */
} parse_state_T;
688 
689 /*
690  * Forward declarations for vim_regcomp()'s friends.
691  */
692 static void	initchr __ARGS((char_u *));
693 static void	save_parse_state __ARGS((parse_state_T *ps));
694 static void	restore_parse_state __ARGS((parse_state_T *ps));
695 static int	getchr __ARGS((void));
696 static void	skipchr_keepstart __ARGS((void));
697 static int	peekchr __ARGS((void));
698 static void	skipchr __ARGS((void));
699 static void	ungetchr __ARGS((void));
700 static int	gethexchrs __ARGS((int maxinputlen));
701 static int	getoctchrs __ARGS((void));
702 static int	getdecchrs __ARGS((void));
703 static int	coll_get_char __ARGS((void));
704 static void	regcomp_start __ARGS((char_u *expr, int flags));
705 static char_u	*reg __ARGS((int, int *));
706 static char_u	*regbranch __ARGS((int *flagp));
707 static char_u	*regconcat __ARGS((int *flagp));
708 static char_u	*regpiece __ARGS((int *));
709 static char_u	*regatom __ARGS((int *));
710 static char_u	*regnode __ARGS((int));
711 #ifdef FEAT_MBYTE
712 static int	use_multibytecode __ARGS((int c));
713 #endif
714 static int	prog_magic_wrong __ARGS((void));
715 static char_u	*regnext __ARGS((char_u *));
716 static void	regc __ARGS((int b));
717 #ifdef FEAT_MBYTE
718 static void	regmbc __ARGS((int c));
719 # define REGMBC(x) regmbc(x);
720 # define CASEMBC(x) case x:
721 #else
722 # define regmbc(c) regc(c)
723 # define REGMBC(x)
724 # define CASEMBC(x)
725 #endif
726 static void	reginsert __ARGS((int, char_u *));
727 static void	reginsert_nr __ARGS((int op, long val, char_u *opnd));
728 static void	reginsert_limits __ARGS((int, long, long, char_u *));
729 static char_u	*re_put_long __ARGS((char_u *pr, long_u val));
730 static int	read_limits __ARGS((long *, long *));
731 static void	regtail __ARGS((char_u *, char_u *));
732 static void	regoptail __ARGS((char_u *, char_u *));
733 
734 static regengine_T bt_regengine;
735 static regengine_T nfa_regengine;
736 
737 /*
738  * Return TRUE if compiled regular expression "prog" can match a line break.
739  */
740     int
741 re_multiline(prog)
742     regprog_T *prog;
743 {
744     return (prog->regflags & RF_HASNL);
745 }
746 
747 /*
748  * Return TRUE if compiled regular expression "prog" looks before the start
749  * position (pattern contains "\@<=" or "\@<!").
750  */
751     int
752 re_lookbehind(prog)
753     regprog_T *prog;
754 {
755     return (prog->regflags & RF_LOOKBH);
756 }
757 
758 /*
759  * Check for an equivalence class name "[=a=]".  "pp" points to the '['.
760  * Returns a character representing the class. Zero means that no item was
761  * recognized.  Otherwise "pp" is advanced to after the item.
762  */
763     static int
764 get_equi_class(pp)
765     char_u	**pp;
766 {
767     int		c;
768     int		l = 1;
769     char_u	*p = *pp;
770 
771     if (p[1] == '=')
772     {
773 #ifdef FEAT_MBYTE
774 	if (has_mbyte)
775 	    l = (*mb_ptr2len)(p + 2);
776 #endif
777 	if (p[l + 2] == '=' && p[l + 3] == ']')
778 	{
779 #ifdef FEAT_MBYTE
780 	    if (has_mbyte)
781 		c = mb_ptr2char(p + 2);
782 	    else
783 #endif
784 		c = p[2];
785 	    *pp += l + 4;
786 	    return c;
787 	}
788     }
789     return 0;
790 }
791 
792 #ifdef EBCDIC
793 /*
794  * Table for equivalence class "c". (IBM-1047)
795  */
796 char *EQUIVAL_CLASS_C[16] = {
797     "A\x62\x63\x64\x65\x66\x67",
798     "C\x68",
799     "E\x71\x72\x73\x74",
800     "I\x75\x76\x77\x78",
801     "N\x69",
802     "O\xEB\xEC\xED\xEE\xEF",
803     "U\xFB\xFC\xFD\xFE",
804     "Y\xBA",
805     "a\x42\x43\x44\x45\x46\x47",
806     "c\x48",
807     "e\x51\x52\x53\x54",
808     "i\x55\x56\x57\x58",
809     "n\x49",
810     "o\xCB\xCC\xCD\xCE\xCF",
811     "u\xDB\xDC\xDD\xDE",
812     "y\x8D\xDF",
813 };
814 #endif
815 
816 /*
817  * Produce the bytes for equivalence class "c".
818  * Currently only handles latin1, latin9 and utf-8.
819  * NOTE: When changing this function, also change nfa_emit_equi_class()
 *
 * Every member of the class that "c" belongs to is handed to regmbc(), the
 * plain ASCII letter first.  When "c" is not listed in any class, or (with
 * FEAT_MBYTE) the encoding is not utf-8, "latin1" or "iso-8859-15", only "c"
 * itself is passed to regmbc().
 *
 * CASEMBC() and REGMBC() are defined outside this part of the file;
 * presumably they expand to a case label and a regmbc() call only when
 * multi-byte support is compiled in -- confirm at their definition.
820  */
821     static void
822 reg_equi_class(c)
823     int	    c;
824 {
825 #ifdef FEAT_MBYTE
826     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
827 					 || STRCMP(p_enc, "iso-8859-15") == 0)
828 #endif
829     {
830 #ifdef EBCDIC
831 	int i;
832 
833 	/* This might be slower than switch/case below. */
	/* Look for the first of the 16 table strings that contains "c" and
	 * emit every character of that string. */
834 	for (i = 0; i < 16; i++)
835 	{
836 	    if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
837 	    {
838 		char *p = EQUIVAL_CLASS_C[i];
839 
840 		while (*p != 0)
841 		    regmbc(*p++);
842 		return;
843 	    }
844 	}
845 #else
	/* Each group below lists all members of one class as case labels and
	 * then emits that same list; keep both lists in sync when editing. */
846 	switch (c)
847 	{
848 	    /* Do not use '\300' style, it results in a negative number. */
849 	    case 'A': case 0xc0: case 0xc1: case 0xc2:
850 	    case 0xc3: case 0xc4: case 0xc5:
851 	    CASEMBC(0x100) CASEMBC(0x102) CASEMBC(0x104) CASEMBC(0x1cd)
852 	    CASEMBC(0x1de) CASEMBC(0x1e0) CASEMBC(0x1ea2)
853 		      regmbc('A'); regmbc(0xc0); regmbc(0xc1);
854 		      regmbc(0xc2); regmbc(0xc3); regmbc(0xc4);
855 		      regmbc(0xc5);
856 		      REGMBC(0x100) REGMBC(0x102) REGMBC(0x104)
857 		      REGMBC(0x1cd) REGMBC(0x1de) REGMBC(0x1e0)
858 		      REGMBC(0x1ea2)
859 		      return;
860 	    case 'B': CASEMBC(0x1e02) CASEMBC(0x1e06)
861 		      regmbc('B'); REGMBC(0x1e02) REGMBC(0x1e06)
862 		      return;
863 	    case 'C': case 0xc7:
864 	    CASEMBC(0x106) CASEMBC(0x108) CASEMBC(0x10a) CASEMBC(0x10c)
865 		      regmbc('C'); regmbc(0xc7);
866 		      REGMBC(0x106) REGMBC(0x108) REGMBC(0x10a)
867 		      REGMBC(0x10c)
868 		      return;
869 	    case 'D': CASEMBC(0x10e) CASEMBC(0x110) CASEMBC(0x1e0a)
870 	    CASEMBC(0x1e0e) CASEMBC(0x1e10)
871 		      regmbc('D'); REGMBC(0x10e) REGMBC(0x110)
872 		      REGMBC(0x1e0a) REGMBC(0x1e0e) REGMBC(0x1e10)
873 		      return;
874 	    case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
875 	    CASEMBC(0x112) CASEMBC(0x114) CASEMBC(0x116) CASEMBC(0x118)
876 	    CASEMBC(0x11a) CASEMBC(0x1eba) CASEMBC(0x1ebc)
877 		      regmbc('E'); regmbc(0xc8); regmbc(0xc9);
878 		      regmbc(0xca); regmbc(0xcb);
879 		      REGMBC(0x112) REGMBC(0x114) REGMBC(0x116)
880 		      REGMBC(0x118) REGMBC(0x11a) REGMBC(0x1eba)
881 		      REGMBC(0x1ebc)
882 		      return;
883 	    case 'F': CASEMBC(0x1e1e)
884 		      regmbc('F'); REGMBC(0x1e1e)
885 		      return;
886 	    case 'G': CASEMBC(0x11c) CASEMBC(0x11e) CASEMBC(0x120)
887 	    CASEMBC(0x122) CASEMBC(0x1e4) CASEMBC(0x1e6) CASEMBC(0x1f4)
888 	    CASEMBC(0x1e20)
889 		      regmbc('G'); REGMBC(0x11c) REGMBC(0x11e)
890 		      REGMBC(0x120) REGMBC(0x122) REGMBC(0x1e4)
891 		      REGMBC(0x1e6) REGMBC(0x1f4) REGMBC(0x1e20)
892 		      return;
893 	    case 'H': CASEMBC(0x124) CASEMBC(0x126) CASEMBC(0x1e22)
894 	    CASEMBC(0x1e26) CASEMBC(0x1e28)
895 		      regmbc('H'); REGMBC(0x124) REGMBC(0x126)
896 		      REGMBC(0x1e22) REGMBC(0x1e26) REGMBC(0x1e28)
897 		      return;
898 	    case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
899 	    CASEMBC(0x128) CASEMBC(0x12a) CASEMBC(0x12c) CASEMBC(0x12e)
900 	    CASEMBC(0x130) CASEMBC(0x1cf) CASEMBC(0x1ec8)
901 		      regmbc('I'); regmbc(0xcc); regmbc(0xcd);
902 		      regmbc(0xce); regmbc(0xcf);
903 		      REGMBC(0x128) REGMBC(0x12a) REGMBC(0x12c)
904 		      REGMBC(0x12e) REGMBC(0x130) REGMBC(0x1cf)
905 		      REGMBC(0x1ec8)
906 		      return;
907 	    case 'J': CASEMBC(0x134)
908 		      regmbc('J'); REGMBC(0x134)
909 		      return;
910 	    case 'K': CASEMBC(0x136) CASEMBC(0x1e8) CASEMBC(0x1e30)
911 	    CASEMBC(0x1e34)
912 		      regmbc('K'); REGMBC(0x136) REGMBC(0x1e8)
913 		      REGMBC(0x1e30) REGMBC(0x1e34)
914 		      return;
915 	    case 'L': CASEMBC(0x139) CASEMBC(0x13b) CASEMBC(0x13d)
916 	    CASEMBC(0x13f) CASEMBC(0x141) CASEMBC(0x1e3a)
917 		      regmbc('L'); REGMBC(0x139) REGMBC(0x13b)
918 		      REGMBC(0x13d) REGMBC(0x13f) REGMBC(0x141)
919 		      REGMBC(0x1e3a)
920 		      return;
921 	    case 'M': CASEMBC(0x1e3e) CASEMBC(0x1e40)
922 		      regmbc('M'); REGMBC(0x1e3e) REGMBC(0x1e40)
923 		      return;
924 	    case 'N': case 0xd1:
925 	    CASEMBC(0x143) CASEMBC(0x145) CASEMBC(0x147) CASEMBC(0x1e44)
926 	    CASEMBC(0x1e48)
927 		      regmbc('N'); regmbc(0xd1);
928 		      REGMBC(0x143) REGMBC(0x145) REGMBC(0x147)
929 		      REGMBC(0x1e44) REGMBC(0x1e48)
930 		      return;
931 	    case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5:
932 	    case 0xd6: case 0xd8:
933 	    CASEMBC(0x14c) CASEMBC(0x14e) CASEMBC(0x150) CASEMBC(0x1a0)
934 	    CASEMBC(0x1d1) CASEMBC(0x1ea) CASEMBC(0x1ec) CASEMBC(0x1ece)
935 		      regmbc('O'); regmbc(0xd2); regmbc(0xd3);
936 		      regmbc(0xd4); regmbc(0xd5); regmbc(0xd6);
937 		      regmbc(0xd8);
938 		      REGMBC(0x14c) REGMBC(0x14e) REGMBC(0x150)
939 		      REGMBC(0x1a0) REGMBC(0x1d1) REGMBC(0x1ea)
940 		      REGMBC(0x1ec) REGMBC(0x1ece)
941 		      return;
	    /* NOTE(review): unlike every other class, the two non-ASCII
	     * labels of 'P' are plain "case" labels instead of CASEMBC();
	     * check against the macro definition whether that is intended. */
942 	    case 'P': case 0x1e54: case 0x1e56:
943 		      regmbc('P'); REGMBC(0x1e54) REGMBC(0x1e56)
944 		      return;
945 	    case 'R': CASEMBC(0x154) CASEMBC(0x156) CASEMBC(0x158)
946 	    CASEMBC(0x1e58) CASEMBC(0x1e5e)
947 		      regmbc('R'); REGMBC(0x154) REGMBC(0x156) REGMBC(0x158)
948 		      REGMBC(0x1e58) REGMBC(0x1e5e)
949 		      return;
950 	    case 'S': CASEMBC(0x15a) CASEMBC(0x15c) CASEMBC(0x15e)
951 	    CASEMBC(0x160) CASEMBC(0x1e60)
952 		      regmbc('S'); REGMBC(0x15a) REGMBC(0x15c)
953 		      REGMBC(0x15e) REGMBC(0x160) REGMBC(0x1e60)
954 		      return;
955 	    case 'T': CASEMBC(0x162) CASEMBC(0x164) CASEMBC(0x166)
956 	    CASEMBC(0x1e6a) CASEMBC(0x1e6e)
957 		      regmbc('T'); REGMBC(0x162) REGMBC(0x164)
958 		      REGMBC(0x166) REGMBC(0x1e6a) REGMBC(0x1e6e)
959 		      return;
960 	    case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
961 	    CASEMBC(0x168) CASEMBC(0x16a) CASEMBC(0x16c) CASEMBC(0x16e)
962 	    CASEMBC(0x170) CASEMBC(0x172) CASEMBC(0x1af) CASEMBC(0x1d3)
963 	    CASEMBC(0x1ee6)
964 		      regmbc('U'); regmbc(0xd9); regmbc(0xda);
965 		      regmbc(0xdb); regmbc(0xdc);
966 		      REGMBC(0x168) REGMBC(0x16a) REGMBC(0x16c)
967 		      REGMBC(0x16e) REGMBC(0x170) REGMBC(0x172)
968 		      REGMBC(0x1af) REGMBC(0x1d3) REGMBC(0x1ee6)
969 		      return;
970 	    case 'V': CASEMBC(0x1e7c)
971 		      regmbc('V'); REGMBC(0x1e7c)
972 		      return;
973 	    case 'W': CASEMBC(0x174) CASEMBC(0x1e80) CASEMBC(0x1e82)
974 	    CASEMBC(0x1e84) CASEMBC(0x1e86)
975 		      regmbc('W'); REGMBC(0x174) REGMBC(0x1e80)
976 		      REGMBC(0x1e82) REGMBC(0x1e84) REGMBC(0x1e86)
977 		      return;
978 	    case 'X': CASEMBC(0x1e8a) CASEMBC(0x1e8c)
979 		      regmbc('X'); REGMBC(0x1e8a) REGMBC(0x1e8c)
980 		      return;
981 	    case 'Y': case 0xdd:
982 	    CASEMBC(0x176) CASEMBC(0x178) CASEMBC(0x1e8e) CASEMBC(0x1ef2)
983 	    CASEMBC(0x1ef6) CASEMBC(0x1ef8)
984 		      regmbc('Y'); regmbc(0xdd);
985 		      REGMBC(0x176) REGMBC(0x178) REGMBC(0x1e8e)
986 		      REGMBC(0x1ef2) REGMBC(0x1ef6) REGMBC(0x1ef8)
987 		      return;
988 	    case 'Z': CASEMBC(0x179) CASEMBC(0x17b) CASEMBC(0x17d)
989 	    CASEMBC(0x1b5) CASEMBC(0x1e90) CASEMBC(0x1e94)
990 		      regmbc('Z'); REGMBC(0x179) REGMBC(0x17b)
991 		      REGMBC(0x17d) REGMBC(0x1b5) REGMBC(0x1e90)
992 		      REGMBC(0x1e94)
993 		      return;
994 	    case 'a': case 0xe0: case 0xe1: case 0xe2:
995 	    case 0xe3: case 0xe4: case 0xe5:
996 	    CASEMBC(0x101) CASEMBC(0x103) CASEMBC(0x105) CASEMBC(0x1ce)
997 	    CASEMBC(0x1df) CASEMBC(0x1e1) CASEMBC(0x1ea3)
998 		      regmbc('a'); regmbc(0xe0); regmbc(0xe1);
999 		      regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
1000 		      regmbc(0xe5);
1001 		      REGMBC(0x101) REGMBC(0x103) REGMBC(0x105)
1002 		      REGMBC(0x1ce) REGMBC(0x1df) REGMBC(0x1e1)
1003 		      REGMBC(0x1ea3)
1004 		      return;
1005 	    case 'b': CASEMBC(0x1e03) CASEMBC(0x1e07)
1006 		      regmbc('b'); REGMBC(0x1e03) REGMBC(0x1e07)
1007 		      return;
1008 	    case 'c': case 0xe7:
1009 	    CASEMBC(0x107) CASEMBC(0x109) CASEMBC(0x10b) CASEMBC(0x10d)
1010 		      regmbc('c'); regmbc(0xe7);
1011 		      REGMBC(0x107) REGMBC(0x109) REGMBC(0x10b)
1012 		      REGMBC(0x10d)
1013 		      return;
1014 	    case 'd': CASEMBC(0x10f) CASEMBC(0x111) CASEMBC(0x1e0b)
1015 	    CASEMBC(0x1e0f) CASEMBC(0x1e11)
1016 		      regmbc('d'); REGMBC(0x10f) REGMBC(0x111)
1017 		      REGMBC(0x1e0b) REGMBC(0x1e0f) REGMBC(0x1e11)
1018 		      return;
1019 	    case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
1020 	    CASEMBC(0x113) CASEMBC(0x115) CASEMBC(0x117) CASEMBC(0x119)
1021 	    CASEMBC(0x11b) CASEMBC(0x1ebb) CASEMBC(0x1ebd)
1022 		      regmbc('e'); regmbc(0xe8); regmbc(0xe9);
1023 		      regmbc(0xea); regmbc(0xeb);
1024 		      REGMBC(0x113) REGMBC(0x115) REGMBC(0x117)
1025 		      REGMBC(0x119) REGMBC(0x11b) REGMBC(0x1ebb)
1026 		      REGMBC(0x1ebd)
1027 		      return;
1028 	    case 'f': CASEMBC(0x1e1f)
1029 		      regmbc('f'); REGMBC(0x1e1f)
1030 		      return;
1031 	    case 'g': CASEMBC(0x11d) CASEMBC(0x11f) CASEMBC(0x121)
1032 	    CASEMBC(0x123) CASEMBC(0x1e5) CASEMBC(0x1e7) CASEMBC(0x1f5)
1033 	    CASEMBC(0x1e21)
1034 		      regmbc('g'); REGMBC(0x11d) REGMBC(0x11f)
1035 		      REGMBC(0x121) REGMBC(0x123) REGMBC(0x1e5)
1036 		      REGMBC(0x1e7) REGMBC(0x1f5) REGMBC(0x1e21)
1037 		      return;
1038 	    case 'h': CASEMBC(0x125) CASEMBC(0x127) CASEMBC(0x1e23)
1039 	    CASEMBC(0x1e27) CASEMBC(0x1e29) CASEMBC(0x1e96)
1040 		      regmbc('h'); REGMBC(0x125) REGMBC(0x127)
1041 		      REGMBC(0x1e23) REGMBC(0x1e27) REGMBC(0x1e29)
1042 		      REGMBC(0x1e96)
1043 		      return;
1044 	    case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
1045 	    CASEMBC(0x129) CASEMBC(0x12b) CASEMBC(0x12d) CASEMBC(0x12f)
1046 	    CASEMBC(0x1d0) CASEMBC(0x1ec9)
1047 		      regmbc('i'); regmbc(0xec); regmbc(0xed);
1048 		      regmbc(0xee); regmbc(0xef);
1049 		      REGMBC(0x129) REGMBC(0x12b) REGMBC(0x12d)
1050 		      REGMBC(0x12f) REGMBC(0x1d0) REGMBC(0x1ec9)
1051 		      return;
1052 	    case 'j': CASEMBC(0x135) CASEMBC(0x1f0)
1053 		      regmbc('j'); REGMBC(0x135) REGMBC(0x1f0)
1054 		      return;
1055 	    case 'k': CASEMBC(0x137) CASEMBC(0x1e9) CASEMBC(0x1e31)
1056 	    CASEMBC(0x1e35)
1057 		      regmbc('k'); REGMBC(0x137) REGMBC(0x1e9)
1058 		      REGMBC(0x1e31) REGMBC(0x1e35)
1059 		      return;
1060 	    case 'l': CASEMBC(0x13a) CASEMBC(0x13c) CASEMBC(0x13e)
1061 	    CASEMBC(0x140) CASEMBC(0x142) CASEMBC(0x1e3b)
1062 		      regmbc('l'); REGMBC(0x13a) REGMBC(0x13c)
1063 		      REGMBC(0x13e) REGMBC(0x140) REGMBC(0x142)
1064 		      REGMBC(0x1e3b)
1065 		      return;
1066 	    case 'm': CASEMBC(0x1e3f) CASEMBC(0x1e41)
1067 		      regmbc('m'); REGMBC(0x1e3f) REGMBC(0x1e41)
1068 		      return;
1069 	    case 'n': case 0xf1:
1070 	    CASEMBC(0x144) CASEMBC(0x146) CASEMBC(0x148) CASEMBC(0x149)
1071 	    CASEMBC(0x1e45) CASEMBC(0x1e49)
1072 		      regmbc('n'); regmbc(0xf1);
1073 		      REGMBC(0x144) REGMBC(0x146) REGMBC(0x148)
1074 		      REGMBC(0x149) REGMBC(0x1e45) REGMBC(0x1e49)
1075 		      return;
1076 	    case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
1077 	    case 0xf6: case 0xf8:
1078 	    CASEMBC(0x14d) CASEMBC(0x14f) CASEMBC(0x151) CASEMBC(0x1a1)
1079 	    CASEMBC(0x1d2) CASEMBC(0x1eb) CASEMBC(0x1ed) CASEMBC(0x1ecf)
1080 		      regmbc('o'); regmbc(0xf2); regmbc(0xf3);
1081 		      regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
1082 		      regmbc(0xf8);
1083 		      REGMBC(0x14d) REGMBC(0x14f) REGMBC(0x151)
1084 		      REGMBC(0x1a1) REGMBC(0x1d2) REGMBC(0x1eb)
1085 		      REGMBC(0x1ed) REGMBC(0x1ecf)
1086 		      return;
1087 	    case 'p': CASEMBC(0x1e55) CASEMBC(0x1e57)
1088 		      regmbc('p'); REGMBC(0x1e55) REGMBC(0x1e57)
1089 		      return;
1090 	    case 'r': CASEMBC(0x155) CASEMBC(0x157) CASEMBC(0x159)
1091 	    CASEMBC(0x1e59) CASEMBC(0x1e5f)
1092 		      regmbc('r'); REGMBC(0x155) REGMBC(0x157) REGMBC(0x159)
1093 		      REGMBC(0x1e59) REGMBC(0x1e5f)
1094 		      return;
1095 	    case 's': CASEMBC(0x15b) CASEMBC(0x15d) CASEMBC(0x15f)
1096 	    CASEMBC(0x161) CASEMBC(0x1e61)
1097 		      regmbc('s'); REGMBC(0x15b) REGMBC(0x15d)
1098 		      REGMBC(0x15f) REGMBC(0x161) REGMBC(0x1e61)
1099 		      return;
1100 	    case 't': CASEMBC(0x163) CASEMBC(0x165) CASEMBC(0x167)
1101 	    CASEMBC(0x1e6b) CASEMBC(0x1e6f) CASEMBC(0x1e97)
1102 		      regmbc('t'); REGMBC(0x163) REGMBC(0x165) REGMBC(0x167)
1103 		      REGMBC(0x1e6b) REGMBC(0x1e6f) REGMBC(0x1e97)
1104 		      return;
1105 	    case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
1106 	    CASEMBC(0x169) CASEMBC(0x16b) CASEMBC(0x16d) CASEMBC(0x16f)
1107 	    CASEMBC(0x171) CASEMBC(0x173) CASEMBC(0x1b0) CASEMBC(0x1d4)
1108 	    CASEMBC(0x1ee7)
1109 		      regmbc('u'); regmbc(0xf9); regmbc(0xfa);
1110 		      regmbc(0xfb); regmbc(0xfc);
1111 		      REGMBC(0x169) REGMBC(0x16b) REGMBC(0x16d)
1112 		      REGMBC(0x16f) REGMBC(0x171) REGMBC(0x173)
1113 		      REGMBC(0x1b0) REGMBC(0x1d4) REGMBC(0x1ee7)
1114 		      return;
1115 	    case 'v': CASEMBC(0x1e7d)
1116 		      regmbc('v'); REGMBC(0x1e7d)
1117 		      return;
1118 	    case 'w': CASEMBC(0x175) CASEMBC(0x1e81) CASEMBC(0x1e83)
1119 	    CASEMBC(0x1e85) CASEMBC(0x1e87) CASEMBC(0x1e98)
1120 		      regmbc('w'); REGMBC(0x175) REGMBC(0x1e81)
1121 		      REGMBC(0x1e83) REGMBC(0x1e85) REGMBC(0x1e87)
1122 		      REGMBC(0x1e98)
1123 		      return;
1124 	    case 'x': CASEMBC(0x1e8b) CASEMBC(0x1e8d)
1125 		      regmbc('x'); REGMBC(0x1e8b) REGMBC(0x1e8d)
1126 		      return;
1127 	    case 'y': case 0xfd: case 0xff:
1128 	    CASEMBC(0x177) CASEMBC(0x1e8f) CASEMBC(0x1e99)
1129 	    CASEMBC(0x1ef3) CASEMBC(0x1ef7) CASEMBC(0x1ef9)
1130 		      regmbc('y'); regmbc(0xfd); regmbc(0xff);
1131 		      REGMBC(0x177) REGMBC(0x1e8f) REGMBC(0x1e99)
1132 		      REGMBC(0x1ef3) REGMBC(0x1ef7) REGMBC(0x1ef9)
1133 		      return;
1134 	    case 'z': CASEMBC(0x17a) CASEMBC(0x17c) CASEMBC(0x17e)
1135 	    CASEMBC(0x1b6) CASEMBC(0x1e91) CASEMBC(0x1e95)
1136 		      regmbc('z'); REGMBC(0x17a) REGMBC(0x17c)
1137 		      REGMBC(0x17e) REGMBC(0x1b6) REGMBC(0x1e91)
1138 		      REGMBC(0x1e95)
1139 		      return;
1140 	}
1141 #endif
1142     }
    /* Only reached when no class was emitted above: "c" stands for itself. */
1143     regmbc(c);
1144 }
1145 
1146 /*
1147  * Check for a collating element "[.a.]".  "pp" points to the '['.
1148  * Returns a character. Zero means that no item was recognized.  Otherwise
1149  * "pp" is advanced to after the item.
1150  * Currently only single characters are recognized!
1151  */
1152     static int
1153 get_coll_element(pp)
1154     char_u	**pp;
1155 {
1156     int		c;
1157     int		l = 1;
1158     char_u	*p = *pp;
1159 
1160     if (p[0] != NUL && p[1] == '.')
1161     {
1162 #ifdef FEAT_MBYTE
1163 	if (has_mbyte)
1164 	    l = (*mb_ptr2len)(p + 2);
1165 #endif
1166 	if (p[l + 2] == '.' && p[l + 3] == ']')
1167 	{
1168 #ifdef FEAT_MBYTE
1169 	    if (has_mbyte)
1170 		c = mb_ptr2char(p + 2);
1171 	    else
1172 #endif
1173 		c = p[2];
1174 	    *pp += l + 4;
1175 	    return c;
1176 	}
1177     }
1178     return 0;
1179 }
1180 
1181 static void get_cpo_flags __ARGS((void));
1182 static int reg_cpo_lit; /* 'cpoptions' contains 'l' flag */
1183 static int reg_cpo_bsl; /* 'cpoptions' contains '\' flag */
1184 
1185     static void
1186 get_cpo_flags()
1187 {
1188     reg_cpo_lit = vim_strchr(p_cpo, CPO_LITERAL) != NULL;
1189     reg_cpo_bsl = vim_strchr(p_cpo, CPO_BACKSL) != NULL;
1190 }
1191 
1192 /*
1193  * Skip over a "[]" range.
1194  * "p" must point to the character after the '['.
1195  * The returned pointer is on the matching ']', or the terminating NUL.
1196  */
1197     static char_u *
1198 skip_anyof(p)
1199     char_u	*p;
1200 {
1201 #ifdef FEAT_MBYTE
1202     int		l;
1203 #endif
1204 
1205     if (*p == '^')	/* Complement of range. */
1206 	++p;
1207     if (*p == ']' || *p == '-')
1208 	++p;
1209     while (*p != NUL && *p != ']')
1210     {
1211 #ifdef FEAT_MBYTE
1212 	if (has_mbyte && (l = (*mb_ptr2len)(p)) > 1)
1213 	    p += l;
1214 	else
1215 #endif
1216 	    if (*p == '-')
1217 	    {
1218 		++p;
1219 		if (*p != ']' && *p != NUL)
1220 		    mb_ptr_adv(p);
1221 	    }
1222 	else if (*p == '\\'
1223 		&& !reg_cpo_bsl
1224 		&& (vim_strchr(REGEXP_INRANGE, p[1]) != NULL
1225 		    || (!reg_cpo_lit && vim_strchr(REGEXP_ABBR, p[1]) != NULL)))
1226 	    p += 2;
1227 	else if (*p == '[')
1228 	{
1229 	    if (get_char_class(&p) == CLASS_NONE
1230 		    && get_equi_class(&p) == 0
1231 		    && get_coll_element(&p) == 0
1232 		    && *p != NUL)
1233 		++p; /* it is not a class name and not NUL */
1234 	}
1235 	else
1236 	    ++p;
1237     }
1238 
1239     return p;
1240 }
1241 
1242 /*
1243  * Skip past regular expression.
1244  * Stop at end of "startp" or where "dirc" is found ('/', '?', etc).
1245  * Take care of characters with a backslash in front of it.
1246  * Skip strings inside [ and ].
1247  * When "newp" is not NULL and "dirc" is '?', make an allocated copy of the
1248  * expression and change "\?" to "?".  If "*newp" is not NULL the expression
1249  * is changed in-place.
1250  */
1251     char_u *
1252 skip_regexp(startp, dirc, magic, newp)
1253     char_u	*startp;
1254     int		dirc;
1255     int		magic;
1256     char_u	**newp;
1257 {
1258     int		mymagic;
1259     char_u	*p = startp;
1260 
1261     if (magic)
1262 	mymagic = MAGIC_ON;
1263     else
1264 	mymagic = MAGIC_OFF;
1265     get_cpo_flags();
1266 
1267     for (; p[0] != NUL; mb_ptr_adv(p))
1268     {
1269 	if (p[0] == dirc)	/* found end of regexp */
1270 	    break;
1271 	if ((p[0] == '[' && mymagic >= MAGIC_ON)
1272 		|| (p[0] == '\\' && p[1] == '[' && mymagic <= MAGIC_OFF))
1273 	{
1274 	    p = skip_anyof(p + 1);
1275 	    if (p[0] == NUL)
1276 		break;
1277 	}
1278 	else if (p[0] == '\\' && p[1] != NUL)
1279 	{
1280 	    if (dirc == '?' && newp != NULL && p[1] == '?')
1281 	    {
1282 		/* change "\?" to "?", make a copy first. */
1283 		if (*newp == NULL)
1284 		{
1285 		    *newp = vim_strsave(startp);
1286 		    if (*newp != NULL)
1287 			p = *newp + (p - startp);
1288 		}
1289 		if (*newp != NULL)
1290 		    STRMOVE(p, p + 1);
1291 		else
1292 		    ++p;
1293 	    }
1294 	    else
1295 		++p;    /* skip next character */
1296 	    if (*p == 'v')
1297 		mymagic = MAGIC_ALL;
1298 	    else if (*p == 'V')
1299 		mymagic = MAGIC_NONE;
1300 	}
1301     }
1302     return p;
1303 }
1304 
1305 static regprog_T  *bt_regcomp __ARGS((char_u *expr, int re_flags));
1306 static void bt_regfree __ARGS((regprog_T *prog));
1307 
1308 /*
1309  * bt_regcomp() - compile a regular expression into internal code for the
1310  * traditional back track matcher.
1311  * Returns the program in allocated space.  Returns NULL for an error.
1312  *
1313  * We can't allocate space until we know how big the compiled form will be,
1314  * but we can't compile it (and thus know how big it is) until we've got a
1315  * place to put the code.  So we cheat:  we compile it twice, once with code
1316  * generation turned off and size counting turned on, and once "for real".
1317  * This also means that we don't allocate space until we are sure that the
1318  * thing really will compile successfully, and we never have to move the
1319  * code and thus invalidate pointers into it.  (Note that it has to be in
1320  * one piece because vim_free() must be able to free it all.)
1321  *
1322  * Whether upper/lower case is to be ignored is decided when executing the
1323  * program, it does not matter here.
1324  *
1325  * Beware that the optimization-preparation code in here knows about some
1326  * of the structure of the compiled regexp.
1327  * "re_flags": RE_MAGIC and/or RE_STRING.
1328  */
1329     static regprog_T *
1330 bt_regcomp(expr, re_flags)
1331     char_u	*expr;
1332     int		re_flags;
1333 {
1334     bt_regprog_T    *r;
1335     char_u	*scan;
1336     char_u	*longest;
1337     int		len;
1338     int		flags;
1339 
1340     if (expr == NULL)
1341 	EMSG_RET_NULL(_(e_null));
1342 
1343     init_class_tab();
1344 
1345     /*
1346      * First pass: determine size, legality.
1347      */
1348     regcomp_start(expr, re_flags);
1349     regcode = JUST_CALC_SIZE;
1350     regc(REGMAGIC);
1351     if (reg(REG_NOPAREN, &flags) == NULL)
1352 	return NULL;
1353 
1354     /* Small enough for pointer-storage convention? */
1355 #ifdef SMALL_MALLOC		/* 16 bit storage allocation */
1356     if (regsize >= 65536L - 256L)
1357 	EMSG_RET_NULL(_("E339: Pattern too long"));
1358 #endif
1359 
1360     /* Allocate space. */
1361     r = (bt_regprog_T *)lalloc(sizeof(bt_regprog_T) + regsize, TRUE);
1362     if (r == NULL)
1363 	return NULL;
1364 
1365     /*
1366      * Second pass: emit code.
1367      */
1368     regcomp_start(expr, re_flags);
1369     regcode = r->program;
1370     regc(REGMAGIC);
1371     if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
1372     {
1373 	vim_free(r);
1374 	if (reg_toolong)
1375 	    EMSG_RET_NULL(_("E339: Pattern too long"));
1376 	return NULL;
1377     }
1378 
1379     /* Dig out information for optimizations. */
1380     r->regstart = NUL;		/* Worst-case defaults. */
1381     r->reganch = 0;
1382     r->regmust = NULL;
1383     r->regmlen = 0;
1384     r->regflags = regflags;
1385     if (flags & HASNL)
1386 	r->regflags |= RF_HASNL;
1387     if (flags & HASLOOKBH)
1388 	r->regflags |= RF_LOOKBH;
1389 #ifdef FEAT_SYN_HL
1390     /* Remember whether this pattern has any \z specials in it. */
1391     r->reghasz = re_has_z;
1392 #endif
1393     scan = r->program + 1;	/* First BRANCH. */
1394     if (OP(regnext(scan)) == END)   /* Only one top-level choice. */
1395     {
1396 	scan = OPERAND(scan);
1397 
1398 	/* Starting-point info. */
1399 	if (OP(scan) == BOL || OP(scan) == RE_BOF)
1400 	{
1401 	    r->reganch++;
1402 	    scan = regnext(scan);
1403 	}
1404 
1405 	if (OP(scan) == EXACTLY)
1406 	{
1407 #ifdef FEAT_MBYTE
1408 	    if (has_mbyte)
1409 		r->regstart = (*mb_ptr2char)(OPERAND(scan));
1410 	    else
1411 #endif
1412 		r->regstart = *OPERAND(scan);
1413 	}
1414 	else if ((OP(scan) == BOW
1415 		    || OP(scan) == EOW
1416 		    || OP(scan) == NOTHING
1417 		    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
1418 		    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
1419 		 && OP(regnext(scan)) == EXACTLY)
1420 	{
1421 #ifdef FEAT_MBYTE
1422 	    if (has_mbyte)
1423 		r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
1424 	    else
1425 #endif
1426 		r->regstart = *OPERAND(regnext(scan));
1427 	}
1428 
1429 	/*
1430 	 * If there's something expensive in the r.e., find the longest
1431 	 * literal string that must appear and make it the regmust.  Resolve
1432 	 * ties in favor of later strings, since the regstart check works
1433 	 * with the beginning of the r.e. and avoiding duplication
1434 	 * strengthens checking.  Not a strong reason, but sufficient in the
1435 	 * absence of others.
1436 	 */
1437 	/*
1438 	 * When the r.e. starts with BOW, it is faster to look for a regmust
1439 	 * first. Used a lot for "#" and "*" commands. (Added by mool).
1440 	 */
1441 	if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
1442 							  && !(flags & HASNL))
1443 	{
1444 	    longest = NULL;
1445 	    len = 0;
1446 	    for (; scan != NULL; scan = regnext(scan))
1447 		if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
1448 		{
1449 		    longest = OPERAND(scan);
1450 		    len = (int)STRLEN(OPERAND(scan));
1451 		}
1452 	    r->regmust = longest;
1453 	    r->regmlen = len;
1454 	}
1455     }
1456 #ifdef BT_REGEXP_DUMP
1457     regdump(expr, r);
1458 #endif
1459     r->engine = &bt_regengine;
1460     return (regprog_T *)r;
1461 }
1462 
1463 /*
1464  * Free a compiled regexp program, returned by bt_regcomp().
1465  */
1466     static void
1467 bt_regfree(prog)
1468     regprog_T   *prog;
1469 {
1470     vim_free(prog);
1471 }
1472 
1473 /*
1474  * Setup to parse the regexp.  Used once to get the length and once to do it.
1475  */
1476     static void
1477 regcomp_start(expr, re_flags)
1478     char_u	*expr;
1479     int		re_flags;	    /* see vim_regcomp() */
1480 {
1481     initchr(expr);
1482     if (re_flags & RE_MAGIC)
1483 	reg_magic = MAGIC_ON;
1484     else
1485 	reg_magic = MAGIC_OFF;
1486     reg_string = (re_flags & RE_STRING);
1487     reg_strict = (re_flags & RE_STRICT);
1488     get_cpo_flags();
1489 
1490     num_complex_braces = 0;
1491     regnpar = 1;
1492     vim_memset(had_endbrace, 0, sizeof(had_endbrace));
1493 #ifdef FEAT_SYN_HL
1494     regnzpar = 1;
1495     re_has_z = 0;
1496 #endif
1497     regsize = 0L;
1498     reg_toolong = FALSE;
1499     regflags = 0;
1500 #if defined(FEAT_SYN_HL) || defined(PROTO)
1501     had_eol = FALSE;
1502 #endif
1503 }
1504 
#if defined(FEAT_SYN_HL) || defined(PROTO)
/*
 * Tell whether the EOL item "$" was found during the most recent call to
 * vim_regcomp().  The answer is the current value of the file-level flag
 * had_eol.  Messy, but it works fine.
 */
    int
vim_regcomp_had_eol()
{
    return had_eol;
}
#endif
1516 
1517 /*
1518  * Parse regular expression, i.e. main body or parenthesized thing.
1519  *
1520  * Caller must absorb opening parenthesis.
1521  *
1522  * Combining parenthesis handling with the base level of regular expression
1523  * is a trifle forced, but the need to tie the tails of the branches to what
1524  * follows makes it hard to avoid.
 *
 * "paren" tells which kind of group is parsed; for everything except
 * REG_NOPAREN an opening node is emitted first and a matching closing node
 * last, while REG_NOPAREN ends with an END node.
 * "*flagp" starts as HASWIDTH, which is removed as soon as one branch lacks
 * it; SPSTART, HASNL and HASLOOKBH are collected from all branches.
 * Returns the first node of the group, or NULL on failure (the EMSG*_RET_NULL
 * macros are defined elsewhere; presumably they report the message and
 * return NULL).
1525  */
1526     static char_u *
1527 reg(paren, flagp)
1528     int		paren;	/* REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN */
1529     int		*flagp;
1530 {
1531     char_u	*ret;
1532     char_u	*br;
1533     char_u	*ender;
1534     int		parno = 0;
1535     int		flags;
1536 
1537     *flagp = HASWIDTH;		/* Tentatively. */
1538 
1539 #ifdef FEAT_SYN_HL
1540     if (paren == REG_ZPAREN)
1541     {
1542 	/* Make a ZOPEN node. */
1543 	if (regnzpar >= NSUBEXP)
1544 	    EMSG_RET_NULL(_("E50: Too many \\z("));
1545 	parno = regnzpar;
1546 	regnzpar++;
1547 	ret = regnode(ZOPEN + parno);
1548     }
1549     else
1550 #endif
1551 	if (paren == REG_PAREN)
1552     {
1553 	/* Make a MOPEN node. */
1554 	if (regnpar >= NSUBEXP)
1555 	    EMSG2_RET_NULL(_("E51: Too many %s("), reg_magic == MAGIC_ALL);
1556 	parno = regnpar;
1557 	++regnpar;
1558 	ret = regnode(MOPEN + parno);
1559     }
1560     else if (paren == REG_NPAREN)
1561     {
1562 	/* Make a NOPEN node. */
1563 	ret = regnode(NOPEN);
1564     }
1565     else
	/* REG_NOPAREN: no opening node, the first branch becomes the result. */
1566 	ret = NULL;
1567 
1568     /* Pick up the branches, linking them together. */
1569     br = regbranch(&flags);
1570     if (br == NULL)
1571 	return NULL;
1572     if (ret != NULL)
1573 	regtail(ret, br);	/* [MZ]OPEN -> first. */
1574     else
1575 	ret = br;
1576     /* If one of the branches can be zero-width, the whole thing can.
1577      * If one of the branches has * at start or matches a line-break, the
1578      * whole thing can. */
1579     if (!(flags & HASWIDTH))
1580 	*flagp &= ~HASWIDTH;
1581     *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
    /* Every "|" introduces one more alternative. */
1582     while (peekchr() == Magic('|'))
1583     {
1584 	skipchr();
1585 	br = regbranch(&flags);
1586 	if (br == NULL || reg_toolong)
1587 	    return NULL;
1588 	regtail(ret, br);	/* BRANCH -> BRANCH. */
1589 	if (!(flags & HASWIDTH))
1590 	    *flagp &= ~HASWIDTH;
1591 	*flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
1592     }
1593 
1594     /* Make a closing node, and hook it on the end. */
1595     ender = regnode(
1596 #ifdef FEAT_SYN_HL
1597 	    paren == REG_ZPAREN ? ZCLOSE + parno :
1598 #endif
1599 	    paren == REG_PAREN ? MCLOSE + parno :
1600 	    paren == REG_NPAREN ? NCLOSE : END);
1601     regtail(ret, ender);
1602 
1603     /* Hook the tails of the branches to the closing node. */
1604     for (br = ret; br != NULL; br = regnext(br))
1605 	regoptail(br, ender);
1606 
1607     /* Check for proper termination. */
    /* Note: for a group getchr() consumes the character that is checked. */
1608     if (paren != REG_NOPAREN && getchr() != Magic(')'))
1609     {
1610 #ifdef FEAT_SYN_HL
1611 	if (paren == REG_ZPAREN)
1612 	    EMSG_RET_NULL(_("E52: Unmatched \\z("));
1613 	else
1614 #endif
1615 	    if (paren == REG_NPAREN)
1616 	    EMSG2_RET_NULL(_(e_unmatchedpp), reg_magic == MAGIC_ALL);
1617 	else
1618 	    EMSG2_RET_NULL(_(e_unmatchedp), reg_magic == MAGIC_ALL);
1619     }
1620     else if (paren == REG_NOPAREN && peekchr() != NUL)
1621     {
	/* Top level, but something is left over after the last branch. */
1622 	if (curchr == Magic(')'))
1623 	    EMSG2_RET_NULL(_(e_unmatchedpar), reg_magic == MAGIC_ALL);
1624 	else
1625 	    EMSG_RET_NULL(_(e_trailing));	/* "Can't happen". */
1626 	/* NOTREACHED */
1627     }
1628     /*
1629      * Here we set the flag allowing back references to this set of
1630      * parentheses.
1631      */
1632     if (paren == REG_PAREN)
1633 	had_endbrace[parno] = TRUE;	/* have seen the close paren */
1634     return ret;
1635 }
1636 
1637 /*
1638  * Parse one alternative of an | operator.
1639  * Implements the & operator.
1640  */
1641     static char_u *
1642 regbranch(flagp)
1643     int		*flagp;
1644 {
1645     char_u	*ret;
1646     char_u	*chain = NULL;
1647     char_u	*latest;
1648     int		flags;
1649 
1650     *flagp = WORST | HASNL;		/* Tentatively. */
1651 
1652     ret = regnode(BRANCH);
1653     for (;;)
1654     {
1655 	latest = regconcat(&flags);
1656 	if (latest == NULL)
1657 	    return NULL;
1658 	/* If one of the branches has width, the whole thing has.  If one of
1659 	 * the branches anchors at start-of-line, the whole thing does.
1660 	 * If one of the branches uses look-behind, the whole thing does. */
1661 	*flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
1662 	/* If one of the branches doesn't match a line-break, the whole thing
1663 	 * doesn't. */
1664 	*flagp &= ~HASNL | (flags & HASNL);
1665 	if (chain != NULL)
1666 	    regtail(chain, latest);
1667 	if (peekchr() != Magic('&'))
1668 	    break;
1669 	skipchr();
1670 	regtail(latest, regnode(END)); /* operand ends */
1671 	if (reg_toolong)
1672 	    break;
1673 	reginsert(MATCH, latest);
1674 	chain = latest;
1675     }
1676 
1677     return ret;
1678 }
1679 
1680 /*
1681  * Parse one alternative of an | or & operator.
1682  * Implements the concatenation operator.
 *
 * Pieces are parsed with regpiece() and chained with regtail() until the
 * end of the pattern or a "|", "&" or ")" item is seen.  Items that only
 * change a setting (Z, c, C, v, m, M, V) are consumed here without
 * producing a node.
 * "*flagp" collects HASWIDTH, HASNL and HASLOOKBH from all pieces; SPSTART
 * is taken from the first piece only.
 * Returns the first piece, a NOTHING node when there were no pieces, or
 * NULL when a piece failed to parse or reg_toolong is set.
1683  */
1684     static char_u *
1685 regconcat(flagp)
1686     int		*flagp;
1687 {
1688     char_u	*first = NULL;
1689     char_u	*chain = NULL;
1690     char_u	*latest;
1691     int		flags;
1692     int		cont = TRUE;
1693 
1694     *flagp = WORST;		/* Tentatively. */
1695 
1696     while (cont)
1697     {
1698 	switch (peekchr())
1699 	{
	    /* These end the concatenation; they are left for the caller. */
1700 	    case NUL:
1701 	    case Magic('|'):
1702 	    case Magic('&'):
1703 	    case Magic(')'):
1704 			    cont = FALSE;
1705 			    break;
1706 	    case Magic('Z'):
1707 #ifdef FEAT_MBYTE
1708 			    regflags |= RF_ICOMBINE;
1709 #endif
1710 			    skipchr_keepstart();
1711 			    break;
1712 	    case Magic('c'):
1713 			    regflags |= RF_ICASE;
1714 			    skipchr_keepstart();
1715 			    break;
1716 	    case Magic('C'):
1717 			    regflags |= RF_NOICASE;
1718 			    skipchr_keepstart();
1719 			    break;
	    /* The four cases below switch the magicness.  NOTE(review):
	     * resetting curchr to -1 presumably forces the next peekchr()
	     * to re-read the character under the new setting -- confirm in
	     * peekchr(). */
1720 	    case Magic('v'):
1721 			    reg_magic = MAGIC_ALL;
1722 			    skipchr_keepstart();
1723 			    curchr = -1;
1724 			    break;
1725 	    case Magic('m'):
1726 			    reg_magic = MAGIC_ON;
1727 			    skipchr_keepstart();
1728 			    curchr = -1;
1729 			    break;
1730 	    case Magic('M'):
1731 			    reg_magic = MAGIC_OFF;
1732 			    skipchr_keepstart();
1733 			    curchr = -1;
1734 			    break;
1735 	    case Magic('V'):
1736 			    reg_magic = MAGIC_NONE;
1737 			    skipchr_keepstart();
1738 			    curchr = -1;
1739 			    break;
1740 	    default:
1741 			    latest = regpiece(&flags);
1742 			    if (latest == NULL || reg_toolong)
1743 				return NULL;
1744 			    *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
1745 			    if (chain == NULL)	/* First piece. */
1746 				*flagp |= flags & SPSTART;
1747 			    else
1748 				regtail(chain, latest);
1749 			    chain = latest;
1750 			    if (first == NULL)
1751 				first = latest;
1752 			    break;
1753 	}
1754     }
1755     if (first == NULL)		/* Loop ran zero times. */
1756 	first = regnode(NOTHING);
1757     return first;
1758 }
1759 
1760 /*
1761  * Parse something followed by possible [*+=].
1762  *
1763  * Note that the branching code sequences used for = and the general cases
1764  * of * and + are somewhat optimized:  they use the same NOTHING node as
1765  * both the endmarker for their branch list and the body of the last branch.
1766  * It might seem that this node could be dispensed with entirely, but the
1767  * endmarker role is not redundant.
1768  */
1769     static char_u *
1770 regpiece(flagp)
1771     int		    *flagp;
1772 {
1773     char_u	    *ret;
1774     int		    op;
1775     char_u	    *next;
1776     int		    flags;
1777     long	    minval;
1778     long	    maxval;
1779 
1780     ret = regatom(&flags);
1781     if (ret == NULL)
1782 	return NULL;
1783 
1784     op = peekchr();
1785     if (re_multi_type(op) == NOT_MULTI)
1786     {
1787 	*flagp = flags;
1788 	return ret;
1789     }
1790     /* default flags */
1791     *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
1792 
1793     skipchr();
1794     switch (op)
1795     {
1796 	case Magic('*'):
1797 	    if (flags & SIMPLE)
1798 		reginsert(STAR, ret);
1799 	    else
1800 	    {
1801 		/* Emit x* as (x&|), where & means "self". */
1802 		reginsert(BRANCH, ret); /* Either x */
1803 		regoptail(ret, regnode(BACK));	/* and loop */
1804 		regoptail(ret, ret);	/* back */
1805 		regtail(ret, regnode(BRANCH));	/* or */
1806 		regtail(ret, regnode(NOTHING)); /* null. */
1807 	    }
1808 	    break;
1809 
1810 	case Magic('+'):
1811 	    if (flags & SIMPLE)
1812 		reginsert(PLUS, ret);
1813 	    else
1814 	    {
1815 		/* Emit x+ as x(&|), where & means "self". */
1816 		next = regnode(BRANCH); /* Either */
1817 		regtail(ret, next);
1818 		regtail(regnode(BACK), ret);	/* loop back */
1819 		regtail(next, regnode(BRANCH)); /* or */
1820 		regtail(ret, regnode(NOTHING)); /* null. */
1821 	    }
1822 	    *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1823 	    break;
1824 
1825 	case Magic('@'):
1826 	    {
1827 		int	lop = END;
1828 		int	nr;
1829 
1830 		nr = getdecchrs();
1831 		switch (no_Magic(getchr()))
1832 		{
1833 		    case '=': lop = MATCH; break;		  /* \@= */
1834 		    case '!': lop = NOMATCH; break;		  /* \@! */
1835 		    case '>': lop = SUBPAT; break;		  /* \@> */
1836 		    case '<': switch (no_Magic(getchr()))
1837 			      {
1838 				  case '=': lop = BEHIND; break;   /* \@<= */
1839 				  case '!': lop = NOBEHIND; break; /* \@<! */
1840 			      }
1841 		}
1842 		if (lop == END)
1843 		    EMSG2_RET_NULL(_("E59: invalid character after %s@"),
1844 						      reg_magic == MAGIC_ALL);
1845 		/* Look behind must match with behind_pos. */
1846 		if (lop == BEHIND || lop == NOBEHIND)
1847 		{
1848 		    regtail(ret, regnode(BHPOS));
1849 		    *flagp |= HASLOOKBH;
1850 		}
1851 		regtail(ret, regnode(END)); /* operand ends */
1852 		if (lop == BEHIND || lop == NOBEHIND)
1853 		{
1854 		    if (nr < 0)
1855 			nr = 0; /* no limit is same as zero limit */
1856 		    reginsert_nr(lop, nr, ret);
1857 		}
1858 		else
1859 		    reginsert(lop, ret);
1860 		break;
1861 	    }
1862 
1863 	case Magic('?'):
1864 	case Magic('='):
1865 	    /* Emit x= as (x|) */
1866 	    reginsert(BRANCH, ret);		/* Either x */
1867 	    regtail(ret, regnode(BRANCH));	/* or */
1868 	    next = regnode(NOTHING);		/* null. */
1869 	    regtail(ret, next);
1870 	    regoptail(ret, next);
1871 	    break;
1872 
1873 	case Magic('{'):
1874 	    if (!read_limits(&minval, &maxval))
1875 		return NULL;
1876 	    if (flags & SIMPLE)
1877 	    {
1878 		reginsert(BRACE_SIMPLE, ret);
1879 		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1880 	    }
1881 	    else
1882 	    {
1883 		if (num_complex_braces >= 10)
1884 		    EMSG2_RET_NULL(_("E60: Too many complex %s{...}s"),
1885 						      reg_magic == MAGIC_ALL);
1886 		reginsert(BRACE_COMPLEX + num_complex_braces, ret);
1887 		regoptail(ret, regnode(BACK));
1888 		regoptail(ret, ret);
1889 		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
1890 		++num_complex_braces;
1891 	    }
1892 	    if (minval > 0 && maxval > 0)
1893 		*flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
1894 	    break;
1895     }
1896     if (re_multi_type(peekchr()) != NOT_MULTI)
1897     {
1898 	/* Can't have a multi follow a multi. */
1899 	if (peekchr() == Magic('*'))
1900 	    sprintf((char *)IObuff, _("E61: Nested %s*"),
1901 					    reg_magic >= MAGIC_ON ? "" : "\\");
1902 	else
1903 	    sprintf((char *)IObuff, _("E62: Nested %s%c"),
1904 		reg_magic == MAGIC_ALL ? "" : "\\", no_Magic(peekchr()));
1905 	EMSG_RET_NULL(IObuff);
1906     }
1907 
1908     return ret;
1909 }
1910 
1911 /* When making changes to classchars also change nfa_classcodes. */
1912 static char_u	*classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
1913 static int	classcodes[] = {
1914     ANY, IDENT, SIDENT, KWORD, SKWORD,
1915     FNAME, SFNAME, PRINT, SPRINT,
1916     WHITE, NWHITE, DIGIT, NDIGIT,
1917     HEX, NHEX, OCTAL, NOCTAL,
1918     WORD, NWORD, HEAD, NHEAD,
1919     ALPHA, NALPHA, LOWER, NLOWER,
1920     UPPER, NUPPER
1921 };
1922 
1923 /*
1924  * Parse the lowest level.
1925  *
1926  * Optimization:  gobbles an entire sequence of ordinary characters so that
1927  * it can turn them into a single node, which is smaller to store and
1928  * faster to run.  Don't do this when one_exactly is set.
 *
 * One atom is read with getchr() and the node(s) for it are emitted with
 * regnode(), regc() and friends.
 * "flagp" receives the properties of the atom: it is first set to WORST and
 * then HASWIDTH, SIMPLE, HASNL, SPSTART and HASLOOKBH are OR-ed in where the
 * code below finds them to apply.
 * Returns what regnode() or reg() returned for the start of the atom.
 * Returns NULL when a nested reg() or regatom() call fails.  The
 * EMSG*_RET_NULL macros used for syntax errors are defined elsewhere;
 * presumably they give the message and return NULL as well.
1929  */
1930     static char_u *
1931 regatom(flagp)
1932     int		   *flagp;
1933 {
1934     char_u	    *ret;
1935     int		    flags;
1936     int		    c;
1937     char_u	    *p;
1938     int		    extra = 0;
1939 
1940     *flagp = WORST;		/* Tentatively. */
1941 
1942     c = getchr();
1943     switch (c)
1944     {
1945       case Magic('^'):
1946 	ret = regnode(BOL);
1947 	break;
1948 
1949       case Magic('$'):
1950 	ret = regnode(EOL);
1951 #if defined(FEAT_SYN_HL) || defined(PROTO)
1952 	had_eol = TRUE;
1953 #endif
1954 	break;
1955 
1956       case Magic('<'):
1957 	ret = regnode(BOW);
1958 	break;
1959 
1960       case Magic('>'):
1961 	ret = regnode(EOW);
1962 	break;
1963 
1964       case Magic('_'):
1965 	c = no_Magic(getchr());
1966 	if (c == '^')		/* "\_^" is start-of-line */
1967 	{
1968 	    ret = regnode(BOL);
1969 	    break;
1970 	}
1971 	if (c == '$')		/* "\_$" is end-of-line */
1972 	{
1973 	    ret = regnode(EOL);
1974 #if defined(FEAT_SYN_HL) || defined(PROTO)
1975 	    had_eol = TRUE;
1976 #endif
1977 	    break;
1978 	}
1979 
	/* "extra" is added to the opcode of the class or collection node
	 * that is emitted further down. */
1980 	extra = ADD_NL;
1981 	*flagp |= HASNL;
1982 
1983 	/* "\_[" is character range plus newline */
1984 	if (c == '[')
1985 	    goto collection;
1986 
1987 	/* "\_x" is character class plus newline */
1988 	/*FALLTHROUGH*/
1989 
1990 	/*
1991 	 * Character classes.
1992 	 */
1993       case Magic('.'):
1994       case Magic('i'):
1995       case Magic('I'):
1996       case Magic('k'):
1997       case Magic('K'):
1998       case Magic('f'):
1999       case Magic('F'):
2000       case Magic('p'):
2001       case Magic('P'):
2002       case Magic('s'):
2003       case Magic('S'):
2004       case Magic('d'):
2005       case Magic('D'):
2006       case Magic('x'):
2007       case Magic('X'):
2008       case Magic('o'):
2009       case Magic('O'):
2010       case Magic('w'):
2011       case Magic('W'):
2012       case Magic('h'):
2013       case Magic('H'):
2014       case Magic('a'):
2015       case Magic('A'):
2016       case Magic('l'):
2017       case Magic('L'):
2018       case Magic('u'):
2019       case Magic('U'):
	/* Every case label above is in classchars, thus the lookup can only
	 * fail when falling through from "\_" with some other character. */
2020 	p = vim_strchr(classchars, no_Magic(c));
2021 	if (p == NULL)
2022 	    EMSG_RET_NULL(_("E63: invalid use of \\_"));
2023 #ifdef FEAT_MBYTE
2024 	/* When '.' is followed by a composing char ignore the dot, so that
2025 	 * the composing char is matched here. */
2026 	if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
2027 	{
2028 	    c = getchr();
2029 	    goto do_multibyte;
2030 	}
2031 #endif
	/* classcodes[] is parallel to classchars: same index, same class */
2032 	ret = regnode(classcodes[p - classchars] + extra);
2033 	*flagp |= HASWIDTH | SIMPLE;
2034 	break;
2035 
2036       case Magic('n'):
2037 	if (reg_string)
2038 	{
2039 	    /* In a string "\n" matches a newline character. */
2040 	    ret = regnode(EXACTLY);
2041 	    regc(NL);
2042 	    regc(NUL);
2043 	    *flagp |= HASWIDTH | SIMPLE;
2044 	}
2045 	else
2046 	{
2047 	    /* In buffer text "\n" matches the end of a line. */
2048 	    ret = regnode(NEWL);
2049 	    *flagp |= HASWIDTH | HASNL;
2050 	}
2051 	break;
2052 
2053       case Magic('('):
2054 	if (one_exactly)
2055 	    EMSG_ONE_RET_NULL;
2056 	ret = reg(REG_PAREN, &flags);
2057 	if (ret == NULL)
2058 	    return NULL;
2059 	*flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2060 	break;
2061 
2062       case NUL:
2063       case Magic('|'):
2064       case Magic('&'):
2065       case Magic(')'):
2066 	if (one_exactly)
2067 	    EMSG_ONE_RET_NULL;
2068 	EMSG_RET_NULL(_(e_internal));	/* Supposed to be caught earlier. */
2069 	/* NOTREACHED */
2070 
	/* A multi without an atom in front of it. */
2071       case Magic('='):
2072       case Magic('?'):
2073       case Magic('+'):
2074       case Magic('@'):
2075       case Magic('{'):
2076       case Magic('*'):
2077 	c = no_Magic(c);
2078 	sprintf((char *)IObuff, _("E64: %s%c follows nothing"),
2079 		(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL)
2080 		? "" : "\\", c);
2081 	EMSG_RET_NULL(IObuff);
2082 	/* NOTREACHED */
2083 
2084       case Magic('~'):		/* previous substitute pattern */
2085 	    if (reg_prev_sub != NULL)
2086 	    {
2087 		char_u	    *lp;
2088 
		/* Emit the text of reg_prev_sub as one EXACTLY node. */
2089 		ret = regnode(EXACTLY);
2090 		lp = reg_prev_sub;
2091 		while (*lp != NUL)
2092 		    regc(*lp++);
2093 		regc(NUL);
2094 		if (*reg_prev_sub != NUL)
2095 		{
2096 		    *flagp |= HASWIDTH;
		    /* "lp" stopped at the NUL: a one byte string is SIMPLE */
2097 		    if ((lp - reg_prev_sub) == 1)
2098 			*flagp |= SIMPLE;
2099 		}
2100 	    }
2101 	    else
2102 		EMSG_RET_NULL(_(e_nopresub));
2103 	    break;
2104 
2105       case Magic('1'):
2106       case Magic('2'):
2107       case Magic('3'):
2108       case Magic('4'):
2109       case Magic('5'):
2110       case Magic('6'):
2111       case Magic('7'):
2112       case Magic('8'):
2113       case Magic('9'):
2114 	    {
2115 		int		    refnum;
2116 
2117 		refnum = c - Magic('0');
2118 		/*
2119 		 * Check if the back reference is legal. We must have seen the
2120 		 * close brace.
2121 		 * TODO: Should also check that we don't refer to something
2122 		 * that is repeated (+*=): what instance of the repetition
2123 		 * should we match?
2124 		 */
2125 		if (!had_endbrace[refnum])
2126 		{
2127 		    /* Trick: check if "@<=" or "@<!" follows, in which case
2128 		     * the \1 can appear before the referenced match. */
2129 		    for (p = regparse; *p != NUL; ++p)
2130 			if (p[0] == '@' && p[1] == '<'
2131 					      && (p[2] == '!' || p[2] == '='))
2132 			    break;
2133 		    if (*p == NUL)
2134 			EMSG_RET_NULL(_("E65: Illegal back reference"));
2135 		}
2136 		ret = regnode(BACKREF + refnum);
2137 	    }
2138 	    break;
2139 
2140       case Magic('z'):
2141 	{
2142 	    c = no_Magic(getchr());
2143 	    switch (c)
2144 	    {
2145 #ifdef FEAT_SYN_HL
2146 		case '(': if (reg_do_extmatch != REX_SET)
2147 			      EMSG_RET_NULL(_(e_z_not_allowed));
2148 			  if (one_exactly)
2149 			      EMSG_ONE_RET_NULL;
2150 			  ret = reg(REG_ZPAREN, &flags);
2151 			  if (ret == NULL)
2152 			      return NULL;
2153 			  *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
2154 			  re_has_z = REX_SET;
2155 			  break;
2156 
2157 		case '1':
2158 		case '2':
2159 		case '3':
2160 		case '4':
2161 		case '5':
2162 		case '6':
2163 		case '7':
2164 		case '8':
2165 		case '9': if (reg_do_extmatch != REX_USE)
2166 			      EMSG_RET_NULL(_(e_z1_not_allowed));
2167 			  ret = regnode(ZREF + c - '0');
2168 			  re_has_z = REX_USE;
2169 			  break;
2170 #endif
2171 
		/* "\zs" and "\ze" are emitted as MOPEN/MCLOSE number zero;
		 * re_mult_next() is given the item name, presumably for the
		 * error it reports when a multi follows. */
2172 		case 's': ret = regnode(MOPEN + 0);
2173 			  if (re_mult_next("\\zs") == FAIL)
2174 			      return NULL;
2175 			  break;
2176 
2177 		case 'e': ret = regnode(MCLOSE + 0);
2178 			  if (re_mult_next("\\ze") == FAIL)
2179 			      return NULL;
2180 			  break;
2181 
2182 		default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
2183 	    }
2184 	}
2185 	break;
2186 
2187       case Magic('%'):
2188 	{
2189 	    c = no_Magic(getchr());
2190 	    switch (c)
2191 	    {
2192 		/* () without a back reference */
2193 		case '(':
2194 		    if (one_exactly)
2195 			EMSG_ONE_RET_NULL;
2196 		    ret = reg(REG_NPAREN, &flags);
2197 		    if (ret == NULL)
2198 			return NULL;
2199 		    *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
2200 		    break;
2201 
2202 		/* Catch \%^ and \%$ regardless of where they appear in the
2203 		 * pattern -- regardless of whether or not it makes sense. */
2204 		case '^':
2205 		    ret = regnode(RE_BOF);
2206 		    break;
2207 
2208 		case '$':
2209 		    ret = regnode(RE_EOF);
2210 		    break;
2211 
2212 		case '#':
2213 		    ret = regnode(CURSOR);
2214 		    break;
2215 
2216 		case 'V':
2217 		    ret = regnode(RE_VISUAL);
2218 		    break;
2219 
2220 		case 'C':
2221 		    ret = regnode(RE_COMPOSING);
2222 		    break;
2223 
2224 		/* \%[abc]: Emit as a list of branches, all ending at the last
2225 		 * branch which matches nothing. */
2226 		case '[':
2227 			  if (one_exactly)	/* doesn't nest */
2228 			      EMSG_ONE_RET_NULL;
2229 			  {
2230 			      char_u	*lastbranch;
2231 			      char_u	*lastnode = NULL;
2232 			      char_u	*br;
2233 
2234 			      ret = NULL;
2235 			      while ((c = getchr()) != ']')
2236 			      {
2237 				  if (c == NUL)
2238 				      EMSG2_RET_NULL(_(e_missing_sb),
2239 						      reg_magic == MAGIC_ALL);
2240 				  br = regnode(BRANCH);
2241 				  if (ret == NULL)
2242 				      ret = br;
2243 				  else
2244 				      regtail(lastnode, br);
2245 
				  /* Put the character back and parse it as
				   * one atom by calling ourselves with
				   * one_exactly set. */
2246 				  ungetchr();
2247 				  one_exactly = TRUE;
2248 				  lastnode = regatom(flagp);
2249 				  one_exactly = FALSE;
2250 				  if (lastnode == NULL)
2251 				      return NULL;
2252 			      }
2253 			      if (ret == NULL)
2254 				  EMSG2_RET_NULL(_(e_empty_sb),
2255 						      reg_magic == MAGIC_ALL);
2256 			      lastbranch = regnode(BRANCH);
2257 			      br = regnode(NOTHING);
2258 			      if (ret != JUST_CALC_SIZE)
2259 			      {
2260 				  regtail(lastnode, br);
2261 				  regtail(lastbranch, br);
2262 				  /* connect all branches to the NOTHING
2263 				   * branch at the end */
2264 				  for (br = ret; br != lastnode; )
2265 				  {
2266 				      if (OP(br) == BRANCH)
2267 				      {
2268 					  regtail(br, lastbranch);
2269 					  br = OPERAND(br);
2270 				      }
2271 				      else
2272 					  br = regnext(br);
2273 				  }
2274 			      }
			      /* The flags set by the nested regatom() calls
			       * are dropped again: the last branch matches
			       * nothing, so there is no guaranteed width. */
2275 			      *flagp &= ~(HASWIDTH | SIMPLE);
2276 			      break;
2277 			  }
2278 
2279 		case 'd':   /* %d123 decimal */
2280 		case 'o':   /* %o123 octal */
2281 		case 'x':   /* %xab hex 2 */
2282 		case 'u':   /* %uabcd hex 4 */
2283 		case 'U':   /* %U1234abcd hex 8 */
2284 			  {
2285 			      int i;
2286 
2287 			      switch (c)
2288 			      {
2289 				  case 'd': i = getdecchrs(); break;
2290 				  case 'o': i = getoctchrs(); break;
2291 				  case 'x': i = gethexchrs(2); break;
2292 				  case 'u': i = gethexchrs(4); break;
2293 				  case 'U': i = gethexchrs(8); break;
2294 				  default:  i = -1; break;
2295 			      }
2296 
			      /* A negative value is handled as "no valid
			       * number found". */
2297 			      if (i < 0)
2298 				  EMSG2_RET_NULL(
2299 					_("E678: Invalid character after %s%%[dxouU]"),
2300 					reg_magic == MAGIC_ALL);
2301 #ifdef FEAT_MBYTE
2302 			      if (use_multibytecode(i))
2303 				  ret = regnode(MULTIBYTECODE);
2304 			      else
2305 #endif
2306 				  ret = regnode(EXACTLY);
			      /* Character code zero is emitted as 0x0a;
			       * presumably because a NUL byte ends the
			       * operand (see the regc(NUL) below). */
2307 			      if (i == 0)
2308 				  regc(0x0a);
2309 			      else
2310 #ifdef FEAT_MBYTE
2311 				  regmbc(i);
2312 #else
2313 				  regc(i);
2314 #endif
2315 			      regc(NUL);
2316 			      *flagp |= HASWIDTH;
2317 			      break;
2318 			  }
2319 
2320 		default:
2321 			  if (VIM_ISDIGIT(c) || c == '<' || c == '>'
2322 								 || c == '\'')
2323 			  {
2324 			      long_u	n = 0;
2325 			      int	cmp;
2326 
			      /* "cmp" keeps the first character: '<', '>',
			       * '\'' or the first digit.  After '<' or '>'
			       * the number or mark starts at the next
			       * character. */
2327 			      cmp = c;
2328 			      if (cmp == '<' || cmp == '>')
2329 				  c = getchr();
2330 			      while (VIM_ISDIGIT(c))
2331 			      {
2332 				  n = n * 10 + (c - '0');
2333 				  c = getchr();
2334 			      }
2335 			      if (c == '\'' && n == 0)
2336 			      {
2337 				  /* "\%'m", "\%<'m" and "\%>'m": Mark */
2338 				  c = getchr();
2339 				  ret = regnode(RE_MARK);
				  /* Operand is two bytes: the mark character
				   * and "cmp". */
2340 				  if (ret == JUST_CALC_SIZE)
2341 				      regsize += 2;
2342 				  else
2343 				  {
2344 				      *regcode++ = c;
2345 				      *regcode++ = cmp;
2346 				  }
2347 				  break;
2348 			      }
2349 			      else if (c == 'l' || c == 'c' || c == 'v')
2350 			      {
2351 				  if (c == 'l')
2352 				      ret = regnode(RE_LNUM);
2353 				  else if (c == 'c')
2354 				      ret = regnode(RE_COL);
2355 				  else
2356 				      ret = regnode(RE_VCOL);
				  /* Operand is five bytes: four written by
				   * re_put_long() and one for "cmp". */
2357 				  if (ret == JUST_CALC_SIZE)
2358 				      regsize += 5;
2359 				  else
2360 				  {
2361 				      /* put the number and the optional
2362 				       * comparator after the opcode */
2363 				      regcode = re_put_long(regcode, n);
2364 				      *regcode++ = cmp;
2365 				  }
2366 				  break;
2367 			      }
2368 			  }
2369 
			  /* Only reached when nothing above recognized the
			   * character(s) after "\%". */
2370 			  EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
2371 						      reg_magic == MAGIC_ALL);
2372 	    }
2373 	}
2374 	break;
2375 
2376       case Magic('['):
2377 collection:
2378 	{
2379 	    char_u	*lp;
2380 
2381 	    /*
2382 	     * If there is no matching ']', we assume the '[' is a normal
2383 	     * character.  This makes 'incsearch' and ":help [" work.
2384 	     */
2385 	    lp = skip_anyof(regparse);
2386 	    if (*lp == ']')	/* there is a matching ']' */
2387 	    {
2388 		int	startc = -1;	/* > 0 when next '-' is a range */
2389 		int	endc;
2390 
2391 		/*
2392 		 * In a character class, different parsing rules apply.
2393 		 * Not even \ is special anymore, nothing is.
		 * The text is therefore read directly through "regparse"
		 * instead of with getchr().
2394 		 */
2395 		if (*regparse == '^')	    /* Complement of range. */
2396 		{
2397 		    ret = regnode(ANYBUT + extra);
2398 		    regparse++;
2399 		}
2400 		else
2401 		    ret = regnode(ANYOF + extra);
2402 
2403 		/* At the start ']' and '-' mean the literal character. */
2404 		if (*regparse == ']' || *regparse == '-')
2405 		{
2406 		    startc = *regparse;
2407 		    regc(*regparse++);
2408 		}
2409 
2410 		while (*regparse != NUL && *regparse != ']')
2411 		{
2412 		    if (*regparse == '-')
2413 		    {
2414 			++regparse;
2415 			/* The '-' is not used for a range at the end and
2416 			 * after or before a '\n'. */
2417 			if (*regparse == ']' || *regparse == NUL
2418 				|| startc == -1
2419 				|| (regparse[0] == '\\' && regparse[1] == 'n'))
2420 			{
2421 			    regc('-');
2422 			    startc = '-';	/* [--x] is a range */
2423 			}
2424 			else
2425 			{
2426 			    /* Also accept "a-[.z.]" */
2427 			    endc = 0;
2428 			    if (*regparse == '[')
2429 				endc = get_coll_element(&regparse);
2430 			    if (endc == 0)
2431 			    {
2432 #ifdef FEAT_MBYTE
2433 				if (has_mbyte)
2434 				    endc = mb_ptr2char_adv(&regparse);
2435 				else
2436 #endif
2437 				    endc = *regparse++;
2438 			    }
2439 
2440 			    /* Handle \o40, \x20 and \u20AC style sequences */
2441 			    if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
2442 				endc = coll_get_char();
2443 
2444 			    if (startc > endc)
2445 				EMSG_RET_NULL(_(e_invrange));
			    /* "startc" itself was already emitted, the
			     * loops below add startc + 1 up to endc. */
2446 #ifdef FEAT_MBYTE
2447 			    if (has_mbyte && ((*mb_char2len)(startc) > 1
2448 						 || (*mb_char2len)(endc) > 1))
2449 			    {
2450 				/* Limit to a range of 256 chars */
2451 				if (endc > startc + 256)
2452 				    EMSG_RET_NULL(_(e_invrange));
2453 				while (++startc <= endc)
2454 				    regmbc(startc);
2455 			    }
2456 			    else
2457 #endif
2458 			    {
2459 #ifdef EBCDIC
2460 				int	alpha_only = FALSE;
2461 
2462 				/* for alphabetical range skip the gaps
2463 				 * 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.  */
2464 				if (isalpha(startc) && isalpha(endc))
2465 				    alpha_only = TRUE;
2466 #endif
2467 				while (++startc <= endc)
2468 #ifdef EBCDIC
2469 				    if (!alpha_only || isalpha(startc))
2470 #endif
2471 					regc(startc);
2472 			    }
2473 			    startc = -1;
2474 			}
2475 		    }
2476 		    /*
2477 		     * Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
2478 		     * accepts "\t", "\e", etc., but only when the 'l' flag in
2479 		     * 'cpoptions' is not included.
2480 		     * Posix doesn't recognize backslash at all.
2481 		     */
2482 		    else if (*regparse == '\\'
2483 			    && !reg_cpo_bsl
2484 			    && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
2485 				|| (!reg_cpo_lit
2486 				    && vim_strchr(REGEXP_ABBR,
2487 						       regparse[1]) != NULL)))
2488 		    {
2489 			regparse++;
2490 			if (*regparse == 'n')
2491 			{
2492 			    /* '\n' in range: also match NL */
2493 			    if (ret != JUST_CALC_SIZE)
2494 			    {
2495 				/* Using \n inside [^] does not change what
2496 				 * matches. "[^\n]" is the same as ".". */
2497 				if (*ret == ANYOF)
2498 				{
2499 				    *ret = ANYOF + ADD_NL;
2500 				    *flagp |= HASNL;
2501 				}
2502 				/* else: must have had a \n already */
2503 			    }
2504 			    regparse++;
2505 			    startc = -1;
2506 			}
2507 			else if (*regparse == 'd'
2508 				|| *regparse == 'o'
2509 				|| *regparse == 'x'
2510 				|| *regparse == 'u'
2511 				|| *regparse == 'U')
2512 			{
2513 			    startc = coll_get_char();
2514 			    if (startc == 0)
2515 				regc(0x0a);
2516 			    else
2517 #ifdef FEAT_MBYTE
2518 				regmbc(startc);
2519 #else
2520 				regc(startc);
2521 #endif
2522 			}
2523 			else
2524 			{
2525 			    startc = backslash_trans(*regparse++);
2526 			    regc(startc);
2527 			}
2528 		    }
2529 		    else if (*regparse == '[')
2530 		    {
2531 			int c_class;
2532 			int cu;
2533 
2534 			c_class = get_char_class(&regparse);
2535 			startc = -1;
2536 			/* Characters assumed to be 8 bits! */
2537 			switch (c_class)
2538 			{
2539 			    case CLASS_NONE:
2540 				c_class = get_equi_class(&regparse);
2541 				if (c_class != 0)
2542 				{
2543 				    /* produce equivalence class */
2544 				    reg_equi_class(c_class);
2545 				}
2546 				else if ((c_class =
2547 					    get_coll_element(&regparse)) != 0)
2548 				{
2549 				    /* produce a collating element */
2550 				    regmbc(c_class);
2551 				}
2552 				else
2553 				{
2554 				    /* literal '[', allow [[-x] as a range */
2555 				    startc = *regparse++;
2556 				    regc(startc);
2557 				}
2558 				break;
2559 			    case CLASS_ALNUM:
2560 				for (cu = 1; cu <= 255; cu++)
2561 				    if (isalnum(cu))
2562 					regc(cu);
2563 				break;
2564 			    case CLASS_ALPHA:
2565 				for (cu = 1; cu <= 255; cu++)
2566 				    if (isalpha(cu))
2567 					regc(cu);
2568 				break;
2569 			    case CLASS_BLANK:
2570 				regc(' ');
2571 				regc('\t');
2572 				break;
2573 			    case CLASS_CNTRL:
2574 				for (cu = 1; cu <= 255; cu++)
2575 				    if (iscntrl(cu))
2576 					regc(cu);
2577 				break;
2578 			    case CLASS_DIGIT:
2579 				for (cu = 1; cu <= 255; cu++)
2580 				    if (VIM_ISDIGIT(cu))
2581 					regc(cu);
2582 				break;
2583 			    case CLASS_GRAPH:
2584 				for (cu = 1; cu <= 255; cu++)
2585 				    if (isgraph(cu))
2586 					regc(cu);
2587 				break;
2588 			    case CLASS_LOWER:
2589 				for (cu = 1; cu <= 255; cu++)
2590 				    if (MB_ISLOWER(cu))
2591 					regc(cu);
2592 				break;
2593 			    case CLASS_PRINT:
2594 				for (cu = 1; cu <= 255; cu++)
2595 				    if (vim_isprintc(cu))
2596 					regc(cu);
2597 				break;
2598 			    case CLASS_PUNCT:
2599 				for (cu = 1; cu <= 255; cu++)
2600 				    if (ispunct(cu))
2601 					regc(cu);
2602 				break;
2603 			    case CLASS_SPACE:
2604 				for (cu = 9; cu <= 13; cu++)
2605 				    regc(cu);
2606 				regc(' ');
2607 				break;
2608 			    case CLASS_UPPER:
2609 				for (cu = 1; cu <= 255; cu++)
2610 				    if (MB_ISUPPER(cu))
2611 					regc(cu);
2612 				break;
2613 			    case CLASS_XDIGIT:
2614 				for (cu = 1; cu <= 255; cu++)
2615 				    if (vim_isxdigit(cu))
2616 					regc(cu);
2617 				break;
2618 			    case CLASS_TAB:
2619 				regc('\t');
2620 				break;
2621 			    case CLASS_RETURN:
2622 				regc('\r');
2623 				break;
2624 			    case CLASS_BACKSPACE:
2625 				regc('\b');
2626 				break;
2627 			    case CLASS_ESCAPE:
2628 				regc('\033');
2629 				break;
2630 			}
2631 		    }
2632 		    else
2633 		    {
2634 #ifdef FEAT_MBYTE
2635 			if (has_mbyte)
2636 			{
2637 			    int	len;
2638 
2639 			    /* produce a multibyte character, including any
2640 			     * following composing characters */
2641 			    startc = mb_ptr2char(regparse);
2642 			    len = (*mb_ptr2len)(regparse);
2643 			    if (enc_utf8 && utf_char2len(startc) != len)
2644 				startc = -1;	/* composing chars */
2645 			    while (--len >= 0)
2646 				regc(*regparse++);
2647 			}
2648 			else
2649 #endif
2650 			{
2651 			    startc = *regparse++;
2652 			    regc(startc);
2653 			}
2654 		    }
2655 		}
2656 		regc(NUL);
2657 		prevchr_len = 1;	/* last char was the ']' */
2658 		if (*regparse != ']')
2659 		    EMSG_RET_NULL(_(e_toomsbra));	/* Cannot happen? */
2660 		skipchr();	    /* let's be friends with the lexer again */
2661 		*flagp |= HASWIDTH | SIMPLE;
2662 		break;
2663 	    }
2664 	    else if (reg_strict)
2665 		EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
2666 	}
	/* No matching ']': the '[' is handled as an ordinary character. */
2667 	/* FALLTHROUGH */
2668 
2669       default:
2670 	{
2671 	    int		len;
2672 
2673 #ifdef FEAT_MBYTE
2674 	    /* A multi-byte character is handled as a separate atom if it's
2675 	     * before a multi and when it's a composing char. */
2676 	    if (use_multibytecode(c))
2677 	    {
2678 do_multibyte:
2679 		ret = regnode(MULTIBYTECODE);
2680 		regmbc(c);
2681 		*flagp |= HASWIDTH | SIMPLE;
2682 		break;
2683 	    }
2684 #endif
2685 
2686 	    ret = regnode(EXACTLY);
2687 
2688 	    /*
2689 	     * Append characters as long as:
2690 	     * - there is no following multi, we then need the character in
2691 	     *   front of it as a single character operand
2692 	     * - not running into a Magic character
2693 	     * - "one_exactly" is not set
2694 	     * But always emit at least one character.  Might be a Multi,
2695 	     * e.g., a "[" without matching "]".
2696 	     */
2697 	    for (len = 0; c != NUL && (len == 0
2698 			|| (re_multi_type(peekchr()) == NOT_MULTI
2699 			    && !one_exactly
2700 			    && !is_Magic(c))); ++len)
2701 	    {
2702 		c = no_Magic(c);
2703 #ifdef FEAT_MBYTE
2704 		if (has_mbyte)
2705 		{
2706 		    regmbc(c);
2707 		    if (enc_utf8)
2708 		    {
2709 			int	l;
2710 
2711 			/* Need to get composing character too. */
2712 			for (;;)
2713 			{
2714 			    l = utf_ptr2len(regparse);
2715 			    if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2716 				break;
2717 			    regmbc(utf_ptr2char(regparse));
2718 			    skipchr();
2719 			}
2720 		    }
2721 		}
2722 		else
2723 #endif
2724 		    regc(c);
2725 		c = getchr();
2726 	    }
	    /* The character that ended the loop is not part of this atom. */
2727 	    ungetchr();
2728 
2729 	    regc(NUL);
2730 	    *flagp |= HASWIDTH;
2731 	    if (len == 1)
2732 		*flagp |= SIMPLE;
2733 	}
2734 	break;
2735     }
2736 
2737     return ret;
2738 }
2739 
#ifdef FEAT_MBYTE
/*
 * Decide between a MULTIBYTECODE and an EXACTLY node for character "c".
 * Returns non-zero for MULTIBYTECODE.  That is when multi-byte is in use,
 * "c" takes more than one byte and either a multi follows or, with utf-8,
 * "c" is a composing character.
 */
    static int
use_multibytecode(c)
    int c;
{
    /* A character that takes a single byte never needs MULTIBYTECODE. */
    if (!has_mbyte || (*mb_char2len)(c) <= 1)
	return 0;

    return re_multi_type(peekchr()) != NOT_MULTI
				      || (enc_utf8 && utf_iscomposing(c));
}
#endif
2754 
2755 /*
2756  * Emit a node.
2757  * Return pointer to generated code.
2758  */
2759     static char_u *
2760 regnode(op)
2761     int		op;
2762 {
2763     char_u  *ret;
2764 
2765     ret = regcode;
2766     if (ret == JUST_CALC_SIZE)
2767 	regsize += 3;
2768     else
2769     {
2770 	*regcode++ = op;
2771 	*regcode++ = NUL;		/* Null "next" pointer. */
2772 	*regcode++ = NUL;
2773     }
2774     return ret;
2775 }
2776 
2777 /*
2778  * Emit (if appropriate) a byte of code
2779  */
2780     static void
2781 regc(b)
2782     int		b;
2783 {
2784     if (regcode == JUST_CALC_SIZE)
2785 	regsize++;
2786     else
2787 	*regcode++ = b;
2788 }
2789 
#ifdef FEAT_MBYTE
/*
 * Emit character "c" of code in its multi-byte form, or only add its byte
 * length to regsize when regcode is JUST_CALC_SIZE.
 * Does nothing for a character above 0xff when multi-byte is not in use.
 */
    static void
regmbc(c)
    int		c;
{
    if (c > 0xff && !has_mbyte)
	return;

    if (regcode != JUST_CALC_SIZE)
	regcode += (*mb_char2bytes)(c, regcode);
    else
	regsize += (*mb_char2len)(c);
}
#endif
2806 
2807 /*
2808  * Insert an operator in front of already-emitted operand
2809  *
2810  * Means relocating the operand.
2811  */
2812     static void
2813 reginsert(op, opnd)
2814     int		op;
2815     char_u     *opnd;
2816 {
2817     char_u	*src;
2818     char_u	*dst;
2819     char_u	*place;
2820 
2821     if (regcode == JUST_CALC_SIZE)
2822     {
2823 	regsize += 3;
2824 	return;
2825     }
2826     src = regcode;
2827     regcode += 3;
2828     dst = regcode;
2829     while (src > opnd)
2830 	*--dst = *--src;
2831 
2832     place = opnd;		/* Op node, where operand used to be. */
2833     *place++ = op;
2834     *place++ = NUL;
2835     *place = NUL;
2836 }
2837 
2838 /*
2839  * Insert an operator in front of already-emitted operand.
2840  * Add a number to the operator.
2841  */
2842     static void
2843 reginsert_nr(op, val, opnd)
2844     int		op;
2845     long	val;
2846     char_u	*opnd;
2847 {
2848     char_u	*src;
2849     char_u	*dst;
2850     char_u	*place;
2851 
2852     if (regcode == JUST_CALC_SIZE)
2853     {
2854 	regsize += 7;
2855 	return;
2856     }
2857     src = regcode;
2858     regcode += 7;
2859     dst = regcode;
2860     while (src > opnd)
2861 	*--dst = *--src;
2862 
2863     place = opnd;		/* Op node, where operand used to be. */
2864     *place++ = op;
2865     *place++ = NUL;
2866     *place++ = NUL;
2867     place = re_put_long(place, (long_u)val);
2868 }
2869 
2870 /*
2871  * Insert an operator in front of already-emitted operand.
2872  * The operator has the given limit values as operands.  Also set next pointer.
2873  *
2874  * Means relocating the operand.
2875  */
2876     static void
2877 reginsert_limits(op, minval, maxval, opnd)
2878     int		op;
2879     long	minval;
2880     long	maxval;
2881     char_u	*opnd;
2882 {
2883     char_u	*src;
2884     char_u	*dst;
2885     char_u	*place;
2886 
2887     if (regcode == JUST_CALC_SIZE)
2888     {
2889 	regsize += 11;
2890 	return;
2891     }
2892     src = regcode;
2893     regcode += 11;
2894     dst = regcode;
2895     while (src > opnd)
2896 	*--dst = *--src;
2897 
2898     place = opnd;		/* Op node, where operand used to be. */
2899     *place++ = op;
2900     *place++ = NUL;
2901     *place++ = NUL;
2902     place = re_put_long(place, (long_u)minval);
2903     place = re_put_long(place, (long_u)maxval);
2904     regtail(opnd, place);
2905 }
2906 
2907 /*
2908  * Write a long as four bytes at "p" and return pointer to the next char.
2909  */
2910     static char_u *
2911 re_put_long(p, val)
2912     char_u	*p;
2913     long_u	val;
2914 {
2915     *p++ = (char_u) ((val >> 24) & 0377);
2916     *p++ = (char_u) ((val >> 16) & 0377);
2917     *p++ = (char_u) ((val >> 8) & 0377);
2918     *p++ = (char_u) (val & 0377);
2919     return p;
2920 }
2921 
2922 /*
2923  * Set the next-pointer at the end of a node chain.
2924  */
2925     static void
2926 regtail(p, val)
2927     char_u	*p;
2928     char_u	*val;
2929 {
2930     char_u	*scan;
2931     char_u	*temp;
2932     int		offset;
2933 
2934     if (p == JUST_CALC_SIZE)
2935 	return;
2936 
2937     /* Find last node. */
2938     scan = p;
2939     for (;;)
2940     {
2941 	temp = regnext(scan);
2942 	if (temp == NULL)
2943 	    break;
2944 	scan = temp;
2945     }
2946 
2947     if (OP(scan) == BACK)
2948 	offset = (int)(scan - val);
2949     else
2950 	offset = (int)(val - scan);
2951     /* When the offset uses more than 16 bits it can no longer fit in the two
2952      * bytes available.  Use a global flag to avoid having to check return
2953      * values in too many places. */
2954     if (offset > 0xffff)
2955 	reg_toolong = TRUE;
2956     else
2957     {
2958 	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
2959 	*(scan + 2) = (char_u) (offset & 0377);
2960     }
2961 }
2962 
2963 /*
2964  * Like regtail, on item after a BRANCH; nop if none.
2965  */
2966     static void
2967 regoptail(p, val)
2968     char_u	*p;
2969     char_u	*val;
2970 {
2971     /* When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless" */
2972     if (p == NULL || p == JUST_CALC_SIZE
2973 	    || (OP(p) != BRANCH
2974 		&& (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
2975 	return;
2976     regtail(OPERAND(p), val);
2977 }
2978 
2979 /*
2980  * Functions for getting characters from the regexp input.
2981  */
2982 
2983 static int	at_start;	/* True when on the first character */
2984 static int	prev_at_start;  /* True when on the second character */
2985 
2986 /*
2987  * Start parsing at "str".
2988  */
2989     static void
2990 initchr(str)
2991     char_u *str;
2992 {
2993     regparse = str;
2994     prevchr_len = 0;
2995     curchr = prevprevchr = prevchr = nextchr = -1;
2996     at_start = TRUE;
2997     prev_at_start = FALSE;
2998 }
2999 
3000 /*
3001  * Save the current parse state, so that it can be restored and parsing
3002  * starts in the same state again.
3003  */
3004     static void
3005 save_parse_state(ps)
3006     parse_state_T *ps;
3007 {
3008     ps->regparse = regparse;
3009     ps->prevchr_len = prevchr_len;
3010     ps->curchr = curchr;
3011     ps->prevchr = prevchr;
3012     ps->prevprevchr = prevprevchr;
3013     ps->nextchr = nextchr;
3014     ps->at_start = at_start;
3015     ps->prev_at_start = prev_at_start;
3016     ps->regnpar = regnpar;
3017 }
3018 
3019 /*
3020  * Restore a previously saved parse state.
3021  */
3022     static void
3023 restore_parse_state(ps)
3024     parse_state_T *ps;
3025 {
3026     regparse = ps->regparse;
3027     prevchr_len = ps->prevchr_len;
3028     curchr = ps->curchr;
3029     prevchr = ps->prevchr;
3030     prevprevchr = ps->prevprevchr;
3031     nextchr = ps->nextchr;
3032     at_start = ps->at_start;
3033     prev_at_start = ps->prev_at_start;
3034     regnpar = ps->regnpar;
3035 }
3036 
3037 
/*
 * Get the next character without advancing.
 *
 * The result is cached in "curchr": the pattern at "regparse" is only
 * examined when "curchr" is -1.  Depending on "reg_magic" and on the
 * position in the pattern the character is returned either as-is or in its
 * Magic() form.
 * Returns the (possibly cached) value of "curchr".
 */
    static int
peekchr()
{
    /* Non-zero while lexing the character that follows a backslash; it is
     * raised around the recursive call in the '\\' case below. */
    static int	after_slash = FALSE;

    if (curchr == -1)
    {
	switch (curchr = regparse[0])
	{
	case '.':
	case '[':
	case '~':
	    /* magic when 'magic' is on */
	    if (reg_magic >= MAGIC_ON)
		curchr = Magic(curchr);
	    break;
	case '(':
	case ')':
	case '{':
	case '%':
	case '+':
	case '=':
	case '?':
	case '@':
	case '!':
	case '&':
	case '|':
	case '<':
	case '>':
	case '#':	/* future ext. */
	case '"':	/* future ext. */
	case '\'':	/* future ext. */
	case ',':	/* future ext. */
	case '-':	/* future ext. */
	case ':':	/* future ext. */
	case ';':	/* future ext. */
	case '`':	/* future ext. */
	case '/':	/* Can't be used in / command */
	    /* magic only after "\v" */
	    if (reg_magic == MAGIC_ALL)
		curchr = Magic(curchr);
	    break;
	case '*':
	    /* * is not magic as the very first character, eg "?*ptr", when
	     * after '^', eg "/^*ptr" and when after "\(", "\|", "\&".  But
	     * "\(\*" is not magic, thus must be magic if "after_slash" */
	    if (reg_magic >= MAGIC_ON
		    && !at_start
		    && !(prev_at_start && prevchr == Magic('^'))
		    && (after_slash
			|| (prevchr != Magic('(')
			    && prevchr != Magic('&')
			    && prevchr != Magic('|'))))
		curchr = Magic('*');
	    break;
	case '^':
	    /* '^' is only magic as the very first character and if it's after
	     * "\(", "\|", "\&" or "\n" */
	    if (reg_magic >= MAGIC_OFF
		    && (at_start
			|| reg_magic == MAGIC_ALL
			|| prevchr == Magic('(')
			|| prevchr == Magic('|')
			|| prevchr == Magic('&')
			|| prevchr == Magic('n')
			|| (no_Magic(prevchr) == '('
			    && prevprevchr == Magic('%'))))
	    {
		curchr = Magic('^');
		/* What follows a magic '^' counts as being at the start
		 * again. */
		at_start = TRUE;
		prev_at_start = FALSE;
	    }
	    break;
	case '$':
	    /* '$' is only magic as the very last char and if it's in front of
	     * either "\|", "\)", "\&", or "\n" */
	    if (reg_magic >= MAGIC_OFF)
	    {
		char_u *p = regparse + 1;
		int is_magic_all = (reg_magic == MAGIC_ALL);

		/* ignore \c \C \m \M \v \V and \Z after '$' */
		while (p[0] == '\\' && (p[1] == 'c' || p[1] == 'C'
				|| p[1] == 'm' || p[1] == 'M'
				|| p[1] == 'v' || p[1] == 'V' || p[1] == 'Z'))
		{
		    /* Track the magicness that will be in effect for the
		     * character after these items. */
		    if (p[1] == 'v')
			is_magic_all = TRUE;
		    else if (p[1] == 'm' || p[1] == 'M' || p[1] == 'V')
			is_magic_all = FALSE;
		    p += 2;
		}
		if (p[0] == NUL
			|| (p[0] == '\\'
			    && (p[1] == '|' || p[1] == '&' || p[1] == ')'
				|| p[1] == 'n'))
			|| (is_magic_all
			       && (p[0] == '|' || p[0] == '&' || p[0] == ')'))
			|| reg_magic == MAGIC_ALL)
		    curchr = Magic('$');
	    }
	    break;
	case '\\':
	    {
		int c = regparse[1];

		if (c == NUL)
		    curchr = '\\';	/* trailing '\' */
		else if (
#ifdef EBCDIC
			vim_strchr(META, c)
#else
			c <= '~' && META_flags[c]
#endif
			)
		{
		    /*
		     * META contains everything that may be magic sometimes,
		     * except ^ and $ ("\^" and "\$" are only magic after
		     * "\V").  We now fetch the next character and toggle its
		     * magicness.  Therefore, \ is so meta-magic that it is
		     * not in META.
		     */
		    curchr = -1;
		    prev_at_start = at_start;
		    at_start = FALSE;	/* be able to say "/\*ptr" */
		    /* Lex the character after the backslash by recursing
		     * with "regparse" temporarily moved one byte forward. */
		    ++regparse;
		    ++after_slash;
		    peekchr();
		    --regparse;
		    --after_slash;
		    curchr = toggle_Magic(curchr);
		}
		else if (vim_strchr(REGEXP_ABBR, c))
		{
		    /*
		     * Handle abbreviations, like "\t" for TAB -- webb
		     */
		    curchr = backslash_trans(c);
		}
		else if (reg_magic == MAGIC_NONE && (c == '$' || c == '^'))
		    curchr = toggle_Magic(c);
		else
		{
		    /*
		     * Next character can never be (made) magic?
		     * Then backslashing it won't do anything.
		     */
#ifdef FEAT_MBYTE
		    if (has_mbyte)
			curchr = (*mb_ptr2char)(regparse + 1);
		    else
#endif
			curchr = c;
		}
		break;
	    }

#ifdef FEAT_MBYTE
	default:
	    /* Any other byte: with a multi-byte encoding decode the whole
	     * character, otherwise keep the byte stored by the switch. */
	    if (has_mbyte)
		curchr = (*mb_ptr2char)(regparse);
#endif
	}
    }

    return curchr;
}
3209 
3210 /*
3211  * Eat one lexed character.  Do this in a way that we can undo it.
3212  */
3213     static void
3214 skipchr()
3215 {
3216     /* peekchr() eats a backslash, do the same here */
3217     if (*regparse == '\\')
3218 	prevchr_len = 1;
3219     else
3220 	prevchr_len = 0;
3221     if (regparse[prevchr_len] != NUL)
3222     {
3223 #ifdef FEAT_MBYTE
3224 	if (enc_utf8)
3225 	    /* exclude composing chars that mb_ptr2len does include */
3226 	    prevchr_len += utf_ptr2len(regparse + prevchr_len);
3227 	else if (has_mbyte)
3228 	    prevchr_len += (*mb_ptr2len)(regparse + prevchr_len);
3229 	else
3230 #endif
3231 	    ++prevchr_len;
3232     }
3233     regparse += prevchr_len;
3234     prev_at_start = at_start;
3235     at_start = FALSE;
3236     prevprevchr = prevchr;
3237     prevchr = curchr;
3238     curchr = nextchr;	    /* use previously unget char, or -1 */
3239     nextchr = -1;
3240 }
3241 
3242 /*
3243  * Skip a character while keeping the value of prev_at_start for at_start.
3244  * prevchr and prevprevchr are also kept.
3245  */
3246     static void
3247 skipchr_keepstart()
3248 {
3249     int as = prev_at_start;
3250     int pr = prevchr;
3251     int prpr = prevprevchr;
3252 
3253     skipchr();
3254     at_start = as;
3255     prevchr = pr;
3256     prevprevchr = prpr;
3257 }
3258 
/*
 * Lexical analyzer step: return the next pattern character as seen by
 * peekchr() (which knows about magic and such) and advance past it.
 */
    static int
getchr()
{
    int lexed;

    lexed = peekchr();
    skipchr();
    return lexed;
}
3271 
3272 /*
3273  * put character back.  Works only once!
3274  */
3275     static void
3276 ungetchr()
3277 {
3278     nextchr = curchr;
3279     curchr = prevchr;
3280     prevchr = prevprevchr;
3281     at_start = prev_at_start;
3282     prev_at_start = FALSE;
3283 
3284     /* Backup regparse, so that it's at the same position as before the
3285      * getchr(). */
3286     regparse -= prevchr_len;
3287 }
3288 
3289 /*
3290  * Get and return the value of the hex string at the current position.
3291  * Return -1 if there is no valid hex number.
3292  * The position is updated:
3293  *     blahblah\%x20asdf
3294  *	   before-^ ^-after
3295  * The parameter controls the maximum number of input characters. This will be
3296  * 2 when reading a \%x20 sequence and 4 when reading a \%u20AC sequence.
3297  */
3298     static int
3299 gethexchrs(maxinputlen)
3300     int		maxinputlen;
3301 {
3302     int		nr = 0;
3303     int		c;
3304     int		i;
3305 
3306     for (i = 0; i < maxinputlen; ++i)
3307     {
3308 	c = regparse[0];
3309 	if (!vim_isxdigit(c))
3310 	    break;
3311 	nr <<= 4;
3312 	nr |= hex2nr(c);
3313 	++regparse;
3314     }
3315 
3316     if (i == 0)
3317 	return -1;
3318     return nr;
3319 }
3320 
3321 /*
3322  * Get and return the value of the decimal string immediately after the
3323  * current position. Return -1 for invalid.  Consumes all digits.
3324  */
3325     static int
3326 getdecchrs()
3327 {
3328     int		nr = 0;
3329     int		c;
3330     int		i;
3331 
3332     for (i = 0; ; ++i)
3333     {
3334 	c = regparse[0];
3335 	if (c < '0' || c > '9')
3336 	    break;
3337 	nr *= 10;
3338 	nr += c - '0';
3339 	++regparse;
3340 	curchr = -1; /* no longer valid */
3341     }
3342 
3343     if (i == 0)
3344 	return -1;
3345     return nr;
3346 }
3347 
3348 /*
3349  * get and return the value of the octal string immediately after the current
3350  * position. Return -1 for invalid, or 0-255 for valid. Smart enough to handle
3351  * numbers > 377 correctly (for example, 400 is treated as 40) and doesn't
3352  * treat 8 or 9 as recognised characters. Position is updated:
3353  *     blahblah\%o210asdf
3354  *	   before-^  ^-after
3355  */
3356     static int
3357 getoctchrs()
3358 {
3359     int		nr = 0;
3360     int		c;
3361     int		i;
3362 
3363     for (i = 0; i < 3 && nr < 040; ++i)
3364     {
3365 	c = regparse[0];
3366 	if (c < '0' || c > '7')
3367 	    break;
3368 	nr <<= 3;
3369 	nr |= hex2nr(c);
3370 	++regparse;
3371     }
3372 
3373     if (i == 0)
3374 	return -1;
3375     return nr;
3376 }
3377 
3378 /*
3379  * Get a number after a backslash that is inside [].
3380  * When nothing is recognized return a backslash.
3381  */
3382     static int
3383 coll_get_char()
3384 {
3385     int	    nr = -1;
3386 
3387     switch (*regparse++)
3388     {
3389 	case 'd': nr = getdecchrs(); break;
3390 	case 'o': nr = getoctchrs(); break;
3391 	case 'x': nr = gethexchrs(2); break;
3392 	case 'u': nr = gethexchrs(4); break;
3393 	case 'U': nr = gethexchrs(8); break;
3394     }
3395     if (nr < 0)
3396     {
3397 	/* If getting the number fails be backwards compatible: the character
3398 	 * is a backslash. */
3399 	--regparse;
3400 	nr = '\\';
3401     }
3402     return nr;
3403 }
3404 
3405 /*
3406  * read_limits - Read two integers to be taken as a minimum and maximum.
3407  * If the first character is '-', then the range is reversed.
3408  * Should end with 'end'.  If minval is missing, zero is default, if maxval is
3409  * missing, a very big number is the default.
3410  */
3411     static int
3412 read_limits(minval, maxval)
3413     long	*minval;
3414     long	*maxval;
3415 {
3416     int		reverse = FALSE;
3417     char_u	*first_char;
3418     long	tmp;
3419 
3420     if (*regparse == '-')
3421     {
3422 	/* Starts with '-', so reverse the range later */
3423 	regparse++;
3424 	reverse = TRUE;
3425     }
3426     first_char = regparse;
3427     *minval = getdigits(&regparse);
3428     if (*regparse == ',')	    /* There is a comma */
3429     {
3430 	if (vim_isdigit(*++regparse))
3431 	    *maxval = getdigits(&regparse);
3432 	else
3433 	    *maxval = MAX_LIMIT;
3434     }
3435     else if (VIM_ISDIGIT(*first_char))
3436 	*maxval = *minval;	    /* It was \{n} or \{-n} */
3437     else
3438 	*maxval = MAX_LIMIT;	    /* It was \{} or \{-} */
3439     if (*regparse == '\\')
3440 	regparse++;	/* Allow either \{...} or \{...\} */
3441     if (*regparse != '}')
3442     {
3443 	sprintf((char *)IObuff, _("E554: Syntax error in %s{...}"),
3444 					  reg_magic == MAGIC_ALL ? "" : "\\");
3445 	EMSG_RET_FAIL(IObuff);
3446     }
3447 
3448     /*
3449      * Reverse the range if there was a '-', or make sure it is in the right
3450      * order otherwise.
3451      */
3452     if ((!reverse && *minval > *maxval) || (reverse && *minval < *maxval))
3453     {
3454 	tmp = *minval;
3455 	*minval = *maxval;
3456 	*maxval = tmp;
3457     }
3458     skipchr();		/* let's be friends with the lexer again */
3459     return OK;
3460 }
3461 
3462 /*
3463  * vim_regexec and friends
3464  */
3465 
3466 /*
3467  * Global work variables for vim_regexec().
3468  */
3469 
3470 /* The current match-position is remembered with these variables: */
3471 static linenr_T	reglnum;	/* line number, relative to first line */
3472 static char_u	*regline;	/* start of current line */
3473 static char_u	*reginput;	/* current input, points into "regline" */
3474 
3475 static int	need_clear_subexpr;	/* subexpressions still need to be
3476 					 * cleared */
3477 #ifdef FEAT_SYN_HL
3478 static int	need_clear_zsubexpr = FALSE;	/* extmatch subexpressions
3479 						 * still need to be cleared */
3480 #endif
3481 
/*
 * Structure used to save the current input state, when it needs to be
 * restored after trying a match.  Used by reg_save() and reg_restore().
 * Also stores the length of "backpos".
 * Which member of "rs_u" applies depends on whether a single-line or a
 * multi-line regexp is being matched.
 */
typedef struct
{
    union
    {
	char_u	*ptr;	/* reginput pointer, for single-line regexp */
	lpos_T	pos;	/* reginput pos, for multi-line regexp */
    } rs_u;
    int		rs_len;	/* presumably the length of "backpos" mentioned
			 * above -- confirm in reg_save() */
} regsave_T;
3496 
3497 /* struct to save start/end pointer/position in for \(\) */
3498 typedef struct
3499 {
3500     union
3501     {
3502 	char_u	*ptr;
3503 	lpos_T	pos;
3504     } se_u;
3505 } save_se_T;
3506 
/*
 * Used for BEHIND and NOBEHIND matching: two saved input states plus a copy
 * of the sub-expression bookkeeping, with one start and one end slot for
 * each of the NSUBEXP sub-expressions.
 * NOTE(review): the fields are presumably filled by save_subexpr() and
 * consumed by restore_subexpr(), whose prototypes take a regbehind_T --
 * confirm there.
 */
typedef struct regbehind_S
{
    regsave_T	save_after;
    regsave_T	save_behind;
    int		save_need_clear_subexpr;
    save_se_T   save_start[NSUBEXP];
    save_se_T   save_end[NSUBEXP];
} regbehind_T;
3516 
3517 static char_u	*reg_getline __ARGS((linenr_T lnum));
3518 static long	bt_regexec_both __ARGS((char_u *line, colnr_T col, proftime_T *tm));
3519 static long	regtry __ARGS((bt_regprog_T *prog, colnr_T col));
3520 static void	cleanup_subexpr __ARGS((void));
3521 #ifdef FEAT_SYN_HL
3522 static void	cleanup_zsubexpr __ARGS((void));
3523 #endif
3524 static void	save_subexpr __ARGS((regbehind_T *bp));
3525 static void	restore_subexpr __ARGS((regbehind_T *bp));
3526 static void	reg_nextline __ARGS((void));
3527 static void	reg_save __ARGS((regsave_T *save, garray_T *gap));
3528 static void	reg_restore __ARGS((regsave_T *save, garray_T *gap));
3529 static int	reg_save_equal __ARGS((regsave_T *save));
3530 static void	save_se_multi __ARGS((save_se_T *savep, lpos_T *posp));
3531 static void	save_se_one __ARGS((save_se_T *savep, char_u **pp));
3532 
/*
 * Save the sub-expressions before attempting a match.
 * For a multi-line regexp (REG_MULTI) "posp" is saved with save_se_multi(),
 * otherwise "pp" is saved with save_se_one().
 * NOTE(review): the expansion is a bare ?: expression without enclosing
 * parentheses; only use it as a complete statement.
 */
#define save_se(savep, posp, pp) \
    REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))

/*
 * After a failed match restore the sub-expressions.
 * Copies the saved position back into "*posp" for a multi-line regexp
 * (REG_MULTI), or the saved pointer back into "*pp" otherwise.
 * NOTE(review): the expansion is a brace block, not do { } while (0); a
 * trailing ';' after it in an unbraced if/else would end the if statement.
 */
#define restore_se(savep, posp, pp) { \
    if (REG_MULTI) \
	*(posp) = (savep)->se_u.pos; \
    else \
	*(pp) = (savep)->se_u.ptr; }
3543 
3544 static int	re_num_cmp __ARGS((long_u val, char_u *scan));
3545 static int	match_with_backref __ARGS((linenr_T start_lnum, colnr_T start_col, linenr_T end_lnum, colnr_T end_col, int *bytelen));
3546 static int	regmatch __ARGS((char_u *prog));
3547 static int	regrepeat __ARGS((char_u *p, long maxcount));
3548 
3549 #ifdef DEBUG
3550 int		regnarrate = 0;
3551 #endif
3552 
3553 /*
3554  * Internal copy of 'ignorecase'.  It is set at each call to vim_regexec().
3555  * Normally it gets the value of "rm_ic" or "rmm_ic", but when the pattern
3556  * contains '\c' or '\C' the value is overruled.
3557  */
3558 static int	ireg_ic;
3559 
3560 #ifdef FEAT_MBYTE
3561 /*
3562  * Similar to ireg_ic, but only for 'combining' characters.  Set with \Z flag
3563  * in the regexp.  Defaults to false, always.
3564  */
3565 static int	ireg_icombine;
3566 #endif
3567 
3568 /*
3569  * Copy of "rmm_maxcol": maximum column to search for a match.  Zero when
3570  * there is no maximum.
3571  */
3572 static colnr_T	ireg_maxcol;
3573 
3574 /*
3575  * Sometimes need to save a copy of a line.  Since alloc()/free() is very
3576  * slow, we keep one allocated piece of memory and only re-allocate it when
3577  * it's too small.  It's freed in bt_regexec_both() when finished.
3578  */
3579 static char_u	*reg_tofree = NULL;
3580 static unsigned	reg_tofreelen;
3581 
3582 /*
3583  * These variables are set when executing a regexp to speed up the execution.
3584  * Which ones are set depends on whether a single-line or multi-line match is
3585  * done:
3586  *			single-line		multi-line
3587  * reg_match		&regmatch_T		NULL
3588  * reg_mmatch		NULL			&regmmatch_T
3589  * reg_startp		reg_match->startp	<invalid>
3590  * reg_endp		reg_match->endp		<invalid>
3591  * reg_startpos		<invalid>		reg_mmatch->startpos
3592  * reg_endpos		<invalid>		reg_mmatch->endpos
3593  * reg_win		NULL			window in which to search
3594  * reg_buf		curbuf			buffer in which to search
3595  * reg_firstlnum	<invalid>		first line in which to search
3596  * reg_maxline		0			last line nr
3597  * reg_line_lbr		FALSE or TRUE		FALSE
3598  */
3599 static regmatch_T	*reg_match;
3600 static regmmatch_T	*reg_mmatch;
3601 static char_u		**reg_startp = NULL;
3602 static char_u		**reg_endp = NULL;
3603 static lpos_T		*reg_startpos = NULL;
3604 static lpos_T		*reg_endpos = NULL;
3605 static win_T		*reg_win;
3606 static buf_T		*reg_buf;
3607 static linenr_T		reg_firstlnum;
3608 static linenr_T		reg_maxline;
3609 static int		reg_line_lbr;	    /* "\n" in string is line break */
3610 
3611 /* Values for rs_state in regitem_T. */
3612 typedef enum regstate_E
3613 {
3614     RS_NOPEN = 0	/* NOPEN and NCLOSE */
3615     , RS_MOPEN		/* MOPEN + [0-9] */
3616     , RS_MCLOSE		/* MCLOSE + [0-9] */
3617 #ifdef FEAT_SYN_HL
3618     , RS_ZOPEN		/* ZOPEN + [0-9] */
3619     , RS_ZCLOSE		/* ZCLOSE + [0-9] */
3620 #endif
3621     , RS_BRANCH		/* BRANCH */
3622     , RS_BRCPLX_MORE	/* BRACE_COMPLEX and trying one more match */
3623     , RS_BRCPLX_LONG	/* BRACE_COMPLEX and trying longest match */
3624     , RS_BRCPLX_SHORT	/* BRACE_COMPLEX and trying shortest match */
3625     , RS_NOMATCH	/* NOMATCH */
3626     , RS_BEHIND1	/* BEHIND / NOBEHIND matching rest */
3627     , RS_BEHIND2	/* BEHIND / NOBEHIND matching behind part */
3628     , RS_STAR_LONG	/* STAR/PLUS/BRACE_SIMPLE longest match */
3629     , RS_STAR_SHORT	/* STAR/PLUS/BRACE_SIMPLE shortest match */
3630 } regstate_T;
3631 
3632 /*
3633  * When there are alternatives a regstate_T is put on the regstack to remember
3634  * what we are doing.
3635  * Before it may be another type of item, depending on rs_state, to remember
3636  * more things.
3637  */
3638 typedef struct regitem_S
3639 {
3640     regstate_T	rs_state;	/* what we are doing, one of RS_ above */
3641     char_u	*rs_scan;	/* current node in program */
3642     union
3643     {
3644 	save_se_T  sesave;
3645 	regsave_T  regsave;
3646     } rs_un;			/* room for saving reginput */
3647     short	rs_no;		/* submatch nr or BEHIND/NOBEHIND */
3648 } regitem_T;
3649 
3650 static regitem_T *regstack_push __ARGS((regstate_T state, char_u *scan));
3651 static void regstack_pop __ARGS((char_u **scan));
3652 
/*
 * Used for STAR, PLUS and BRACE_SIMPLE matching.
 * NOTE(review): "count", "minval" and "maxval" presumably hold the current
 * number of repeats and the allowed range -- confirm in regmatch().
 */
typedef struct regstar_S
{
    int		nextb;		/* next byte */
    int		nextb_ic;	/* next byte reverse case */
    long	count;
    long	minval;
    long	maxval;
} regstar_T;
3662 
/* Used to store the input position when a BACK was encountered, so that we
 * know if we made any progress since the last time. */
typedef struct backpos_S
{
    char_u	*bp_scan;	/* "scan" where BACK was encountered */
    regsave_T	bp_pos;		/* last input position */
} backpos_T;
3670 
/*
 * "regstack" and "backpos" are used by regmatch().  They are kept over calls
 * to avoid invoking malloc() and free() often.
 * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
 * or regbehind_T.
 * "backpos" is a table with backpos_T items, used for BACK.
 */
3678 static garray_T	regstack = {0, 0, 0, 0, NULL};
3679 static garray_T	backpos = {0, 0, 0, 0, NULL};
3680 
3681 /*
3682  * Both for regstack and backpos tables we use the following strategy of
3683  * allocation (to reduce malloc/free calls):
3684  * - Initial size is fairly small.
3685  * - When needed, the tables are grown bigger (8 times at first, double after
3686  *   that).
3687  * - After executing the match we free the memory only if the array has grown.
3688  *   Thus the memory is kept allocated when it's at the initial size.
3689  * This makes it fast while not keeping a lot of memory allocated.
3690  * A three times speed increase was observed when using many simple patterns.
3691  */
3692 #define REGSTACK_INITIAL	2048
3693 #define BACKPOS_INITIAL		64
3694 
#if defined(EXITFREE) || defined(PROTO)
/*
 * Release the memory that the regexp code keeps allocated between calls:
 * "reg_tofree", "reg_prev_sub" and the "regstack" and "backpos" arrays.
 */
    void
free_regexp_stuff()
{
    vim_free(reg_prev_sub);
    vim_free(reg_tofree);
    ga_clear(&backpos);
    ga_clear(&regstack);
}
#endif
3705 
3706 /*
3707  * Get pointer to the line "lnum", which is relative to "reg_firstlnum".
3708  */
3709     static char_u *
3710 reg_getline(lnum)
3711     linenr_T	lnum;
3712 {
3713     /* when looking behind for a match/no-match lnum is negative.  But we
3714      * can't go before line 1 */
3715     if (reg_firstlnum + lnum < 1)
3716 	return NULL;
3717     if (lnum > reg_maxline)
3718 	/* Must have matched the "\n" in the last line. */
3719 	return (char_u *)"";
3720     return ml_get_buf(reg_buf, reg_firstlnum + lnum, FALSE);
3721 }
3722 
3723 static regsave_T behind_pos;
3724 
3725 #ifdef FEAT_SYN_HL
3726 static char_u	*reg_startzp[NSUBEXP];	/* Workspace to mark beginning */
3727 static char_u	*reg_endzp[NSUBEXP];	/*   and end of \z(...\) matches */
3728 static lpos_T	reg_startzpos[NSUBEXP];	/* idem, beginning pos */
3729 static lpos_T	reg_endzpos[NSUBEXP];	/* idem, end pos */
3730 #endif
3731 
3732 /* TRUE if using multi-line regexp. */
3733 #define REG_MULTI	(reg_match == NULL)
3734 
3735 static int  bt_regexec_nl __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int line_lbr));
3736 
3737 
3738 /*
3739  * Match a regexp against a string.
3740  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3741  * Uses curbuf for line count and 'iskeyword'.
3742  * if "line_lbr" is TRUE  consider a "\n" in "line" to be a line break.
3743  *
3744  * Returns 0 for failure, number of lines contained in the match otherwise.
3745  */
3746     static int
3747 bt_regexec_nl(rmp, line, col, line_lbr)
3748     regmatch_T	*rmp;
3749     char_u	*line;	/* string to match against */
3750     colnr_T	col;	/* column to start looking for match */
3751     int		line_lbr;
3752 {
3753     reg_match = rmp;
3754     reg_mmatch = NULL;
3755     reg_maxline = 0;
3756     reg_line_lbr = line_lbr;
3757     reg_buf = curbuf;
3758     reg_win = NULL;
3759     ireg_ic = rmp->rm_ic;
3760 #ifdef FEAT_MBYTE
3761     ireg_icombine = FALSE;
3762 #endif
3763     ireg_maxcol = 0;
3764 
3765     return bt_regexec_both(line, col, NULL);
3766 }
3767 
3768 static long bt_regexec_multi __ARGS((regmmatch_T *rmp, win_T *win, buf_T *buf, linenr_T lnum, colnr_T col, proftime_T *tm));
3769 
3770 /*
3771  * Match a regexp against multiple lines.
3772  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
3773  * Uses curbuf for line count and 'iskeyword'.
3774  *
3775  * Return zero if there is no match.  Return number of lines contained in the
3776  * match otherwise.
3777  */
3778     static long
3779 bt_regexec_multi(rmp, win, buf, lnum, col, tm)
3780     regmmatch_T	*rmp;
3781     win_T	*win;		/* window in which to search or NULL */
3782     buf_T	*buf;		/* buffer in which to search */
3783     linenr_T	lnum;		/* nr of line to start looking for match */
3784     colnr_T	col;		/* column to start looking for match */
3785     proftime_T	*tm;		/* timeout limit or NULL */
3786 {
3787     reg_match = NULL;
3788     reg_mmatch = rmp;
3789     reg_buf = buf;
3790     reg_win = win;
3791     reg_firstlnum = lnum;
3792     reg_maxline = reg_buf->b_ml.ml_line_count - lnum;
3793     reg_line_lbr = FALSE;
3794     ireg_ic = rmp->rmm_ic;
3795 #ifdef FEAT_MBYTE
3796     ireg_icombine = FALSE;
3797 #endif
3798     ireg_maxcol = rmp->rmm_maxcol;
3799 
3800     return bt_regexec_both(NULL, col, tm);
3801 }
3802 
/*
 * Match a regexp against a string ("line" points to the string) or multiple
 * lines ("line" is NULL, use reg_getline()).
 *
 * The caller must have set up "reg_match" or "reg_mmatch" and the related
 * globals; the compiled program is taken from there.
 * "col" is the column where the first match attempt is made.  "tm" is only
 * looked at when FEAT_RELTIME is defined: when not NULL the search for an
 * unanchored match is abandoned once profile_passed_limit() reports that the
 * limit was passed.
 *
 * Before returning, "reg_tofree" is freed when "reg_tofreelen" is over 400,
 * and "regstack" and "backpos" are cleared when they grew beyond their
 * initial size.
 *
 * Returns 0 for failure, number of lines contained in the match otherwise.
 */
    static long
bt_regexec_both(line, col, tm)
    char_u	*line;
    colnr_T	col;		/* column to start looking for match */
    proftime_T	*tm UNUSED;	/* timeout limit or NULL */
{
    bt_regprog_T    *prog;
    char_u	    *s;
    long	    retval = 0L;

    /* Create "regstack" and "backpos" if they are not allocated yet.
     * We allocate *_INITIAL amount of bytes first and then set the grow size
     * to much bigger value to avoid many malloc calls in case of deep regular
     * expressions.  */
    if (regstack.ga_data == NULL)
    {
	/* Use an item size of 1 byte, since we push different things
	 * onto the regstack. */
	ga_init2(&regstack, 1, REGSTACK_INITIAL);
	(void)ga_grow(&regstack, REGSTACK_INITIAL);
	regstack.ga_growsize = REGSTACK_INITIAL * 8;
    }

    if (backpos.ga_data == NULL)
    {
	ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
	(void)ga_grow(&backpos, BACKPOS_INITIAL);
	backpos.ga_growsize = BACKPOS_INITIAL * 8;
    }

    /* Fetch the program and the places where the match is stored; for a
     * multi-line match also the first line. */
    if (REG_MULTI)
    {
	prog = (bt_regprog_T *)reg_mmatch->regprog;
	line = reg_getline((linenr_T)0);
	reg_startpos = reg_mmatch->startpos;
	reg_endpos = reg_mmatch->endpos;
    }
    else
    {
	prog = (bt_regprog_T *)reg_match->regprog;
	reg_startp = reg_match->startp;
	reg_endp = reg_match->endp;
    }

    /* Be paranoid... */
    if (prog == NULL || line == NULL)
    {
	EMSG(_(e_null));
	goto theend;
    }

    /* Check validity of program. */
    if (prog_magic_wrong())
	goto theend;

    /* If the start column is past the maximum column: no need to try. */
    if (ireg_maxcol > 0 && col >= ireg_maxcol)
	goto theend;

    /* If pattern contains "\c" or "\C": overrule value of ireg_ic */
    if (prog->regflags & RF_ICASE)
	ireg_ic = TRUE;
    else if (prog->regflags & RF_NOICASE)
	ireg_ic = FALSE;

#ifdef FEAT_MBYTE
    /* If pattern contains "\Z" overrule value of ireg_icombine */
    if (prog->regflags & RF_ICOMBINE)
	ireg_icombine = TRUE;
#endif

    /* If there is a "must appear" string, look for it. */
    if (prog->regmust != NULL)
    {
	int c;

	/* "c" is the first character of the "must appear" string. */
#ifdef FEAT_MBYTE
	if (has_mbyte)
	    c = (*mb_ptr2char)(prog->regmust);
	else
#endif
	    c = *prog->regmust;
	s = line + col;

	/*
	 * This is used very often, esp. for ":global".  Use three versions of
	 * the loop to avoid overhead of conditions.
	 */
	if (!ireg_ic
#ifdef FEAT_MBYTE
		&& !has_mbyte
#endif
		)
	    while ((s = vim_strbyte(s, c)) != NULL)
	    {
		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
		    break;		/* Found it. */
		++s;
	    }
#ifdef FEAT_MBYTE
	else if (!ireg_ic || (!enc_utf8 && mb_char2len(c) > 1))
	    while ((s = vim_strchr(s, c)) != NULL)
	    {
		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
		    break;		/* Found it. */
		mb_ptr_adv(s);
	    }
#endif
	else
	    while ((s = cstrchr(s, c)) != NULL)
	    {
		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
		    break;		/* Found it. */
		mb_ptr_adv(s);
	    }
	if (s == NULL)		/* Not present. */
	    goto theend;
    }

    /* Start matching in the first line. */
    regline = line;
    reglnum = 0;
    reg_toolong = FALSE;

    /* Simplest case: Anchored match need be tried only once. */
    if (prog->reganch)
    {
	int	c;

#ifdef FEAT_MBYTE
	if (has_mbyte)
	    c = (*mb_ptr2char)(regline + col);
	else
#endif
	    c = regline[col];
	/* Only try when there is no known start character or when the
	 * character at "col" equals it, ignoring case if "ireg_ic" is set. */
	if (prog->regstart == NUL
		|| prog->regstart == c
		|| (ireg_ic && ((
#ifdef FEAT_MBYTE
			(enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
			|| (c < 255 && prog->regstart < 255 &&
#endif
			    MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
	    retval = regtry(prog, col);
	else
	    retval = 0;
    }
    else
    {
#ifdef FEAT_RELTIME
	int tm_count = 0;
#endif
	/* Messy cases:  unanchored match. */
	while (!got_int)
	{
	    if (prog->regstart != NUL)
	    {
		/* Skip until the char we know it must start with.
		 * Used often, do some work to avoid call overhead. */
		if (!ireg_ic
#ifdef FEAT_MBYTE
			    && !has_mbyte
#endif
			    )
		    s = vim_strbyte(regline + col, prog->regstart);
		else
		    s = cstrchr(regline + col, prog->regstart);
		if (s == NULL)
		{
		    retval = 0;
		    break;
		}
		col = (int)(s - regline);
	    }

	    /* Check for maximum column to try. */
	    if (ireg_maxcol > 0 && col >= ireg_maxcol)
	    {
		retval = 0;
		break;
	    }

	    retval = regtry(prog, col);
	    if (retval > 0)
		break;

	    /* if not currently on the first line, get it again */
	    if (reglnum != 0)
	    {
		reglnum = 0;
		regline = reg_getline((linenr_T)0);
	    }
	    /* At the end of the line there is nothing left to try. */
	    if (regline[col] == NUL)
		break;
	    /* Advance to the next character for the next attempt. */
#ifdef FEAT_MBYTE
	    if (has_mbyte)
		col += (*mb_ptr2len)(regline + col);
	    else
#endif
		++col;
#ifdef FEAT_RELTIME
	    /* Check for timeout only once every twenty iterations to avoid
	     * overhead. */
	    if (tm != NULL && ++tm_count == 20)
	    {
		tm_count = 0;
		if (profile_passed_limit(tm))
		    break;
	    }
#endif
	}
    }

theend:
    /* Free "reg_tofree" when it's a bit big.
     * Free regstack and backpos if they are bigger than their initial size. */
    if (reg_tofreelen > 400)
    {
	vim_free(reg_tofree);
	reg_tofree = NULL;
    }
    if (regstack.ga_maxlen > REGSTACK_INITIAL)
	ga_clear(&regstack);
    if (backpos.ga_maxlen > BACKPOS_INITIAL)
	ga_clear(&backpos);

    return retval;
}
4034 
4035 #ifdef FEAT_SYN_HL
4036 static reg_extmatch_T *make_extmatch __ARGS((void));
4037 
4038 /*
4039  * Create a new extmatch and mark it as referenced once.
4040  */
4041     static reg_extmatch_T *
4042 make_extmatch()
4043 {
4044     reg_extmatch_T	*em;
4045 
4046     em = (reg_extmatch_T *)alloc_clear((unsigned)sizeof(reg_extmatch_T));
4047     if (em != NULL)
4048 	em->refcnt = 1;
4049     return em;
4050 }
4051 
4052 /*
4053  * Add a reference to an extmatch.
4054  */
4055     reg_extmatch_T *
4056 ref_extmatch(em)
4057     reg_extmatch_T	*em;
4058 {
4059     if (em != NULL)
4060 	em->refcnt++;
4061     return em;
4062 }
4063 
4064 /*
4065  * Remove a reference to an extmatch.  If there are no references left, free
4066  * the info.
4067  */
4068     void
4069 unref_extmatch(em)
4070     reg_extmatch_T	*em;
4071 {
4072     int i;
4073 
4074     if (em != NULL && --em->refcnt <= 0)
4075     {
4076 	for (i = 0; i < NSUBEXP; ++i)
4077 	    vim_free(em->matches[i]);
4078 	vim_free(em);
4079     }
4080 }
4081 #endif
4082 
4083 /*
4084  * regtry - try match of "prog" with at regline["col"].
4085  * Returns 0 for failure, number of lines contained in the match otherwise.
4086  */
4087     static long
4088 regtry(prog, col)
4089     bt_regprog_T    *prog;
4090     colnr_T	col;
4091 {
4092     reginput = regline + col;
4093     need_clear_subexpr = TRUE;
4094 #ifdef FEAT_SYN_HL
4095     /* Clear the external match subpointers if necessary. */
4096     if (prog->reghasz == REX_SET)
4097 	need_clear_zsubexpr = TRUE;
4098 #endif
4099 
4100     if (regmatch(prog->program + 1) == 0)
4101 	return 0;
4102 
4103     cleanup_subexpr();
4104     if (REG_MULTI)
4105     {
4106 	if (reg_startpos[0].lnum < 0)
4107 	{
4108 	    reg_startpos[0].lnum = 0;
4109 	    reg_startpos[0].col = col;
4110 	}
4111 	if (reg_endpos[0].lnum < 0)
4112 	{
4113 	    reg_endpos[0].lnum = reglnum;
4114 	    reg_endpos[0].col = (int)(reginput - regline);
4115 	}
4116 	else
4117 	    /* Use line number of "\ze". */
4118 	    reglnum = reg_endpos[0].lnum;
4119     }
4120     else
4121     {
4122 	if (reg_startp[0] == NULL)
4123 	    reg_startp[0] = regline + col;
4124 	if (reg_endp[0] == NULL)
4125 	    reg_endp[0] = reginput;
4126     }
4127 #ifdef FEAT_SYN_HL
4128     /* Package any found \z(...\) matches for export. Default is none. */
4129     unref_extmatch(re_extmatch_out);
4130     re_extmatch_out = NULL;
4131 
4132     if (prog->reghasz == REX_SET)
4133     {
4134 	int		i;
4135 
4136 	cleanup_zsubexpr();
4137 	re_extmatch_out = make_extmatch();
4138 	for (i = 0; i < NSUBEXP; i++)
4139 	{
4140 	    if (REG_MULTI)
4141 	    {
4142 		/* Only accept single line matches. */
4143 		if (reg_startzpos[i].lnum >= 0
4144 			&& reg_endzpos[i].lnum == reg_startzpos[i].lnum
4145 			&& reg_endzpos[i].col >= reg_startzpos[i].col)
4146 		    re_extmatch_out->matches[i] =
4147 			vim_strnsave(reg_getline(reg_startzpos[i].lnum)
4148 						       + reg_startzpos[i].col,
4149 				   reg_endzpos[i].col - reg_startzpos[i].col);
4150 	    }
4151 	    else
4152 	    {
4153 		if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4154 		    re_extmatch_out->matches[i] =
4155 			    vim_strnsave(reg_startzp[i],
4156 					(int)(reg_endzp[i] - reg_startzp[i]));
4157 	    }
4158 	}
4159     }
4160 #endif
4161     return 1 + reglnum;
4162 }
4163 
4164 #ifdef FEAT_MBYTE
4165 static int reg_prev_class __ARGS((void));
4166 
4167 /*
4168  * Get class of previous character.
4169  */
4170     static int
4171 reg_prev_class()
4172 {
4173     if (reginput > regline)
4174 	return mb_get_class_buf(reginput - 1
4175 			    - (*mb_head_off)(regline, reginput - 1), reg_buf);
4176     return -1;
4177 }
4178 #endif
4179 
4180 static int reg_match_visual __ARGS((void));
4181 
4182 /*
4183  * Return TRUE if the current reginput position matches the Visual area.
4184  */
4185     static int
4186 reg_match_visual()
4187 {
4188     pos_T	top, bot;
4189     linenr_T    lnum;
4190     colnr_T	col;
4191     win_T	*wp = reg_win == NULL ? curwin : reg_win;
4192     int		mode;
4193     colnr_T	start, end;
4194     colnr_T	start2, end2;
4195     colnr_T	cols;
4196 
4197     /* Check if the buffer is the current buffer. */
4198     if (reg_buf != curbuf || VIsual.lnum == 0)
4199 	return FALSE;
4200 
4201     if (VIsual_active)
4202     {
4203 	if (lt(VIsual, wp->w_cursor))
4204 	{
4205 	    top = VIsual;
4206 	    bot = wp->w_cursor;
4207 	}
4208 	else
4209 	{
4210 	    top = wp->w_cursor;
4211 	    bot = VIsual;
4212 	}
4213 	mode = VIsual_mode;
4214     }
4215     else
4216     {
4217 	if (lt(curbuf->b_visual.vi_start, curbuf->b_visual.vi_end))
4218 	{
4219 	    top = curbuf->b_visual.vi_start;
4220 	    bot = curbuf->b_visual.vi_end;
4221 	}
4222 	else
4223 	{
4224 	    top = curbuf->b_visual.vi_end;
4225 	    bot = curbuf->b_visual.vi_start;
4226 	}
4227 	mode = curbuf->b_visual.vi_mode;
4228     }
4229     lnum = reglnum + reg_firstlnum;
4230     if (lnum < top.lnum || lnum > bot.lnum)
4231 	return FALSE;
4232 
4233     if (mode == 'v')
4234     {
4235 	col = (colnr_T)(reginput - regline);
4236 	if ((lnum == top.lnum && col < top.col)
4237 		|| (lnum == bot.lnum && col >= bot.col + (*p_sel != 'e')))
4238 	    return FALSE;
4239     }
4240     else if (mode == Ctrl_V)
4241     {
4242 	getvvcol(wp, &top, &start, NULL, &end);
4243 	getvvcol(wp, &bot, &start2, NULL, &end2);
4244 	if (start2 < start)
4245 	    start = start2;
4246 	if (end2 > end)
4247 	    end = end2;
4248 	if (top.col == MAXCOL || bot.col == MAXCOL)
4249 	    end = MAXCOL;
4250 	cols = win_linetabsize(wp, regline, (colnr_T)(reginput - regline));
4251 	if (cols < start || cols > end - (*p_sel == 'e'))
4252 	    return FALSE;
4253     }
4254     return TRUE;
4255 }
4256 
/*
 * Move "reginput" forward with mb_ptr_adv().  regmatch() uses this after an
 * item that matches a single character.
 */
#define ADVANCE_REGINPUT() mb_ptr_adv(reginput)

/*
 * Minimum and maximum count taken from a BRACE_LIMITS item.
 * Only regmatch() uses them.  They are kept at file level instead of inside
 * regmatch() to keep its stack use down, since it can be called recursively
 * many times.
 */
static long	bl_minval;
static long	bl_maxval;
4266 
4267 /*
4268  * regmatch - main matching routine
4269  *
4270  * Conceptually the strategy is simple: Check to see whether the current node
4271  * matches, push an item onto the regstack and loop to see whether the rest
4272  * matches, and then act accordingly.  In practice we make some effort to
4273  * avoid using the regstack, in particular by going through "ordinary" nodes
4274  * (that don't need to know whether the rest of the match failed) by a nested
4275  * loop.
4276  *
4277  * Returns TRUE when there is a match.  Leaves reginput and reglnum just after
4278  * the last matched character.
4279  * Returns FALSE when there is no match.  Leaves reginput and reglnum in an
4280  * undefined state!
4281  */
4282     static int
4283 regmatch(scan)
4284     char_u	*scan;		/* Current node. */
4285 {
4286   char_u	*next;		/* Next node. */
4287   int		op;
4288   int		c;
4289   regitem_T	*rp;
4290   int		no;
4291   int		status;		/* one of the RA_ values: */
4292 #define RA_FAIL		1	/* something failed, abort */
4293 #define RA_CONT		2	/* continue in inner loop */
4294 #define RA_BREAK	3	/* break inner loop */
4295 #define RA_MATCH	4	/* successful match */
4296 #define RA_NOMATCH	5	/* didn't match */
4297 
4298   /* Make "regstack" and "backpos" empty.  They are allocated and freed in
4299    * bt_regexec_both() to reduce malloc()/free() calls. */
4300   regstack.ga_len = 0;
4301   backpos.ga_len = 0;
4302 
4303   /*
4304    * Repeat until "regstack" is empty.
4305    */
4306   for (;;)
4307   {
4308     /* Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
4309      * Allow interrupting them with CTRL-C. */
4310     fast_breakcheck();
4311 
4312 #ifdef DEBUG
4313     if (scan != NULL && regnarrate)
4314     {
4315 	mch_errmsg((char *)regprop(scan));
4316 	mch_errmsg("(\n");
4317     }
4318 #endif
4319 
4320     /*
4321      * Repeat for items that can be matched sequentially, without using the
4322      * regstack.
4323      */
4324     for (;;)
4325     {
4326 	if (got_int || scan == NULL)
4327 	{
4328 	    status = RA_FAIL;
4329 	    break;
4330 	}
4331 	status = RA_CONT;
4332 
4333 #ifdef DEBUG
4334 	if (regnarrate)
4335 	{
4336 	    mch_errmsg((char *)regprop(scan));
4337 	    mch_errmsg("...\n");
4338 # ifdef FEAT_SYN_HL
4339 	    if (re_extmatch_in != NULL)
4340 	    {
4341 		int i;
4342 
4343 		mch_errmsg(_("External submatches:\n"));
4344 		for (i = 0; i < NSUBEXP; i++)
4345 		{
4346 		    mch_errmsg("    \"");
4347 		    if (re_extmatch_in->matches[i] != NULL)
4348 			mch_errmsg((char *)re_extmatch_in->matches[i]);
4349 		    mch_errmsg("\"\n");
4350 		}
4351 	    }
4352 # endif
4353 	}
4354 #endif
4355 	next = regnext(scan);
4356 
4357 	op = OP(scan);
4358 	/* Check for character class with NL added. */
4359 	if (!reg_line_lbr && WITH_NL(op) && REG_MULTI
4360 				&& *reginput == NUL && reglnum <= reg_maxline)
4361 	{
4362 	    reg_nextline();
4363 	}
4364 	else if (reg_line_lbr && WITH_NL(op) && *reginput == '\n')
4365 	{
4366 	    ADVANCE_REGINPUT();
4367 	}
4368 	else
4369 	{
4370 	  if (WITH_NL(op))
4371 	      op -= ADD_NL;
4372 #ifdef FEAT_MBYTE
4373 	  if (has_mbyte)
4374 	      c = (*mb_ptr2char)(reginput);
4375 	  else
4376 #endif
4377 	      c = *reginput;
4378 	  switch (op)
4379 	  {
4380 	  case BOL:
4381 	    if (reginput != regline)
4382 		status = RA_NOMATCH;
4383 	    break;
4384 
4385 	  case EOL:
4386 	    if (c != NUL)
4387 		status = RA_NOMATCH;
4388 	    break;
4389 
4390 	  case RE_BOF:
4391 	    /* We're not at the beginning of the file when below the first
4392 	     * line where we started, not at the start of the line or we
4393 	     * didn't start at the first line of the buffer. */
4394 	    if (reglnum != 0 || reginput != regline
4395 					  || (REG_MULTI && reg_firstlnum > 1))
4396 		status = RA_NOMATCH;
4397 	    break;
4398 
4399 	  case RE_EOF:
4400 	    if (reglnum != reg_maxline || c != NUL)
4401 		status = RA_NOMATCH;
4402 	    break;
4403 
4404 	  case CURSOR:
4405 	    /* Check if the buffer is in a window and compare the
4406 	     * reg_win->w_cursor position to the match position. */
4407 	    if (reg_win == NULL
4408 		    || (reglnum + reg_firstlnum != reg_win->w_cursor.lnum)
4409 		    || ((colnr_T)(reginput - regline) != reg_win->w_cursor.col))
4410 		status = RA_NOMATCH;
4411 	    break;
4412 
4413 	  case RE_MARK:
4414 	    /* Compare the mark position to the match position. */
4415 	    {
4416 		int	mark = OPERAND(scan)[0];
4417 		int	cmp = OPERAND(scan)[1];
4418 		pos_T	*pos;
4419 
4420 		pos = getmark_buf(reg_buf, mark, FALSE);
4421 		if (pos == NULL		     /* mark doesn't exist */
4422 			|| pos->lnum <= 0    /* mark isn't set in reg_buf */
4423 			|| (pos->lnum == reglnum + reg_firstlnum
4424 				? (pos->col == (colnr_T)(reginput - regline)
4425 				    ? (cmp == '<' || cmp == '>')
4426 				    : (pos->col < (colnr_T)(reginput - regline)
4427 					? cmp != '>'
4428 					: cmp != '<'))
4429 				: (pos->lnum < reglnum + reg_firstlnum
4430 				    ? cmp != '>'
4431 				    : cmp != '<')))
4432 		    status = RA_NOMATCH;
4433 	    }
4434 	    break;
4435 
4436 	  case RE_VISUAL:
4437 	    if (!reg_match_visual())
4438 		status = RA_NOMATCH;
4439 	    break;
4440 
4441 	  case RE_LNUM:
4442 	    if (!REG_MULTI || !re_num_cmp((long_u)(reglnum + reg_firstlnum),
4443 									scan))
4444 		status = RA_NOMATCH;
4445 	    break;
4446 
4447 	  case RE_COL:
4448 	    if (!re_num_cmp((long_u)(reginput - regline) + 1, scan))
4449 		status = RA_NOMATCH;
4450 	    break;
4451 
4452 	  case RE_VCOL:
4453 	    if (!re_num_cmp((long_u)win_linetabsize(
4454 			    reg_win == NULL ? curwin : reg_win,
4455 			    regline, (colnr_T)(reginput - regline)) + 1, scan))
4456 		status = RA_NOMATCH;
4457 	    break;
4458 
4459 	  case BOW:	/* \<word; reginput points to w */
4460 	    if (c == NUL)	/* Can't match at end of line */
4461 		status = RA_NOMATCH;
4462 #ifdef FEAT_MBYTE
4463 	    else if (has_mbyte)
4464 	    {
4465 		int this_class;
4466 
4467 		/* Get class of current and previous char (if it exists). */
4468 		this_class = mb_get_class_buf(reginput, reg_buf);
4469 		if (this_class <= 1)
4470 		    status = RA_NOMATCH;  /* not on a word at all */
4471 		else if (reg_prev_class() == this_class)
4472 		    status = RA_NOMATCH;  /* previous char is in same word */
4473 	    }
4474 #endif
4475 	    else
4476 	    {
4477 		if (!vim_iswordc_buf(c, reg_buf) || (reginput > regline
4478 				   && vim_iswordc_buf(reginput[-1], reg_buf)))
4479 		    status = RA_NOMATCH;
4480 	    }
4481 	    break;
4482 
4483 	  case EOW:	/* word\>; reginput points after d */
4484 	    if (reginput == regline)    /* Can't match at start of line */
4485 		status = RA_NOMATCH;
4486 #ifdef FEAT_MBYTE
4487 	    else if (has_mbyte)
4488 	    {
4489 		int this_class, prev_class;
4490 
4491 		/* Get class of current and previous char (if it exists). */
4492 		this_class = mb_get_class_buf(reginput, reg_buf);
4493 		prev_class = reg_prev_class();
4494 		if (this_class == prev_class
4495 			|| prev_class == 0 || prev_class == 1)
4496 		    status = RA_NOMATCH;
4497 	    }
4498 #endif
4499 	    else
4500 	    {
4501 		if (!vim_iswordc_buf(reginput[-1], reg_buf)
4502 			|| (reginput[0] != NUL && vim_iswordc_buf(c, reg_buf)))
4503 		    status = RA_NOMATCH;
4504 	    }
4505 	    break; /* Matched with EOW */
4506 
4507 	  case ANY:
4508 	    /* ANY does not match new lines. */
4509 	    if (c == NUL)
4510 		status = RA_NOMATCH;
4511 	    else
4512 		ADVANCE_REGINPUT();
4513 	    break;
4514 
4515 	  case IDENT:
4516 	    if (!vim_isIDc(c))
4517 		status = RA_NOMATCH;
4518 	    else
4519 		ADVANCE_REGINPUT();
4520 	    break;
4521 
4522 	  case SIDENT:
4523 	    if (VIM_ISDIGIT(*reginput) || !vim_isIDc(c))
4524 		status = RA_NOMATCH;
4525 	    else
4526 		ADVANCE_REGINPUT();
4527 	    break;
4528 
4529 	  case KWORD:
4530 	    if (!vim_iswordp_buf(reginput, reg_buf))
4531 		status = RA_NOMATCH;
4532 	    else
4533 		ADVANCE_REGINPUT();
4534 	    break;
4535 
4536 	  case SKWORD:
4537 	    if (VIM_ISDIGIT(*reginput) || !vim_iswordp_buf(reginput, reg_buf))
4538 		status = RA_NOMATCH;
4539 	    else
4540 		ADVANCE_REGINPUT();
4541 	    break;
4542 
4543 	  case FNAME:
4544 	    if (!vim_isfilec(c))
4545 		status = RA_NOMATCH;
4546 	    else
4547 		ADVANCE_REGINPUT();
4548 	    break;
4549 
4550 	  case SFNAME:
4551 	    if (VIM_ISDIGIT(*reginput) || !vim_isfilec(c))
4552 		status = RA_NOMATCH;
4553 	    else
4554 		ADVANCE_REGINPUT();
4555 	    break;
4556 
4557 	  case PRINT:
4558 	    if (!vim_isprintc(PTR2CHAR(reginput)))
4559 		status = RA_NOMATCH;
4560 	    else
4561 		ADVANCE_REGINPUT();
4562 	    break;
4563 
4564 	  case SPRINT:
4565 	    if (VIM_ISDIGIT(*reginput) || !vim_isprintc(PTR2CHAR(reginput)))
4566 		status = RA_NOMATCH;
4567 	    else
4568 		ADVANCE_REGINPUT();
4569 	    break;
4570 
4571 	  case WHITE:
4572 	    if (!vim_iswhite(c))
4573 		status = RA_NOMATCH;
4574 	    else
4575 		ADVANCE_REGINPUT();
4576 	    break;
4577 
4578 	  case NWHITE:
4579 	    if (c == NUL || vim_iswhite(c))
4580 		status = RA_NOMATCH;
4581 	    else
4582 		ADVANCE_REGINPUT();
4583 	    break;
4584 
4585 	  case DIGIT:
4586 	    if (!ri_digit(c))
4587 		status = RA_NOMATCH;
4588 	    else
4589 		ADVANCE_REGINPUT();
4590 	    break;
4591 
4592 	  case NDIGIT:
4593 	    if (c == NUL || ri_digit(c))
4594 		status = RA_NOMATCH;
4595 	    else
4596 		ADVANCE_REGINPUT();
4597 	    break;
4598 
4599 	  case HEX:
4600 	    if (!ri_hex(c))
4601 		status = RA_NOMATCH;
4602 	    else
4603 		ADVANCE_REGINPUT();
4604 	    break;
4605 
4606 	  case NHEX:
4607 	    if (c == NUL || ri_hex(c))
4608 		status = RA_NOMATCH;
4609 	    else
4610 		ADVANCE_REGINPUT();
4611 	    break;
4612 
4613 	  case OCTAL:
4614 	    if (!ri_octal(c))
4615 		status = RA_NOMATCH;
4616 	    else
4617 		ADVANCE_REGINPUT();
4618 	    break;
4619 
4620 	  case NOCTAL:
4621 	    if (c == NUL || ri_octal(c))
4622 		status = RA_NOMATCH;
4623 	    else
4624 		ADVANCE_REGINPUT();
4625 	    break;
4626 
4627 	  case WORD:
4628 	    if (!ri_word(c))
4629 		status = RA_NOMATCH;
4630 	    else
4631 		ADVANCE_REGINPUT();
4632 	    break;
4633 
4634 	  case NWORD:
4635 	    if (c == NUL || ri_word(c))
4636 		status = RA_NOMATCH;
4637 	    else
4638 		ADVANCE_REGINPUT();
4639 	    break;
4640 
4641 	  case HEAD:
4642 	    if (!ri_head(c))
4643 		status = RA_NOMATCH;
4644 	    else
4645 		ADVANCE_REGINPUT();
4646 	    break;
4647 
4648 	  case NHEAD:
4649 	    if (c == NUL || ri_head(c))
4650 		status = RA_NOMATCH;
4651 	    else
4652 		ADVANCE_REGINPUT();
4653 	    break;
4654 
4655 	  case ALPHA:
4656 	    if (!ri_alpha(c))
4657 		status = RA_NOMATCH;
4658 	    else
4659 		ADVANCE_REGINPUT();
4660 	    break;
4661 
4662 	  case NALPHA:
4663 	    if (c == NUL || ri_alpha(c))
4664 		status = RA_NOMATCH;
4665 	    else
4666 		ADVANCE_REGINPUT();
4667 	    break;
4668 
4669 	  case LOWER:
4670 	    if (!ri_lower(c))
4671 		status = RA_NOMATCH;
4672 	    else
4673 		ADVANCE_REGINPUT();
4674 	    break;
4675 
4676 	  case NLOWER:
4677 	    if (c == NUL || ri_lower(c))
4678 		status = RA_NOMATCH;
4679 	    else
4680 		ADVANCE_REGINPUT();
4681 	    break;
4682 
4683 	  case UPPER:
4684 	    if (!ri_upper(c))
4685 		status = RA_NOMATCH;
4686 	    else
4687 		ADVANCE_REGINPUT();
4688 	    break;
4689 
4690 	  case NUPPER:
4691 	    if (c == NUL || ri_upper(c))
4692 		status = RA_NOMATCH;
4693 	    else
4694 		ADVANCE_REGINPUT();
4695 	    break;
4696 
4697 	  case EXACTLY:
4698 	    {
4699 		int	len;
4700 		char_u	*opnd;
4701 
4702 		opnd = OPERAND(scan);
4703 		/* Inline the first byte, for speed. */
4704 		if (*opnd != *reginput
4705 			&& (!ireg_ic || (
4706 #ifdef FEAT_MBYTE
4707 			    !enc_utf8 &&
4708 #endif
4709 			    MB_TOLOWER(*opnd) != MB_TOLOWER(*reginput))))
4710 		    status = RA_NOMATCH;
4711 		else if (*opnd == NUL)
4712 		{
4713 		    /* match empty string always works; happens when "~" is
4714 		     * empty. */
4715 		}
4716 		else
4717 		{
4718 		    if (opnd[1] == NUL
4719 #ifdef FEAT_MBYTE
4720 			    && !(enc_utf8 && ireg_ic)
4721 #endif
4722 			)
4723 		    {
4724 			len = 1;	/* matched a single byte above */
4725 		    }
4726 		    else
4727 		    {
4728 			/* Need to match first byte again for multi-byte. */
4729 			len = (int)STRLEN(opnd);
4730 			if (cstrncmp(opnd, reginput, &len) != 0)
4731 			    status = RA_NOMATCH;
4732 		    }
4733 #ifdef FEAT_MBYTE
4734 		    /* Check for following composing character, unless %C
4735 		     * follows (skips over all composing chars). */
4736 		    if (status != RA_NOMATCH
4737 			    && enc_utf8
4738 			    && UTF_COMPOSINGLIKE(reginput, reginput + len)
4739 			    && !ireg_icombine
4740 			    && OP(next) != RE_COMPOSING)
4741 		    {
4742 			/* raaron: This code makes a composing character get
4743 			 * ignored, which is the correct behavior (sometimes)
4744 			 * for voweled Hebrew texts. */
4745 			status = RA_NOMATCH;
4746 		    }
4747 #endif
4748 		    if (status != RA_NOMATCH)
4749 			reginput += len;
4750 		}
4751 	    }
4752 	    break;
4753 
4754 	  case ANYOF:
4755 	  case ANYBUT:
4756 	    if (c == NUL)
4757 		status = RA_NOMATCH;
4758 	    else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
4759 		status = RA_NOMATCH;
4760 	    else
4761 		ADVANCE_REGINPUT();
4762 	    break;
4763 
4764 #ifdef FEAT_MBYTE
4765 	  case MULTIBYTECODE:
4766 	    if (has_mbyte)
4767 	    {
4768 		int	i, len;
4769 		char_u	*opnd;
4770 		int	opndc = 0, inpc;
4771 
4772 		opnd = OPERAND(scan);
4773 		/* Safety check (just in case 'encoding' was changed since
4774 		 * compiling the program). */
4775 		if ((len = (*mb_ptr2len)(opnd)) < 2)
4776 		{
4777 		    status = RA_NOMATCH;
4778 		    break;
4779 		}
4780 		if (enc_utf8)
4781 		    opndc = mb_ptr2char(opnd);
4782 		if (enc_utf8 && utf_iscomposing(opndc))
4783 		{
4784 		    /* When only a composing char is given match at any
4785 		     * position where that composing char appears. */
4786 		    status = RA_NOMATCH;
4787 		    for (i = 0; reginput[i] != NUL;
4788 						i += utf_ptr2len(reginput + i))
4789 		    {
4790 			inpc = mb_ptr2char(reginput + i);
4791 			if (!utf_iscomposing(inpc))
4792 			{
4793 			    if (i > 0)
4794 				break;
4795 			}
4796 			else if (opndc == inpc)
4797 			{
4798 			    /* Include all following composing chars. */
4799 			    len = i + mb_ptr2len(reginput + i);
4800 			    status = RA_MATCH;
4801 			    break;
4802 			}
4803 		    }
4804 		}
4805 		else
4806 		    for (i = 0; i < len; ++i)
4807 			if (opnd[i] != reginput[i])
4808 			{
4809 			    status = RA_NOMATCH;
4810 			    break;
4811 			}
4812 		reginput += len;
4813 	    }
4814 	    else
4815 		status = RA_NOMATCH;
4816 	    break;
4817 #endif
4818 	  case RE_COMPOSING:
4819 #ifdef FEAT_MBYTE
4820 	    if (enc_utf8)
4821 	    {
4822 		/* Skip composing characters. */
4823 		while (utf_iscomposing(utf_ptr2char(reginput)))
4824 		    mb_cptr_adv(reginput);
4825 	    }
4826 #endif
4827 	    break;
4828 
4829 	  case NOTHING:
4830 	    break;
4831 
4832 	  case BACK:
4833 	    {
4834 		int		i;
4835 		backpos_T	*bp;
4836 
4837 		/*
4838 		 * When we run into BACK we need to check if we don't keep
4839 		 * looping without matching any input.  The second and later
4840 		 * times a BACK is encountered it fails if the input is still
4841 		 * at the same position as the previous time.
4842 		 * The positions are stored in "backpos" and found by the
4843 		 * current value of "scan", the position in the RE program.
4844 		 */
4845 		bp = (backpos_T *)backpos.ga_data;
4846 		for (i = 0; i < backpos.ga_len; ++i)
4847 		    if (bp[i].bp_scan == scan)
4848 			break;
4849 		if (i == backpos.ga_len)
4850 		{
4851 		    /* First time at this BACK, make room to store the pos. */
4852 		    if (ga_grow(&backpos, 1) == FAIL)
4853 			status = RA_FAIL;
4854 		    else
4855 		    {
4856 			/* get "ga_data" again, it may have changed */
4857 			bp = (backpos_T *)backpos.ga_data;
4858 			bp[i].bp_scan = scan;
4859 			++backpos.ga_len;
4860 		    }
4861 		}
4862 		else if (reg_save_equal(&bp[i].bp_pos))
4863 		    /* Still at same position as last time, fail. */
4864 		    status = RA_NOMATCH;
4865 
4866 		if (status != RA_FAIL && status != RA_NOMATCH)
4867 		    reg_save(&bp[i].bp_pos, &backpos);
4868 	    }
4869 	    break;
4870 
4871 	  case MOPEN + 0:   /* Match start: \zs */
4872 	  case MOPEN + 1:   /* \( */
4873 	  case MOPEN + 2:
4874 	  case MOPEN + 3:
4875 	  case MOPEN + 4:
4876 	  case MOPEN + 5:
4877 	  case MOPEN + 6:
4878 	  case MOPEN + 7:
4879 	  case MOPEN + 8:
4880 	  case MOPEN + 9:
4881 	    {
4882 		no = op - MOPEN;
4883 		cleanup_subexpr();
4884 		rp = regstack_push(RS_MOPEN, scan);
4885 		if (rp == NULL)
4886 		    status = RA_FAIL;
4887 		else
4888 		{
4889 		    rp->rs_no = no;
4890 		    save_se(&rp->rs_un.sesave, &reg_startpos[no],
4891 							     &reg_startp[no]);
4892 		    /* We simply continue and handle the result when done. */
4893 		}
4894 	    }
4895 	    break;
4896 
4897 	  case NOPEN:	    /* \%( */
4898 	  case NCLOSE:	    /* \) after \%( */
4899 		if (regstack_push(RS_NOPEN, scan) == NULL)
4900 		    status = RA_FAIL;
4901 		/* We simply continue and handle the result when done. */
4902 		break;
4903 
4904 #ifdef FEAT_SYN_HL
4905 	  case ZOPEN + 1:
4906 	  case ZOPEN + 2:
4907 	  case ZOPEN + 3:
4908 	  case ZOPEN + 4:
4909 	  case ZOPEN + 5:
4910 	  case ZOPEN + 6:
4911 	  case ZOPEN + 7:
4912 	  case ZOPEN + 8:
4913 	  case ZOPEN + 9:
4914 	    {
4915 		no = op - ZOPEN;
4916 		cleanup_zsubexpr();
4917 		rp = regstack_push(RS_ZOPEN, scan);
4918 		if (rp == NULL)
4919 		    status = RA_FAIL;
4920 		else
4921 		{
4922 		    rp->rs_no = no;
4923 		    save_se(&rp->rs_un.sesave, &reg_startzpos[no],
4924 							     &reg_startzp[no]);
4925 		    /* We simply continue and handle the result when done. */
4926 		}
4927 	    }
4928 	    break;
4929 #endif
4930 
4931 	  case MCLOSE + 0:  /* Match end: \ze */
4932 	  case MCLOSE + 1:  /* \) */
4933 	  case MCLOSE + 2:
4934 	  case MCLOSE + 3:
4935 	  case MCLOSE + 4:
4936 	  case MCLOSE + 5:
4937 	  case MCLOSE + 6:
4938 	  case MCLOSE + 7:
4939 	  case MCLOSE + 8:
4940 	  case MCLOSE + 9:
4941 	    {
4942 		no = op - MCLOSE;
4943 		cleanup_subexpr();
4944 		rp = regstack_push(RS_MCLOSE, scan);
4945 		if (rp == NULL)
4946 		    status = RA_FAIL;
4947 		else
4948 		{
4949 		    rp->rs_no = no;
4950 		    save_se(&rp->rs_un.sesave, &reg_endpos[no], &reg_endp[no]);
4951 		    /* We simply continue and handle the result when done. */
4952 		}
4953 	    }
4954 	    break;
4955 
4956 #ifdef FEAT_SYN_HL
4957 	  case ZCLOSE + 1:  /* \) after \z( */
4958 	  case ZCLOSE + 2:
4959 	  case ZCLOSE + 3:
4960 	  case ZCLOSE + 4:
4961 	  case ZCLOSE + 5:
4962 	  case ZCLOSE + 6:
4963 	  case ZCLOSE + 7:
4964 	  case ZCLOSE + 8:
4965 	  case ZCLOSE + 9:
4966 	    {
4967 		no = op - ZCLOSE;
4968 		cleanup_zsubexpr();
4969 		rp = regstack_push(RS_ZCLOSE, scan);
4970 		if (rp == NULL)
4971 		    status = RA_FAIL;
4972 		else
4973 		{
4974 		    rp->rs_no = no;
4975 		    save_se(&rp->rs_un.sesave, &reg_endzpos[no],
4976 							      &reg_endzp[no]);
4977 		    /* We simply continue and handle the result when done. */
4978 		}
4979 	    }
4980 	    break;
4981 #endif
4982 
4983 	  case BACKREF + 1:
4984 	  case BACKREF + 2:
4985 	  case BACKREF + 3:
4986 	  case BACKREF + 4:
4987 	  case BACKREF + 5:
4988 	  case BACKREF + 6:
4989 	  case BACKREF + 7:
4990 	  case BACKREF + 8:
4991 	  case BACKREF + 9:
4992 	    {
4993 		int		len;
4994 
4995 		no = op - BACKREF;
4996 		cleanup_subexpr();
4997 		if (!REG_MULTI)		/* Single-line regexp */
4998 		{
4999 		    if (reg_startp[no] == NULL || reg_endp[no] == NULL)
5000 		    {
5001 			/* Backref was not set: Match an empty string. */
5002 			len = 0;
5003 		    }
5004 		    else
5005 		    {
5006 			/* Compare current input with back-ref in the same
5007 			 * line. */
5008 			len = (int)(reg_endp[no] - reg_startp[no]);
5009 			if (cstrncmp(reg_startp[no], reginput, &len) != 0)
5010 			    status = RA_NOMATCH;
5011 		    }
5012 		}
5013 		else				/* Multi-line regexp */
5014 		{
5015 		    if (reg_startpos[no].lnum < 0 || reg_endpos[no].lnum < 0)
5016 		    {
5017 			/* Backref was not set: Match an empty string. */
5018 			len = 0;
5019 		    }
5020 		    else
5021 		    {
5022 			if (reg_startpos[no].lnum == reglnum
5023 				&& reg_endpos[no].lnum == reglnum)
5024 			{
5025 			    /* Compare back-ref within the current line. */
5026 			    len = reg_endpos[no].col - reg_startpos[no].col;
5027 			    if (cstrncmp(regline + reg_startpos[no].col,
5028 							  reginput, &len) != 0)
5029 				status = RA_NOMATCH;
5030 			}
5031 			else
5032 			{
5033 			    /* Messy situation: Need to compare between two
5034 			     * lines. */
5035 			    int r = match_with_backref(
5036 					    reg_startpos[no].lnum,
5037 					    reg_startpos[no].col,
5038 					    reg_endpos[no].lnum,
5039 					    reg_endpos[no].col,
5040 					    &len);
5041 
5042 			    if (r != RA_MATCH)
5043 				status = r;
5044 			}
5045 		    }
5046 		}
5047 
5048 		/* Matched the backref, skip over it. */
5049 		reginput += len;
5050 	    }
5051 	    break;
5052 
5053 #ifdef FEAT_SYN_HL
5054 	  case ZREF + 1:
5055 	  case ZREF + 2:
5056 	  case ZREF + 3:
5057 	  case ZREF + 4:
5058 	  case ZREF + 5:
5059 	  case ZREF + 6:
5060 	  case ZREF + 7:
5061 	  case ZREF + 8:
5062 	  case ZREF + 9:
5063 	    {
5064 		int	len;
5065 
5066 		cleanup_zsubexpr();
5067 		no = op - ZREF;
5068 		if (re_extmatch_in != NULL
5069 			&& re_extmatch_in->matches[no] != NULL)
5070 		{
5071 		    len = (int)STRLEN(re_extmatch_in->matches[no]);
5072 		    if (cstrncmp(re_extmatch_in->matches[no],
5073 							  reginput, &len) != 0)
5074 			status = RA_NOMATCH;
5075 		    else
5076 			reginput += len;
5077 		}
5078 		else
5079 		{
5080 		    /* Backref was not set: Match an empty string. */
5081 		}
5082 	    }
5083 	    break;
5084 #endif
5085 
5086 	  case BRANCH:
5087 	    {
5088 		if (OP(next) != BRANCH) /* No choice. */
5089 		    next = OPERAND(scan);	/* Avoid recursion. */
5090 		else
5091 		{
5092 		    rp = regstack_push(RS_BRANCH, scan);
5093 		    if (rp == NULL)
5094 			status = RA_FAIL;
5095 		    else
5096 			status = RA_BREAK;	/* rest is below */
5097 		}
5098 	    }
5099 	    break;
5100 
5101 	  case BRACE_LIMITS:
5102 	    {
5103 		if (OP(next) == BRACE_SIMPLE)
5104 		{
5105 		    bl_minval = OPERAND_MIN(scan);
5106 		    bl_maxval = OPERAND_MAX(scan);
5107 		}
5108 		else if (OP(next) >= BRACE_COMPLEX
5109 			&& OP(next) < BRACE_COMPLEX + 10)
5110 		{
5111 		    no = OP(next) - BRACE_COMPLEX;
5112 		    brace_min[no] = OPERAND_MIN(scan);
5113 		    brace_max[no] = OPERAND_MAX(scan);
5114 		    brace_count[no] = 0;
5115 		}
5116 		else
5117 		{
5118 		    EMSG(_(e_internal));	    /* Shouldn't happen */
5119 		    status = RA_FAIL;
5120 		}
5121 	    }
5122 	    break;
5123 
5124 	  case BRACE_COMPLEX + 0:
5125 	  case BRACE_COMPLEX + 1:
5126 	  case BRACE_COMPLEX + 2:
5127 	  case BRACE_COMPLEX + 3:
5128 	  case BRACE_COMPLEX + 4:
5129 	  case BRACE_COMPLEX + 5:
5130 	  case BRACE_COMPLEX + 6:
5131 	  case BRACE_COMPLEX + 7:
5132 	  case BRACE_COMPLEX + 8:
5133 	  case BRACE_COMPLEX + 9:
5134 	    {
5135 		no = op - BRACE_COMPLEX;
5136 		++brace_count[no];
5137 
5138 		/* If not matched enough times yet, try one more */
5139 		if (brace_count[no] <= (brace_min[no] <= brace_max[no]
5140 					     ? brace_min[no] : brace_max[no]))
5141 		{
5142 		    rp = regstack_push(RS_BRCPLX_MORE, scan);
5143 		    if (rp == NULL)
5144 			status = RA_FAIL;
5145 		    else
5146 		    {
5147 			rp->rs_no = no;
5148 			reg_save(&rp->rs_un.regsave, &backpos);
5149 			next = OPERAND(scan);
5150 			/* We continue and handle the result when done. */
5151 		    }
5152 		    break;
5153 		}
5154 
5155 		/* If matched enough times, may try matching some more */
5156 		if (brace_min[no] <= brace_max[no])
5157 		{
5158 		    /* Range is the normal way around, use longest match */
5159 		    if (brace_count[no] <= brace_max[no])
5160 		    {
5161 			rp = regstack_push(RS_BRCPLX_LONG, scan);
5162 			if (rp == NULL)
5163 			    status = RA_FAIL;
5164 			else
5165 			{
5166 			    rp->rs_no = no;
5167 			    reg_save(&rp->rs_un.regsave, &backpos);
5168 			    next = OPERAND(scan);
5169 			    /* We continue and handle the result when done. */
5170 			}
5171 		    }
5172 		}
5173 		else
5174 		{
5175 		    /* Range is backwards, use shortest match first */
5176 		    if (brace_count[no] <= brace_min[no])
5177 		    {
5178 			rp = regstack_push(RS_BRCPLX_SHORT, scan);
5179 			if (rp == NULL)
5180 			    status = RA_FAIL;
5181 			else
5182 			{
5183 			    reg_save(&rp->rs_un.regsave, &backpos);
5184 			    /* We continue and handle the result when done. */
5185 			}
5186 		    }
5187 		}
5188 	    }
5189 	    break;
5190 
5191 	  case BRACE_SIMPLE:
5192 	  case STAR:
5193 	  case PLUS:
5194 	    {
5195 		regstar_T	rst;
5196 
5197 		/*
5198 		 * Lookahead to avoid useless match attempts when we know
5199 		 * what character comes next.
5200 		 */
5201 		if (OP(next) == EXACTLY)
5202 		{
5203 		    rst.nextb = *OPERAND(next);
5204 		    if (ireg_ic)
5205 		    {
5206 			if (MB_ISUPPER(rst.nextb))
5207 			    rst.nextb_ic = MB_TOLOWER(rst.nextb);
5208 			else
5209 			    rst.nextb_ic = MB_TOUPPER(rst.nextb);
5210 		    }
5211 		    else
5212 			rst.nextb_ic = rst.nextb;
5213 		}
5214 		else
5215 		{
5216 		    rst.nextb = NUL;
5217 		    rst.nextb_ic = NUL;
5218 		}
5219 		if (op != BRACE_SIMPLE)
5220 		{
5221 		    rst.minval = (op == STAR) ? 0 : 1;
5222 		    rst.maxval = MAX_LIMIT;
5223 		}
5224 		else
5225 		{
5226 		    rst.minval = bl_minval;
5227 		    rst.maxval = bl_maxval;
5228 		}
5229 
5230 		/*
5231 		 * When maxval > minval, try matching as much as possible, up
5232 		 * to maxval.  When maxval < minval, try matching at least the
5233 		 * minimal number (since the range is backwards, that's also
5234 		 * maxval!).
5235 		 */
5236 		rst.count = regrepeat(OPERAND(scan), rst.maxval);
5237 		if (got_int)
5238 		{
5239 		    status = RA_FAIL;
5240 		    break;
5241 		}
5242 		if (rst.minval <= rst.maxval
5243 			  ? rst.count >= rst.minval : rst.count >= rst.maxval)
5244 		{
5245 		    /* It could match.  Prepare for trying to match what
5246 		     * follows.  The code is below.  Parameters are stored in
5247 		     * a regstar_T on the regstack. */
5248 		    if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5249 		    {
5250 			EMSG(_(e_maxmempat));
5251 			status = RA_FAIL;
5252 		    }
5253 		    else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
5254 			status = RA_FAIL;
5255 		    else
5256 		    {
5257 			regstack.ga_len += sizeof(regstar_T);
5258 			rp = regstack_push(rst.minval <= rst.maxval
5259 					? RS_STAR_LONG : RS_STAR_SHORT, scan);
5260 			if (rp == NULL)
5261 			    status = RA_FAIL;
5262 			else
5263 			{
5264 			    *(((regstar_T *)rp) - 1) = rst;
5265 			    status = RA_BREAK;	    /* skip the restore bits */
5266 			}
5267 		    }
5268 		}
5269 		else
5270 		    status = RA_NOMATCH;
5271 
5272 	    }
5273 	    break;
5274 
5275 	  case NOMATCH:
5276 	  case MATCH:
5277 	  case SUBPAT:
5278 	    rp = regstack_push(RS_NOMATCH, scan);
5279 	    if (rp == NULL)
5280 		status = RA_FAIL;
5281 	    else
5282 	    {
5283 		rp->rs_no = op;
5284 		reg_save(&rp->rs_un.regsave, &backpos);
5285 		next = OPERAND(scan);
5286 		/* We continue and handle the result when done. */
5287 	    }
5288 	    break;
5289 
5290 	  case BEHIND:
5291 	  case NOBEHIND:
5292 	    /* Need a bit of room to store extra positions. */
5293 	    if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5294 	    {
5295 		EMSG(_(e_maxmempat));
5296 		status = RA_FAIL;
5297 	    }
5298 	    else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
5299 		status = RA_FAIL;
5300 	    else
5301 	    {
5302 		regstack.ga_len += sizeof(regbehind_T);
5303 		rp = regstack_push(RS_BEHIND1, scan);
5304 		if (rp == NULL)
5305 		    status = RA_FAIL;
5306 		else
5307 		{
5308 		    /* Need to save the subexpr to be able to restore them
5309 		     * when there is a match but we don't use it. */
5310 		    save_subexpr(((regbehind_T *)rp) - 1);
5311 
5312 		    rp->rs_no = op;
5313 		    reg_save(&rp->rs_un.regsave, &backpos);
5314 		    /* First try if what follows matches.  If it does then we
5315 		     * check the behind match by looping. */
5316 		}
5317 	    }
5318 	    break;
5319 
5320 	  case BHPOS:
5321 	    if (REG_MULTI)
5322 	    {
5323 		if (behind_pos.rs_u.pos.col != (colnr_T)(reginput - regline)
5324 			|| behind_pos.rs_u.pos.lnum != reglnum)
5325 		    status = RA_NOMATCH;
5326 	    }
5327 	    else if (behind_pos.rs_u.ptr != reginput)
5328 		status = RA_NOMATCH;
5329 	    break;
5330 
5331 	  case NEWL:
5332 	    if ((c != NUL || !REG_MULTI || reglnum > reg_maxline
5333 			     || reg_line_lbr) && (c != '\n' || !reg_line_lbr))
5334 		status = RA_NOMATCH;
5335 	    else if (reg_line_lbr)
5336 		ADVANCE_REGINPUT();
5337 	    else
5338 		reg_nextline();
5339 	    break;
5340 
5341 	  case END:
5342 	    status = RA_MATCH;	/* Success! */
5343 	    break;
5344 
5345 	  default:
5346 	    EMSG(_(e_re_corr));
5347 #ifdef DEBUG
5348 	    printf("Illegal op code %d\n", op);
5349 #endif
5350 	    status = RA_FAIL;
5351 	    break;
5352 	  }
5353 	}
5354 
5355 	/* If we can't continue sequentially, break the inner loop. */
5356 	if (status != RA_CONT)
5357 	    break;
5358 
5359 	/* Continue in inner loop, advance to next item. */
5360 	scan = next;
5361 
5362     } /* end of inner loop */
5363 
5364     /*
5365      * If there is something on the regstack execute the code for the state.
5366      * If the state is popped then loop and use the older state.
5367      */
5368     while (regstack.ga_len > 0 && status != RA_FAIL)
5369     {
5370 	rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5371 	switch (rp->rs_state)
5372 	{
5373 	  case RS_NOPEN:
5374 	    /* Result is passed on as-is, simply pop the state. */
5375 	    regstack_pop(&scan);
5376 	    break;
5377 
5378 	  case RS_MOPEN:
5379 	    /* Pop the state.  Restore pointers when there is no match. */
5380 	    if (status == RA_NOMATCH)
5381 		restore_se(&rp->rs_un.sesave, &reg_startpos[rp->rs_no],
5382 						  &reg_startp[rp->rs_no]);
5383 	    regstack_pop(&scan);
5384 	    break;
5385 
5386 #ifdef FEAT_SYN_HL
5387 	  case RS_ZOPEN:
5388 	    /* Pop the state.  Restore pointers when there is no match. */
5389 	    if (status == RA_NOMATCH)
5390 		restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
5391 						 &reg_startzp[rp->rs_no]);
5392 	    regstack_pop(&scan);
5393 	    break;
5394 #endif
5395 
5396 	  case RS_MCLOSE:
5397 	    /* Pop the state.  Restore pointers when there is no match. */
5398 	    if (status == RA_NOMATCH)
5399 		restore_se(&rp->rs_un.sesave, &reg_endpos[rp->rs_no],
5400 						    &reg_endp[rp->rs_no]);
5401 	    regstack_pop(&scan);
5402 	    break;
5403 
5404 #ifdef FEAT_SYN_HL
5405 	  case RS_ZCLOSE:
5406 	    /* Pop the state.  Restore pointers when there is no match. */
5407 	    if (status == RA_NOMATCH)
5408 		restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
5409 						   &reg_endzp[rp->rs_no]);
5410 	    regstack_pop(&scan);
5411 	    break;
5412 #endif
5413 
5414 	  case RS_BRANCH:
5415 	    if (status == RA_MATCH)
5416 		/* this branch matched, use it */
5417 		regstack_pop(&scan);
5418 	    else
5419 	    {
5420 		if (status != RA_BREAK)
5421 		{
5422 		    /* After a non-matching branch: try next one. */
5423 		    reg_restore(&rp->rs_un.regsave, &backpos);
5424 		    scan = rp->rs_scan;
5425 		}
5426 		if (scan == NULL || OP(scan) != BRANCH)
5427 		{
5428 		    /* no more branches, didn't find a match */
5429 		    status = RA_NOMATCH;
5430 		    regstack_pop(&scan);
5431 		}
5432 		else
5433 		{
5434 		    /* Prepare to try a branch. */
5435 		    rp->rs_scan = regnext(scan);
5436 		    reg_save(&rp->rs_un.regsave, &backpos);
5437 		    scan = OPERAND(scan);
5438 		}
5439 	    }
5440 	    break;
5441 
5442 	  case RS_BRCPLX_MORE:
5443 	    /* Pop the state.  Restore pointers when there is no match. */
5444 	    if (status == RA_NOMATCH)
5445 	    {
5446 		reg_restore(&rp->rs_un.regsave, &backpos);
5447 		--brace_count[rp->rs_no];	/* decrement match count */
5448 	    }
5449 	    regstack_pop(&scan);
5450 	    break;
5451 
5452 	  case RS_BRCPLX_LONG:
5453 	    /* Pop the state.  Restore pointers when there is no match. */
5454 	    if (status == RA_NOMATCH)
5455 	    {
5456 		/* There was no match, but we did find enough matches. */
5457 		reg_restore(&rp->rs_un.regsave, &backpos);
5458 		--brace_count[rp->rs_no];
5459 		/* continue with the items after "\{}" */
5460 		status = RA_CONT;
5461 	    }
5462 	    regstack_pop(&scan);
5463 	    if (status == RA_CONT)
5464 		scan = regnext(scan);
5465 	    break;
5466 
5467 	  case RS_BRCPLX_SHORT:
5468 	    /* Pop the state.  Restore pointers when there is no match. */
5469 	    if (status == RA_NOMATCH)
5470 		/* There was no match, try to match one more item. */
5471 		reg_restore(&rp->rs_un.regsave, &backpos);
5472 	    regstack_pop(&scan);
5473 	    if (status == RA_NOMATCH)
5474 	    {
5475 		scan = OPERAND(scan);
5476 		status = RA_CONT;
5477 	    }
5478 	    break;
5479 
5480 	  case RS_NOMATCH:
5481 	    /* Pop the state.  If the operand matches for NOMATCH or
5482 	     * doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
5483 	     * except for SUBPAT, and continue with the next item. */
5484 	    if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
5485 		status = RA_NOMATCH;
5486 	    else
5487 	    {
5488 		status = RA_CONT;
5489 		if (rp->rs_no != SUBPAT)	/* zero-width */
5490 		    reg_restore(&rp->rs_un.regsave, &backpos);
5491 	    }
5492 	    regstack_pop(&scan);
5493 	    if (status == RA_CONT)
5494 		scan = regnext(scan);
5495 	    break;
5496 
5497 	  case RS_BEHIND1:
5498 	    if (status == RA_NOMATCH)
5499 	    {
5500 		regstack_pop(&scan);
5501 		regstack.ga_len -= sizeof(regbehind_T);
5502 	    }
5503 	    else
5504 	    {
5505 		/* The stuff after BEHIND/NOBEHIND matches.  Now try if
5506 		 * the behind part does (not) match before the current
5507 		 * position in the input.  This must be done at every
5508 		 * position in the input and checking if the match ends at
5509 		 * the current position. */
5510 
5511 		/* save the position after the found match for next */
5512 		reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
5513 
5514 		/* Start looking for a match with operand at the current
5515 		 * position.  Go back one character until we find the
5516 		 * result, hitting the start of the line or the previous
5517 		 * line (for multi-line matching).
5518 		 * Set behind_pos to where the match should end, BHPOS
5519 		 * will match it.  Save the current value. */
5520 		(((regbehind_T *)rp) - 1)->save_behind = behind_pos;
5521 		behind_pos = rp->rs_un.regsave;
5522 
5523 		rp->rs_state = RS_BEHIND2;
5524 
5525 		reg_restore(&rp->rs_un.regsave, &backpos);
5526 		scan = OPERAND(rp->rs_scan) + 4;
5527 	    }
5528 	    break;
5529 
5530 	  case RS_BEHIND2:
5531 	    /*
5532 	     * Looping for BEHIND / NOBEHIND match.
5533 	     */
5534 	    if (status == RA_MATCH && reg_save_equal(&behind_pos))
5535 	    {
5536 		/* found a match that ends where "next" started */
5537 		behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5538 		if (rp->rs_no == BEHIND)
5539 		    reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5540 								    &backpos);
5541 		else
5542 		{
5543 		    /* But we didn't want a match.  Need to restore the
5544 		     * subexpr, because what follows matched, so they have
5545 		     * been set. */
5546 		    status = RA_NOMATCH;
5547 		    restore_subexpr(((regbehind_T *)rp) - 1);
5548 		}
5549 		regstack_pop(&scan);
5550 		regstack.ga_len -= sizeof(regbehind_T);
5551 	    }
5552 	    else
5553 	    {
5554 		long limit;
5555 
5556 		/* No match or a match that doesn't end where we want it: Go
5557 		 * back one character.  May go to previous line once. */
5558 		no = OK;
5559 		limit = OPERAND_MIN(rp->rs_scan);
5560 		if (REG_MULTI)
5561 		{
5562 		    if (limit > 0
5563 			    && ((rp->rs_un.regsave.rs_u.pos.lnum
5564 						    < behind_pos.rs_u.pos.lnum
5565 				    ? (colnr_T)STRLEN(regline)
5566 				    : behind_pos.rs_u.pos.col)
5567 				- rp->rs_un.regsave.rs_u.pos.col >= limit))
5568 			no = FAIL;
5569 		    else if (rp->rs_un.regsave.rs_u.pos.col == 0)
5570 		    {
5571 			if (rp->rs_un.regsave.rs_u.pos.lnum
5572 					< behind_pos.rs_u.pos.lnum
5573 				|| reg_getline(
5574 					--rp->rs_un.regsave.rs_u.pos.lnum)
5575 								  == NULL)
5576 			    no = FAIL;
5577 			else
5578 			{
5579 			    reg_restore(&rp->rs_un.regsave, &backpos);
5580 			    rp->rs_un.regsave.rs_u.pos.col =
5581 						 (colnr_T)STRLEN(regline);
5582 			}
5583 		    }
5584 		    else
5585 		    {
5586 #ifdef FEAT_MBYTE
5587 			if (has_mbyte)
5588 			    rp->rs_un.regsave.rs_u.pos.col -=
5589 				(*mb_head_off)(regline, regline
5590 				    + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
5591 			else
5592 #endif
5593 			    --rp->rs_un.regsave.rs_u.pos.col;
5594 		    }
5595 		}
5596 		else
5597 		{
5598 		    if (rp->rs_un.regsave.rs_u.ptr == regline)
5599 			no = FAIL;
5600 		    else
5601 		    {
5602 			mb_ptr_back(regline, rp->rs_un.regsave.rs_u.ptr);
5603 			if (limit > 0 && (long)(behind_pos.rs_u.ptr
5604 				     - rp->rs_un.regsave.rs_u.ptr) > limit)
5605 			    no = FAIL;
5606 		    }
5607 		}
5608 		if (no == OK)
5609 		{
5610 		    /* Advanced, prepare for finding match again. */
5611 		    reg_restore(&rp->rs_un.regsave, &backpos);
5612 		    scan = OPERAND(rp->rs_scan) + 4;
5613 		    if (status == RA_MATCH)
5614 		    {
5615 			/* We did match, so subexpr may have been changed,
5616 			 * need to restore them for the next try. */
5617 			status = RA_NOMATCH;
5618 			restore_subexpr(((regbehind_T *)rp) - 1);
5619 		    }
5620 		}
5621 		else
5622 		{
5623 		    /* Can't advance.  For NOBEHIND that's a match. */
5624 		    behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
5625 		    if (rp->rs_no == NOBEHIND)
5626 		    {
5627 			reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
5628 								    &backpos);
5629 			status = RA_MATCH;
5630 		    }
5631 		    else
5632 		    {
5633 			/* We do want a proper match.  Need to restore the
5634 			 * subexpr if we had a match, because they may have
5635 			 * been set. */
5636 			if (status == RA_MATCH)
5637 			{
5638 			    status = RA_NOMATCH;
5639 			    restore_subexpr(((regbehind_T *)rp) - 1);
5640 			}
5641 		    }
5642 		    regstack_pop(&scan);
5643 		    regstack.ga_len -= sizeof(regbehind_T);
5644 		}
5645 	    }
5646 	    break;
5647 
5648 	  case RS_STAR_LONG:
5649 	  case RS_STAR_SHORT:
5650 	    {
5651 		regstar_T	    *rst = ((regstar_T *)rp) - 1;
5652 
5653 		if (status == RA_MATCH)
5654 		{
5655 		    regstack_pop(&scan);
5656 		    regstack.ga_len -= sizeof(regstar_T);
5657 		    break;
5658 		}
5659 
5660 		/* Tried once already, restore input pointers. */
5661 		if (status != RA_BREAK)
5662 		    reg_restore(&rp->rs_un.regsave, &backpos);
5663 
5664 		/* Repeat until we found a position where it could match. */
5665 		for (;;)
5666 		{
5667 		    if (status != RA_BREAK)
5668 		    {
5669 			/* Tried first position already, advance. */
5670 			if (rp->rs_state == RS_STAR_LONG)
5671 			{
5672 			    /* Trying for longest match, but couldn't or
5673 			     * didn't match -- back up one char. */
5674 			    if (--rst->count < rst->minval)
5675 				break;
5676 			    if (reginput == regline)
5677 			    {
5678 				/* backup to last char of previous line */
5679 				--reglnum;
5680 				regline = reg_getline(reglnum);
5681 				/* Just in case regrepeat() didn't count
5682 				 * right. */
5683 				if (regline == NULL)
5684 				    break;
5685 				reginput = regline + STRLEN(regline);
5686 				fast_breakcheck();
5687 			    }
5688 			    else
5689 				mb_ptr_back(regline, reginput);
5690 			}
5691 			else
5692 			{
5693 			    /* Range is backwards, use shortest match first.
5694 			     * Careful: maxval and minval are exchanged!
5695 			     * Couldn't or didn't match: try advancing one
5696 			     * char. */
5697 			    if (rst->count == rst->minval
5698 				  || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
5699 				break;
5700 			    ++rst->count;
5701 			}
5702 			if (got_int)
5703 			    break;
5704 		    }
5705 		    else
5706 			status = RA_NOMATCH;
5707 
5708 		    /* If it could match, try it. */
5709 		    if (rst->nextb == NUL || *reginput == rst->nextb
5710 					     || *reginput == rst->nextb_ic)
5711 		    {
5712 			reg_save(&rp->rs_un.regsave, &backpos);
5713 			scan = regnext(rp->rs_scan);
5714 			status = RA_CONT;
5715 			break;
5716 		    }
5717 		}
5718 		if (status != RA_CONT)
5719 		{
5720 		    /* Failed. */
5721 		    regstack_pop(&scan);
5722 		    regstack.ga_len -= sizeof(regstar_T);
5723 		    status = RA_NOMATCH;
5724 		}
5725 	    }
5726 	    break;
5727 	}
5728 
5729 	/* If we want to continue the inner loop or didn't pop a state
5730 	 * continue matching loop */
5731 	if (status == RA_CONT || rp == (regitem_T *)
5732 			     ((char *)regstack.ga_data + regstack.ga_len) - 1)
5733 	    break;
5734     }
5735 
5736     /* May need to continue with the inner loop, starting at "scan". */
5737     if (status == RA_CONT)
5738 	continue;
5739 
5740     /*
5741      * If the regstack is empty or something failed we are done.
5742      */
5743     if (regstack.ga_len == 0 || status == RA_FAIL)
5744     {
5745 	if (scan == NULL)
5746 	{
5747 	    /*
5748 	     * We get here only if there's trouble -- normally "case END" is
5749 	     * the terminating point.
5750 	     */
5751 	    EMSG(_(e_re_corr));
5752 #ifdef DEBUG
5753 	    printf("Premature EOL\n");
5754 #endif
5755 	}
5756 	if (status == RA_FAIL)
5757 	    got_int = TRUE;
5758 	return (status == RA_MATCH);
5759     }
5760 
5761   } /* End of loop until the regstack is empty. */
5762 
5763   /* NOTREACHED */
5764 }
5765 
5766 /*
5767  * Push an item onto the regstack.
5768  * Returns pointer to new item.  Returns NULL when out of memory.
5769  */
5770     static regitem_T *
5771 regstack_push(state, scan)
5772     regstate_T	state;
5773     char_u	*scan;
5774 {
5775     regitem_T	*rp;
5776 
5777     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
5778     {
5779 	EMSG(_(e_maxmempat));
5780 	return NULL;
5781     }
5782     if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
5783 	return NULL;
5784 
5785     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
5786     rp->rs_state = state;
5787     rp->rs_scan = scan;
5788 
5789     regstack.ga_len += sizeof(regitem_T);
5790     return rp;
5791 }
5792 
5793 /*
5794  * Pop an item from the regstack.
5795  */
5796     static void
5797 regstack_pop(scan)
5798     char_u	**scan;
5799 {
5800     regitem_T	*rp;
5801 
5802     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
5803     *scan = rp->rs_scan;
5804 
5805     regstack.ga_len -= sizeof(regitem_T);
5806 }
5807 
5808 /*
5809  * regrepeat - repeatedly match something simple, return how many.
5810  * Advances reginput (and reglnum) to just after the matched chars.
5811  */
5812     static int
5813 regrepeat(p, maxcount)
5814     char_u	*p;
5815     long	maxcount;   /* maximum number of matches allowed */
5816 {
5817     long	count = 0;
5818     char_u	*scan;
5819     char_u	*opnd;
5820     int		mask;
5821     int		testval = 0;
5822 
5823     scan = reginput;	    /* Make local copy of reginput for speed. */
5824     opnd = OPERAND(p);
5825     switch (OP(p))
5826     {
5827       case ANY:
5828       case ANY + ADD_NL:
5829 	while (count < maxcount)
5830 	{
5831 	    /* Matching anything means we continue until end-of-line (or
5832 	     * end-of-file for ANY + ADD_NL), only limited by maxcount. */
5833 	    while (*scan != NUL && count < maxcount)
5834 	    {
5835 		++count;
5836 		mb_ptr_adv(scan);
5837 	    }
5838 	    if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5839 					 || reg_line_lbr || count == maxcount)
5840 		break;
5841 	    ++count;		/* count the line-break */
5842 	    reg_nextline();
5843 	    scan = reginput;
5844 	    if (got_int)
5845 		break;
5846 	}
5847 	break;
5848 
5849       case IDENT:
5850       case IDENT + ADD_NL:
5851 	testval = TRUE;
5852 	/*FALLTHROUGH*/
5853       case SIDENT:
5854       case SIDENT + ADD_NL:
5855 	while (count < maxcount)
5856 	{
5857 	    if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
5858 	    {
5859 		mb_ptr_adv(scan);
5860 	    }
5861 	    else if (*scan == NUL)
5862 	    {
5863 		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5864 							      || reg_line_lbr)
5865 		    break;
5866 		reg_nextline();
5867 		scan = reginput;
5868 		if (got_int)
5869 		    break;
5870 	    }
5871 	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5872 		++scan;
5873 	    else
5874 		break;
5875 	    ++count;
5876 	}
5877 	break;
5878 
5879       case KWORD:
5880       case KWORD + ADD_NL:
5881 	testval = TRUE;
5882 	/*FALLTHROUGH*/
5883       case SKWORD:
5884       case SKWORD + ADD_NL:
5885 	while (count < maxcount)
5886 	{
5887 	    if (vim_iswordp_buf(scan, reg_buf)
5888 					  && (testval || !VIM_ISDIGIT(*scan)))
5889 	    {
5890 		mb_ptr_adv(scan);
5891 	    }
5892 	    else if (*scan == NUL)
5893 	    {
5894 		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5895 							      || reg_line_lbr)
5896 		    break;
5897 		reg_nextline();
5898 		scan = reginput;
5899 		if (got_int)
5900 		    break;
5901 	    }
5902 	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5903 		++scan;
5904 	    else
5905 		break;
5906 	    ++count;
5907 	}
5908 	break;
5909 
5910       case FNAME:
5911       case FNAME + ADD_NL:
5912 	testval = TRUE;
5913 	/*FALLTHROUGH*/
5914       case SFNAME:
5915       case SFNAME + ADD_NL:
5916 	while (count < maxcount)
5917 	{
5918 	    if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
5919 	    {
5920 		mb_ptr_adv(scan);
5921 	    }
5922 	    else if (*scan == NUL)
5923 	    {
5924 		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5925 							      || reg_line_lbr)
5926 		    break;
5927 		reg_nextline();
5928 		scan = reginput;
5929 		if (got_int)
5930 		    break;
5931 	    }
5932 	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5933 		++scan;
5934 	    else
5935 		break;
5936 	    ++count;
5937 	}
5938 	break;
5939 
5940       case PRINT:
5941       case PRINT + ADD_NL:
5942 	testval = TRUE;
5943 	/*FALLTHROUGH*/
5944       case SPRINT:
5945       case SPRINT + ADD_NL:
5946 	while (count < maxcount)
5947 	{
5948 	    if (*scan == NUL)
5949 	    {
5950 		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5951 							      || reg_line_lbr)
5952 		    break;
5953 		reg_nextline();
5954 		scan = reginput;
5955 		if (got_int)
5956 		    break;
5957 	    }
5958 	    else if (vim_isprintc(PTR2CHAR(scan)) == 1
5959 					  && (testval || !VIM_ISDIGIT(*scan)))
5960 	    {
5961 		mb_ptr_adv(scan);
5962 	    }
5963 	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
5964 		++scan;
5965 	    else
5966 		break;
5967 	    ++count;
5968 	}
5969 	break;
5970 
5971       case WHITE:
5972       case WHITE + ADD_NL:
5973 	testval = mask = RI_WHITE;
5974 do_class:
5975 	while (count < maxcount)
5976 	{
5977 #ifdef FEAT_MBYTE
5978 	    int		l;
5979 #endif
5980 	    if (*scan == NUL)
5981 	    {
5982 		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
5983 							      || reg_line_lbr)
5984 		    break;
5985 		reg_nextline();
5986 		scan = reginput;
5987 		if (got_int)
5988 		    break;
5989 	    }
5990 #ifdef FEAT_MBYTE
5991 	    else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
5992 	    {
5993 		if (testval != 0)
5994 		    break;
5995 		scan += l;
5996 	    }
5997 #endif
5998 	    else if ((class_tab[*scan] & mask) == testval)
5999 		++scan;
6000 	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6001 		++scan;
6002 	    else
6003 		break;
6004 	    ++count;
6005 	}
6006 	break;
6007 
6008       case NWHITE:
6009       case NWHITE + ADD_NL:
6010 	mask = RI_WHITE;
6011 	goto do_class;
6012       case DIGIT:
6013       case DIGIT + ADD_NL:
6014 	testval = mask = RI_DIGIT;
6015 	goto do_class;
6016       case NDIGIT:
6017       case NDIGIT + ADD_NL:
6018 	mask = RI_DIGIT;
6019 	goto do_class;
6020       case HEX:
6021       case HEX + ADD_NL:
6022 	testval = mask = RI_HEX;
6023 	goto do_class;
6024       case NHEX:
6025       case NHEX + ADD_NL:
6026 	mask = RI_HEX;
6027 	goto do_class;
6028       case OCTAL:
6029       case OCTAL + ADD_NL:
6030 	testval = mask = RI_OCTAL;
6031 	goto do_class;
6032       case NOCTAL:
6033       case NOCTAL + ADD_NL:
6034 	mask = RI_OCTAL;
6035 	goto do_class;
6036       case WORD:
6037       case WORD + ADD_NL:
6038 	testval = mask = RI_WORD;
6039 	goto do_class;
6040       case NWORD:
6041       case NWORD + ADD_NL:
6042 	mask = RI_WORD;
6043 	goto do_class;
6044       case HEAD:
6045       case HEAD + ADD_NL:
6046 	testval = mask = RI_HEAD;
6047 	goto do_class;
6048       case NHEAD:
6049       case NHEAD + ADD_NL:
6050 	mask = RI_HEAD;
6051 	goto do_class;
6052       case ALPHA:
6053       case ALPHA + ADD_NL:
6054 	testval = mask = RI_ALPHA;
6055 	goto do_class;
6056       case NALPHA:
6057       case NALPHA + ADD_NL:
6058 	mask = RI_ALPHA;
6059 	goto do_class;
6060       case LOWER:
6061       case LOWER + ADD_NL:
6062 	testval = mask = RI_LOWER;
6063 	goto do_class;
6064       case NLOWER:
6065       case NLOWER + ADD_NL:
6066 	mask = RI_LOWER;
6067 	goto do_class;
6068       case UPPER:
6069       case UPPER + ADD_NL:
6070 	testval = mask = RI_UPPER;
6071 	goto do_class;
6072       case NUPPER:
6073       case NUPPER + ADD_NL:
6074 	mask = RI_UPPER;
6075 	goto do_class;
6076 
6077       case EXACTLY:
6078 	{
6079 	    int	    cu, cl;
6080 
6081 	    /* This doesn't do a multi-byte character, because a MULTIBYTECODE
6082 	     * would have been used for it.  It does handle single-byte
6083 	     * characters, such as latin1. */
6084 	    if (ireg_ic)
6085 	    {
6086 		cu = MB_TOUPPER(*opnd);
6087 		cl = MB_TOLOWER(*opnd);
6088 		while (count < maxcount && (*scan == cu || *scan == cl))
6089 		{
6090 		    count++;
6091 		    scan++;
6092 		}
6093 	    }
6094 	    else
6095 	    {
6096 		cu = *opnd;
6097 		while (count < maxcount && *scan == cu)
6098 		{
6099 		    count++;
6100 		    scan++;
6101 		}
6102 	    }
6103 	    break;
6104 	}
6105 
6106 #ifdef FEAT_MBYTE
6107       case MULTIBYTECODE:
6108 	{
6109 	    int		i, len, cf = 0;
6110 
6111 	    /* Safety check (just in case 'encoding' was changed since
6112 	     * compiling the program). */
6113 	    if ((len = (*mb_ptr2len)(opnd)) > 1)
6114 	    {
6115 		if (ireg_ic && enc_utf8)
6116 		    cf = utf_fold(utf_ptr2char(opnd));
6117 		while (count < maxcount && (*mb_ptr2len)(scan) >= len)
6118 		{
6119 		    for (i = 0; i < len; ++i)
6120 			if (opnd[i] != scan[i])
6121 			    break;
6122 		    if (i < len && (!ireg_ic || !enc_utf8
6123 					|| utf_fold(utf_ptr2char(scan)) != cf))
6124 			break;
6125 		    scan += len;
6126 		    ++count;
6127 		}
6128 	    }
6129 	}
6130 	break;
6131 #endif
6132 
6133       case ANYOF:
6134       case ANYOF + ADD_NL:
6135 	testval = TRUE;
6136 	/*FALLTHROUGH*/
6137 
6138       case ANYBUT:
6139       case ANYBUT + ADD_NL:
6140 	while (count < maxcount)
6141 	{
6142 #ifdef FEAT_MBYTE
6143 	    int len;
6144 #endif
6145 	    if (*scan == NUL)
6146 	    {
6147 		if (!REG_MULTI || !WITH_NL(OP(p)) || reglnum > reg_maxline
6148 							      || reg_line_lbr)
6149 		    break;
6150 		reg_nextline();
6151 		scan = reginput;
6152 		if (got_int)
6153 		    break;
6154 	    }
6155 	    else if (reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
6156 		++scan;
6157 #ifdef FEAT_MBYTE
6158 	    else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
6159 	    {
6160 		if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
6161 		    break;
6162 		scan += len;
6163 	    }
6164 #endif
6165 	    else
6166 	    {
6167 		if ((cstrchr(opnd, *scan) == NULL) == testval)
6168 		    break;
6169 		++scan;
6170 	    }
6171 	    ++count;
6172 	}
6173 	break;
6174 
6175       case NEWL:
6176 	while (count < maxcount
6177 		&& ((*scan == NUL && reglnum <= reg_maxline && !reg_line_lbr
6178 			    && REG_MULTI) || (*scan == '\n' && reg_line_lbr)))
6179 	{
6180 	    count++;
6181 	    if (reg_line_lbr)
6182 		ADVANCE_REGINPUT();
6183 	    else
6184 		reg_nextline();
6185 	    scan = reginput;
6186 	    if (got_int)
6187 		break;
6188 	}
6189 	break;
6190 
6191       default:			/* Oh dear.  Called inappropriately. */
6192 	EMSG(_(e_re_corr));
6193 #ifdef DEBUG
6194 	printf("Called regrepeat with op code %d\n", OP(p));
6195 #endif
6196 	break;
6197     }
6198 
6199     reginput = scan;
6200 
6201     return (int)count;
6202 }
6203 
6204 /*
6205  * regnext - dig the "next" pointer out of a node
6206  * Returns NULL when calculating size, when there is no next item and when
6207  * there is an error.
6208  */
6209     static char_u *
6210 regnext(p)
6211     char_u  *p;
6212 {
6213     int	    offset;
6214 
6215     if (p == JUST_CALC_SIZE || reg_toolong)
6216 	return NULL;
6217 
6218     offset = NEXT(p);
6219     if (offset == 0)
6220 	return NULL;
6221 
6222     if (OP(p) == BACK)
6223 	return p - offset;
6224     else
6225 	return p + offset;
6226 }
6227 
6228 /*
6229  * Check the regexp program for its magic number.
6230  * Return TRUE if it's wrong.
6231  */
6232     static int
6233 prog_magic_wrong()
6234 {
6235     regprog_T	*prog;
6236 
6237     prog = REG_MULTI ? reg_mmatch->regprog : reg_match->regprog;
6238     if (prog->engine == &nfa_regengine)
6239 	/* For NFA matcher we don't check the magic */
6240 	return FALSE;
6241 
6242     if (UCHARAT(((bt_regprog_T *)prog)->program) != REGMAGIC)
6243     {
6244 	EMSG(_(e_re_corr));
6245 	return TRUE;
6246     }
6247     return FALSE;
6248 }
6249 
6250 /*
6251  * Cleanup the subexpressions, if this wasn't done yet.
6252  * This construction is used to clear the subexpressions only when they are
6253  * used (to increase speed).
6254  */
6255     static void
6256 cleanup_subexpr()
6257 {
6258     if (need_clear_subexpr)
6259     {
6260 	if (REG_MULTI)
6261 	{
6262 	    /* Use 0xff to set lnum to -1 */
6263 	    vim_memset(reg_startpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6264 	    vim_memset(reg_endpos, 0xff, sizeof(lpos_T) * NSUBEXP);
6265 	}
6266 	else
6267 	{
6268 	    vim_memset(reg_startp, 0, sizeof(char_u *) * NSUBEXP);
6269 	    vim_memset(reg_endp, 0, sizeof(char_u *) * NSUBEXP);
6270 	}
6271 	need_clear_subexpr = FALSE;
6272     }
6273 }
6274 
#ifdef FEAT_SYN_HL
/*
 * Clear the "z" subexpression start/end info, unless this was already done.
 * Works like cleanup_subexpr(), using the "need_clear_zsubexpr" flag.
 */
    static void
cleanup_zsubexpr()
{
    if (!need_clear_zsubexpr)
	return;

    if (!REG_MULTI)
    {
	vim_memset(reg_startzp, 0, sizeof(char_u *) * NSUBEXP);
	vim_memset(reg_endzp, 0, sizeof(char_u *) * NSUBEXP);
    }
    else
    {
	/* Filling with 0xff makes every lnum -1. */
	vim_memset(reg_startzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
	vim_memset(reg_endzpos, 0xff, sizeof(lpos_T) * NSUBEXP);
    }
    need_clear_zsubexpr = FALSE;
}
#endif
6296 
6297 /*
6298  * Save the current subexpr to "bp", so that they can be restored
6299  * later by restore_subexpr().
6300  */
6301     static void
6302 save_subexpr(bp)
6303     regbehind_T *bp;
6304 {
6305     int i;
6306 
6307     /* When "need_clear_subexpr" is set we don't need to save the values, only
6308      * remember that this flag needs to be set again when restoring. */
6309     bp->save_need_clear_subexpr = need_clear_subexpr;
6310     if (!need_clear_subexpr)
6311     {
6312 	for (i = 0; i < NSUBEXP; ++i)
6313 	{
6314 	    if (REG_MULTI)
6315 	    {
6316 		bp->save_start[i].se_u.pos = reg_startpos[i];
6317 		bp->save_end[i].se_u.pos = reg_endpos[i];
6318 	    }
6319 	    else
6320 	    {
6321 		bp->save_start[i].se_u.ptr = reg_startp[i];
6322 		bp->save_end[i].se_u.ptr = reg_endp[i];
6323 	    }
6324 	}
6325     }
6326 }
6327 
6328 /*
6329  * Restore the subexpr from "bp".
6330  */
6331     static void
6332 restore_subexpr(bp)
6333     regbehind_T *bp;
6334 {
6335     int i;
6336 
6337     /* Only need to restore saved values when they are not to be cleared. */
6338     need_clear_subexpr = bp->save_need_clear_subexpr;
6339     if (!need_clear_subexpr)
6340     {
6341 	for (i = 0; i < NSUBEXP; ++i)
6342 	{
6343 	    if (REG_MULTI)
6344 	    {
6345 		reg_startpos[i] = bp->save_start[i].se_u.pos;
6346 		reg_endpos[i] = bp->save_end[i].se_u.pos;
6347 	    }
6348 	    else
6349 	    {
6350 		reg_startp[i] = bp->save_start[i].se_u.ptr;
6351 		reg_endp[i] = bp->save_end[i].se_u.ptr;
6352 	    }
6353 	}
6354     }
6355 }
6356 
6357 /*
6358  * Advance reglnum, regline and reginput to the next line.
6359  */
6360     static void
6361 reg_nextline()
6362 {
6363     regline = reg_getline(++reglnum);
6364     reginput = regline;
6365     fast_breakcheck();
6366 }
6367 
6368 /*
6369  * Save the input line and position in a regsave_T.
6370  */
6371     static void
6372 reg_save(save, gap)
6373     regsave_T	*save;
6374     garray_T	*gap;
6375 {
6376     if (REG_MULTI)
6377     {
6378 	save->rs_u.pos.col = (colnr_T)(reginput - regline);
6379 	save->rs_u.pos.lnum = reglnum;
6380     }
6381     else
6382 	save->rs_u.ptr = reginput;
6383     save->rs_len = gap->ga_len;
6384 }
6385 
6386 /*
6387  * Restore the input line and position from a regsave_T.
6388  */
6389     static void
6390 reg_restore(save, gap)
6391     regsave_T	*save;
6392     garray_T	*gap;
6393 {
6394     if (REG_MULTI)
6395     {
6396 	if (reglnum != save->rs_u.pos.lnum)
6397 	{
6398 	    /* only call reg_getline() when the line number changed to save
6399 	     * a bit of time */
6400 	    reglnum = save->rs_u.pos.lnum;
6401 	    regline = reg_getline(reglnum);
6402 	}
6403 	reginput = regline + save->rs_u.pos.col;
6404     }
6405     else
6406 	reginput = save->rs_u.ptr;
6407     gap->ga_len = save->rs_len;
6408 }
6409 
6410 /*
6411  * Return TRUE if current position is equal to saved position.
6412  */
6413     static int
6414 reg_save_equal(save)
6415     regsave_T	*save;
6416 {
6417     if (REG_MULTI)
6418 	return reglnum == save->rs_u.pos.lnum
6419 				  && reginput == regline + save->rs_u.pos.col;
6420     return reginput == save->rs_u.ptr;
6421 }
6422 
6423 /*
6424  * Tentatively set the sub-expression start to the current position (after
6425  * calling regmatch() they will have changed).  Need to save the existing
6426  * values for when there is no match.
6427  * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
6428  * depending on REG_MULTI.
6429  */
6430     static void
6431 save_se_multi(savep, posp)
6432     save_se_T	*savep;
6433     lpos_T	*posp;
6434 {
6435     savep->se_u.pos = *posp;
6436     posp->lnum = reglnum;
6437     posp->col = (colnr_T)(reginput - regline);
6438 }
6439 
6440     static void
6441 save_se_one(savep, pp)
6442     save_se_T	*savep;
6443     char_u	**pp;
6444 {
6445     savep->se_u.ptr = *pp;
6446     *pp = reginput;
6447 }
6448 
6449 /*
6450  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
6451  */
6452     static int
6453 re_num_cmp(val, scan)
6454     long_u	val;
6455     char_u	*scan;
6456 {
6457     long_u  n = OPERAND_MIN(scan);
6458 
6459     if (OPERAND_CMP(scan) == '>')
6460 	return val > n;
6461     if (OPERAND_CMP(scan) == '<')
6462 	return val < n;
6463     return val == n;
6464 }
6465 
6466 /*
6467  * Check whether a backreference matches.
6468  * Returns RA_FAIL, RA_NOMATCH or RA_MATCH.
6469  * If "bytelen" is not NULL, it is set to the byte length of the match in the
6470  * last line.
6471  */
6472     static int
6473 match_with_backref(start_lnum, start_col, end_lnum, end_col, bytelen)
6474     linenr_T start_lnum;
6475     colnr_T  start_col;
6476     linenr_T end_lnum;
6477     colnr_T  end_col;
6478     int	     *bytelen;
6479 {
6480     linenr_T	clnum = start_lnum;
6481     colnr_T	ccol = start_col;
6482     int		len;
6483     char_u	*p;
6484 
6485     if (bytelen != NULL)
6486 	*bytelen = 0;
6487     for (;;)
6488     {
6489 	/* Since getting one line may invalidate the other, need to make copy.
6490 	 * Slow! */
6491 	if (regline != reg_tofree)
6492 	{
6493 	    len = (int)STRLEN(regline);
6494 	    if (reg_tofree == NULL || len >= (int)reg_tofreelen)
6495 	    {
6496 		len += 50;	/* get some extra */
6497 		vim_free(reg_tofree);
6498 		reg_tofree = alloc(len);
6499 		if (reg_tofree == NULL)
6500 		    return RA_FAIL; /* out of memory!*/
6501 		reg_tofreelen = len;
6502 	    }
6503 	    STRCPY(reg_tofree, regline);
6504 	    reginput = reg_tofree + (reginput - regline);
6505 	    regline = reg_tofree;
6506 	}
6507 
6508 	/* Get the line to compare with. */
6509 	p = reg_getline(clnum);
6510 	if (clnum == end_lnum)
6511 	    len = end_col - ccol;
6512 	else
6513 	    len = (int)STRLEN(p + ccol);
6514 
6515 	if (cstrncmp(p + ccol, reginput, &len) != 0)
6516 	    return RA_NOMATCH;  /* doesn't match */
6517 	if (bytelen != NULL)
6518 	    *bytelen += len;
6519 	if (clnum == end_lnum)
6520 	    break;		/* match and at end! */
6521 	if (reglnum >= reg_maxline)
6522 	    return RA_NOMATCH;  /* text too short */
6523 
6524 	/* Advance to next line. */
6525 	reg_nextline();
6526 	if (bytelen != NULL)
6527 	    *bytelen = 0;
6528 	++clnum;
6529 	ccol = 0;
6530 	if (got_int)
6531 	    return RA_FAIL;
6532     }
6533 
6534     /* found a match!  Note that regline may now point to a copy of the line,
6535      * that should not matter. */
6536     return RA_MATCH;
6537 }
6538 
6539 #ifdef BT_REGEXP_DUMP
6540 
6541 /*
6542  * regdump - dump a regexp onto stdout in vaguely comprehensible form
6543  */
6544     static void
6545 regdump(pattern, r)
6546     char_u	*pattern;
6547     bt_regprog_T	*r;
6548 {
6549     char_u  *s;
6550     int	    op = EXACTLY;	/* Arbitrary non-END op. */
6551     char_u  *next;
6552     char_u  *end = NULL;
6553     FILE    *f;
6554 
6555 #ifdef BT_REGEXP_LOG
6556     f = fopen("bt_regexp_log.log", "a");
6557 #else
6558     f = stdout;
6559 #endif
6560     if (f == NULL)
6561 	return;
6562     fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
6563 
6564     s = r->program + 1;
6565     /*
6566      * Loop until we find the END that isn't before a referred next (an END
6567      * can also appear in a NOMATCH operand).
6568      */
6569     while (op != END || s <= end)
6570     {
6571 	op = OP(s);
6572 	fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); /* Where, what. */
6573 	next = regnext(s);
6574 	if (next == NULL)	/* Next ptr. */
6575 	    fprintf(f, "(0)");
6576 	else
6577 	    fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
6578 	if (end < next)
6579 	    end = next;
6580 	if (op == BRACE_LIMITS)
6581 	{
6582 	    /* Two ints */
6583 	    fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
6584 	    s += 8;
6585 	}
6586 	else if (op == BEHIND || op == NOBEHIND)
6587 	{
6588 	    /* one int */
6589 	    fprintf(f, " count %ld", OPERAND_MIN(s));
6590 	    s += 4;
6591 	}
6592 	else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
6593 	{
6594 	    /* one int plus comperator */
6595 	    fprintf(f, " count %ld", OPERAND_MIN(s));
6596 	    s += 5;
6597 	}
6598 	s += 3;
6599 	if (op == ANYOF || op == ANYOF + ADD_NL
6600 		|| op == ANYBUT || op == ANYBUT + ADD_NL
6601 		|| op == EXACTLY)
6602 	{
6603 	    /* Literal string, where present. */
6604 	    fprintf(f, "\nxxxxxxxxx\n");
6605 	    while (*s != NUL)
6606 		fprintf(f, "%c", *s++);
6607 	    fprintf(f, "\nxxxxxxxxx\n");
6608 	    s++;
6609 	}
6610 	fprintf(f, "\r\n");
6611     }
6612 
6613     /* Header fields of interest. */
6614     if (r->regstart != NUL)
6615 	fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
6616 		? (char *)transchar(r->regstart)
6617 		: "multibyte", r->regstart);
6618     if (r->reganch)
6619 	fprintf(f, "anchored; ");
6620     if (r->regmust != NULL)
6621 	fprintf(f, "must have \"%s\"", r->regmust);
6622     fprintf(f, "\r\n");
6623 
6624 #ifdef BT_REGEXP_LOG
6625     fclose(f);
6626 #endif
6627 }
6628 #endif	    /* BT_REGEXP_DUMP */
6629 
6630 #ifdef DEBUG
6631 /*
6632  * regprop - printable representation of opcode
6633  */
6634     static char_u *
6635 regprop(op)
6636     char_u	   *op;
6637 {
6638     char	    *p;
6639     static char	    buf[50];
6640 
6641     STRCPY(buf, ":");
6642 
6643     switch ((int) OP(op))
6644     {
6645       case BOL:
6646 	p = "BOL";
6647 	break;
6648       case EOL:
6649 	p = "EOL";
6650 	break;
6651       case RE_BOF:
6652 	p = "BOF";
6653 	break;
6654       case RE_EOF:
6655 	p = "EOF";
6656 	break;
6657       case CURSOR:
6658 	p = "CURSOR";
6659 	break;
6660       case RE_VISUAL:
6661 	p = "RE_VISUAL";
6662 	break;
6663       case RE_LNUM:
6664 	p = "RE_LNUM";
6665 	break;
6666       case RE_MARK:
6667 	p = "RE_MARK";
6668 	break;
6669       case RE_COL:
6670 	p = "RE_COL";
6671 	break;
6672       case RE_VCOL:
6673 	p = "RE_VCOL";
6674 	break;
6675       case BOW:
6676 	p = "BOW";
6677 	break;
6678       case EOW:
6679 	p = "EOW";
6680 	break;
6681       case ANY:
6682 	p = "ANY";
6683 	break;
6684       case ANY + ADD_NL:
6685 	p = "ANY+NL";
6686 	break;
6687       case ANYOF:
6688 	p = "ANYOF";
6689 	break;
6690       case ANYOF + ADD_NL:
6691 	p = "ANYOF+NL";
6692 	break;
6693       case ANYBUT:
6694 	p = "ANYBUT";
6695 	break;
6696       case ANYBUT + ADD_NL:
6697 	p = "ANYBUT+NL";
6698 	break;
6699       case IDENT:
6700 	p = "IDENT";
6701 	break;
6702       case IDENT + ADD_NL:
6703 	p = "IDENT+NL";
6704 	break;
6705       case SIDENT:
6706 	p = "SIDENT";
6707 	break;
6708       case SIDENT + ADD_NL:
6709 	p = "SIDENT+NL";
6710 	break;
6711       case KWORD:
6712 	p = "KWORD";
6713 	break;
6714       case KWORD + ADD_NL:
6715 	p = "KWORD+NL";
6716 	break;
6717       case SKWORD:
6718 	p = "SKWORD";
6719 	break;
6720       case SKWORD + ADD_NL:
6721 	p = "SKWORD+NL";
6722 	break;
6723       case FNAME:
6724 	p = "FNAME";
6725 	break;
6726       case FNAME + ADD_NL:
6727 	p = "FNAME+NL";
6728 	break;
6729       case SFNAME:
6730 	p = "SFNAME";
6731 	break;
6732       case SFNAME + ADD_NL:
6733 	p = "SFNAME+NL";
6734 	break;
6735       case PRINT:
6736 	p = "PRINT";
6737 	break;
6738       case PRINT + ADD_NL:
6739 	p = "PRINT+NL";
6740 	break;
6741       case SPRINT:
6742 	p = "SPRINT";
6743 	break;
6744       case SPRINT + ADD_NL:
6745 	p = "SPRINT+NL";
6746 	break;
6747       case WHITE:
6748 	p = "WHITE";
6749 	break;
6750       case WHITE + ADD_NL:
6751 	p = "WHITE+NL";
6752 	break;
6753       case NWHITE:
6754 	p = "NWHITE";
6755 	break;
6756       case NWHITE + ADD_NL:
6757 	p = "NWHITE+NL";
6758 	break;
6759       case DIGIT:
6760 	p = "DIGIT";
6761 	break;
6762       case DIGIT + ADD_NL:
6763 	p = "DIGIT+NL";
6764 	break;
6765       case NDIGIT:
6766 	p = "NDIGIT";
6767 	break;
6768       case NDIGIT + ADD_NL:
6769 	p = "NDIGIT+NL";
6770 	break;
6771       case HEX:
6772 	p = "HEX";
6773 	break;
6774       case HEX + ADD_NL:
6775 	p = "HEX+NL";
6776 	break;
6777       case NHEX:
6778 	p = "NHEX";
6779 	break;
6780       case NHEX + ADD_NL:
6781 	p = "NHEX+NL";
6782 	break;
6783       case OCTAL:
6784 	p = "OCTAL";
6785 	break;
6786       case OCTAL + ADD_NL:
6787 	p = "OCTAL+NL";
6788 	break;
6789       case NOCTAL:
6790 	p = "NOCTAL";
6791 	break;
6792       case NOCTAL + ADD_NL:
6793 	p = "NOCTAL+NL";
6794 	break;
6795       case WORD:
6796 	p = "WORD";
6797 	break;
6798       case WORD + ADD_NL:
6799 	p = "WORD+NL";
6800 	break;
6801       case NWORD:
6802 	p = "NWORD";
6803 	break;
6804       case NWORD + ADD_NL:
6805 	p = "NWORD+NL";
6806 	break;
6807       case HEAD:
6808 	p = "HEAD";
6809 	break;
6810       case HEAD + ADD_NL:
6811 	p = "HEAD+NL";
6812 	break;
6813       case NHEAD:
6814 	p = "NHEAD";
6815 	break;
6816       case NHEAD + ADD_NL:
6817 	p = "NHEAD+NL";
6818 	break;
6819       case ALPHA:
6820 	p = "ALPHA";
6821 	break;
6822       case ALPHA + ADD_NL:
6823 	p = "ALPHA+NL";
6824 	break;
6825       case NALPHA:
6826 	p = "NALPHA";
6827 	break;
6828       case NALPHA + ADD_NL:
6829 	p = "NALPHA+NL";
6830 	break;
6831       case LOWER:
6832 	p = "LOWER";
6833 	break;
6834       case LOWER + ADD_NL:
6835 	p = "LOWER+NL";
6836 	break;
6837       case NLOWER:
6838 	p = "NLOWER";
6839 	break;
6840       case NLOWER + ADD_NL:
6841 	p = "NLOWER+NL";
6842 	break;
6843       case UPPER:
6844 	p = "UPPER";
6845 	break;
6846       case UPPER + ADD_NL:
6847 	p = "UPPER+NL";
6848 	break;
6849       case NUPPER:
6850 	p = "NUPPER";
6851 	break;
6852       case NUPPER + ADD_NL:
6853 	p = "NUPPER+NL";
6854 	break;
6855       case BRANCH:
6856 	p = "BRANCH";
6857 	break;
6858       case EXACTLY:
6859 	p = "EXACTLY";
6860 	break;
6861       case NOTHING:
6862 	p = "NOTHING";
6863 	break;
6864       case BACK:
6865 	p = "BACK";
6866 	break;
6867       case END:
6868 	p = "END";
6869 	break;
6870       case MOPEN + 0:
6871 	p = "MATCH START";
6872 	break;
6873       case MOPEN + 1:
6874       case MOPEN + 2:
6875       case MOPEN + 3:
6876       case MOPEN + 4:
6877       case MOPEN + 5:
6878       case MOPEN + 6:
6879       case MOPEN + 7:
6880       case MOPEN + 8:
6881       case MOPEN + 9:
6882 	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
6883 	p = NULL;
6884 	break;
6885       case MCLOSE + 0:
6886 	p = "MATCH END";
6887 	break;
6888       case MCLOSE + 1:
6889       case MCLOSE + 2:
6890       case MCLOSE + 3:
6891       case MCLOSE + 4:
6892       case MCLOSE + 5:
6893       case MCLOSE + 6:
6894       case MCLOSE + 7:
6895       case MCLOSE + 8:
6896       case MCLOSE + 9:
6897 	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
6898 	p = NULL;
6899 	break;
6900       case BACKREF + 1:
6901       case BACKREF + 2:
6902       case BACKREF + 3:
6903       case BACKREF + 4:
6904       case BACKREF + 5:
6905       case BACKREF + 6:
6906       case BACKREF + 7:
6907       case BACKREF + 8:
6908       case BACKREF + 9:
6909 	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
6910 	p = NULL;
6911 	break;
6912       case NOPEN:
6913 	p = "NOPEN";
6914 	break;
6915       case NCLOSE:
6916 	p = "NCLOSE";
6917 	break;
6918 #ifdef FEAT_SYN_HL
6919       case ZOPEN + 1:
6920       case ZOPEN + 2:
6921       case ZOPEN + 3:
6922       case ZOPEN + 4:
6923       case ZOPEN + 5:
6924       case ZOPEN + 6:
6925       case ZOPEN + 7:
6926       case ZOPEN + 8:
6927       case ZOPEN + 9:
6928 	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
6929 	p = NULL;
6930 	break;
6931       case ZCLOSE + 1:
6932       case ZCLOSE + 2:
6933       case ZCLOSE + 3:
6934       case ZCLOSE + 4:
6935       case ZCLOSE + 5:
6936       case ZCLOSE + 6:
6937       case ZCLOSE + 7:
6938       case ZCLOSE + 8:
6939       case ZCLOSE + 9:
6940 	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
6941 	p = NULL;
6942 	break;
6943       case ZREF + 1:
6944       case ZREF + 2:
6945       case ZREF + 3:
6946       case ZREF + 4:
6947       case ZREF + 5:
6948       case ZREF + 6:
6949       case ZREF + 7:
6950       case ZREF + 8:
6951       case ZREF + 9:
6952 	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
6953 	p = NULL;
6954 	break;
6955 #endif
6956       case STAR:
6957 	p = "STAR";
6958 	break;
6959       case PLUS:
6960 	p = "PLUS";
6961 	break;
6962       case NOMATCH:
6963 	p = "NOMATCH";
6964 	break;
6965       case MATCH:
6966 	p = "MATCH";
6967 	break;
6968       case BEHIND:
6969 	p = "BEHIND";
6970 	break;
6971       case NOBEHIND:
6972 	p = "NOBEHIND";
6973 	break;
6974       case SUBPAT:
6975 	p = "SUBPAT";
6976 	break;
6977       case BRACE_LIMITS:
6978 	p = "BRACE_LIMITS";
6979 	break;
6980       case BRACE_SIMPLE:
6981 	p = "BRACE_SIMPLE";
6982 	break;
6983       case BRACE_COMPLEX + 0:
6984       case BRACE_COMPLEX + 1:
6985       case BRACE_COMPLEX + 2:
6986       case BRACE_COMPLEX + 3:
6987       case BRACE_COMPLEX + 4:
6988       case BRACE_COMPLEX + 5:
6989       case BRACE_COMPLEX + 6:
6990       case BRACE_COMPLEX + 7:
6991       case BRACE_COMPLEX + 8:
6992       case BRACE_COMPLEX + 9:
6993 	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
6994 	p = NULL;
6995 	break;
6996 #ifdef FEAT_MBYTE
6997       case MULTIBYTECODE:
6998 	p = "MULTIBYTECODE";
6999 	break;
7000 #endif
7001       case NEWL:
7002 	p = "NEWL";
7003 	break;
7004       default:
7005 	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
7006 	p = NULL;
7007 	break;
7008     }
7009     if (p != NULL)
7010 	STRCAT(buf, p);
7011     return (char_u *)buf;
7012 }
7013 #endif	    /* DEBUG */
7014 
7015 /*
7016  * Used in a place where no * or \+ can follow.
7017  */
7018     static int
7019 re_mult_next(what)
7020     char *what;
7021 {
7022     if (re_multi_type(peekchr()) == MULTI_MULT)
7023 	EMSG2_RET_FAIL(_("E888: (NFA regexp) cannot repeat %s"), what);
7024     return OK;
7025 }
7026 
7027 #ifdef FEAT_MBYTE
static void mb_decompose __ARGS((int c, int *c1, int *c2, int *c3));

/*
 * One entry of the decomposition table: the up to three characters that a
 * composed character consists of.  Unused positions are zero.
 */
typedef struct
{
    int a, b, c;
} decomp_T;


/*
 * Decomposition of the characters 0xfb20 - 0xfb4f, indexed with the
 * character value minus 0xfb20 (see mb_decompose()).
 * Entries marked UNUSED have the character itself as the first item.
 */
static decomp_T decomp_table[0xfb4f-0xfb20+1] =
{
    {0x5e2,0,0},		/* 0xfb20	alt ayin */
    {0x5d0,0,0},		/* 0xfb21	alt alef */
    {0x5d3,0,0},		/* 0xfb22	alt dalet */
    {0x5d4,0,0},		/* 0xfb23	alt he */
    {0x5db,0,0},		/* 0xfb24	alt kaf */
    {0x5dc,0,0},		/* 0xfb25	alt lamed */
    {0x5dd,0,0},		/* 0xfb26	alt mem-sofit */
    {0x5e8,0,0},		/* 0xfb27	alt resh */
    {0x5ea,0,0},		/* 0xfb28	alt tav */
    {'+', 0, 0},		/* 0xfb29	alt plus */
    {0x5e9, 0x5c1, 0},		/* 0xfb2a	shin+shin-dot */
    {0x5e9, 0x5c2, 0},		/* 0xfb2b	shin+sin-dot */
    {0x5e9, 0x5c1, 0x5bc},	/* 0xfb2c	shin+shin-dot+dagesh */
    {0x5e9, 0x5c2, 0x5bc},	/* 0xfb2d	shin+sin-dot+dagesh */
    {0x5d0, 0x5b7, 0},		/* 0xfb2e	alef+patah */
    {0x5d0, 0x5b8, 0},		/* 0xfb2f	alef+qamats */
    {0x5d0, 0x5b4, 0},		/* 0xfb30	alef+hiriq */
    {0x5d1, 0x5bc, 0},		/* 0xfb31	bet+dagesh */
    {0x5d2, 0x5bc, 0},		/* 0xfb32	gimel+dagesh */
    {0x5d3, 0x5bc, 0},		/* 0xfb33	dalet+dagesh */
    {0x5d4, 0x5bc, 0},		/* 0xfb34	he+dagesh */
    {0x5d5, 0x5bc, 0},		/* 0xfb35	vav+dagesh */
    {0x5d6, 0x5bc, 0},		/* 0xfb36	zayin+dagesh */
    {0xfb37, 0, 0},		/* 0xfb37 -- UNUSED */
    {0x5d8, 0x5bc, 0},		/* 0xfb38	tet+dagesh */
    {0x5d9, 0x5bc, 0},		/* 0xfb39	yud+dagesh */
    {0x5da, 0x5bc, 0},		/* 0xfb3a	kaf sofit+dagesh */
    {0x5db, 0x5bc, 0},		/* 0xfb3b	kaf+dagesh */
    {0x5dc, 0x5bc, 0},		/* 0xfb3c	lamed+dagesh */
    {0xfb3d, 0, 0},		/* 0xfb3d -- UNUSED */
    {0x5de, 0x5bc, 0},		/* 0xfb3e	mem+dagesh */
    {0xfb3f, 0, 0},		/* 0xfb3f -- UNUSED */
    {0x5e0, 0x5bc, 0},		/* 0xfb40	nun+dagesh */
    {0x5e1, 0x5bc, 0},		/* 0xfb41	samech+dagesh */
    {0xfb42, 0, 0},		/* 0xfb42 -- UNUSED */
    {0x5e3, 0x5bc, 0},		/* 0xfb43	pe sofit+dagesh */
    {0x5e4, 0x5bc,0},		/* 0xfb44	pe+dagesh */
    {0xfb45, 0, 0},		/* 0xfb45 -- UNUSED */
    {0x5e6, 0x5bc, 0},		/* 0xfb46	tsadi+dagesh */
    {0x5e7, 0x5bc, 0},		/* 0xfb47	qof+dagesh */
    {0x5e8, 0x5bc, 0},		/* 0xfb48	resh+dagesh */
    {0x5e9, 0x5bc, 0},		/* 0xfb49	shin+dagesh */
    {0x5ea, 0x5bc, 0},		/* 0xfb4a	tav+dagesh */
    {0x5d5, 0x5b9, 0},		/* 0xfb4b	vav+holam */
    {0x5d1, 0x5bf, 0},		/* 0xfb4c	bet+rafe */
    {0x5db, 0x5bf, 0},		/* 0xfb4d	kaf+rafe */
    {0x5e4, 0x5bf, 0},		/* 0xfb4e	pe+rafe */
    {0x5d0, 0x5dc, 0}		/* 0xfb4f	alef-lamed */
};
7088 
7089     static void
7090 mb_decompose(c, c1, c2, c3)
7091     int c, *c1, *c2, *c3;
7092 {
7093     decomp_T d;
7094 
7095     if (c >= 0xfb20 && c <= 0xfb4f)
7096     {
7097 	d = decomp_table[c - 0xfb20];
7098 	*c1 = d.a;
7099 	*c2 = d.b;
7100 	*c3 = d.c;
7101     }
7102     else
7103     {
7104 	*c1 = c;
7105 	*c2 = *c3 = 0;
7106     }
7107 }
7108 #endif
7109 
7110 /*
7111  * Compare two strings, ignore case if ireg_ic set.
7112  * Return 0 if strings match, non-zero otherwise.
7113  * Correct the length "*n" when composing characters are ignored.
7114  */
7115     static int
7116 cstrncmp(s1, s2, n)
7117     char_u	*s1, *s2;
7118     int		*n;
7119 {
7120     int		result;
7121 
7122     if (!ireg_ic)
7123 	result = STRNCMP(s1, s2, *n);
7124     else
7125 	result = MB_STRNICMP(s1, s2, *n);
7126 
7127 #ifdef FEAT_MBYTE
7128     /* if it failed and it's utf8 and we want to combineignore: */
7129     if (result != 0 && enc_utf8 && ireg_icombine)
7130     {
7131 	char_u	*str1, *str2;
7132 	int	c1, c2, c11, c12;
7133 	int	junk;
7134 
7135 	/* we have to handle the strcmp ourselves, since it is necessary to
7136 	 * deal with the composing characters by ignoring them: */
7137 	str1 = s1;
7138 	str2 = s2;
7139 	c1 = c2 = 0;
7140 	while ((int)(str1 - s1) < *n)
7141 	{
7142 	    c1 = mb_ptr2char_adv(&str1);
7143 	    c2 = mb_ptr2char_adv(&str2);
7144 
7145 	    /* decompose the character if necessary, into 'base' characters
7146 	     * because I don't care about Arabic, I will hard-code the Hebrew
7147 	     * which I *do* care about!  So sue me... */
7148 	    if (c1 != c2 && (!ireg_ic || utf_fold(c1) != utf_fold(c2)))
7149 	    {
7150 		/* decomposition necessary? */
7151 		mb_decompose(c1, &c11, &junk, &junk);
7152 		mb_decompose(c2, &c12, &junk, &junk);
7153 		c1 = c11;
7154 		c2 = c12;
7155 		if (c11 != c12 && (!ireg_ic || utf_fold(c11) != utf_fold(c12)))
7156 		    break;
7157 	    }
7158 	}
7159 	result = c2 - c1;
7160 	if (result == 0)
7161 	    *n = (int)(str2 - s2);
7162     }
7163 #endif
7164 
7165     return result;
7166 }
7167 
7168 /*
7169  * cstrchr: This function is used a lot for simple searches, keep it fast!
7170  */
7171     static char_u *
7172 cstrchr(s, c)
7173     char_u	*s;
7174     int		c;
7175 {
7176     char_u	*p;
7177     int		cc;
7178 
7179     if (!ireg_ic
7180 #ifdef FEAT_MBYTE
7181 	    || (!enc_utf8 && mb_char2len(c) > 1)
7182 #endif
7183 	    )
7184 	return vim_strchr(s, c);
7185 
7186     /* tolower() and toupper() can be slow, comparing twice should be a lot
7187      * faster (esp. when using MS Visual C++!).
7188      * For UTF-8 need to use folded case. */
7189 #ifdef FEAT_MBYTE
7190     if (enc_utf8 && c > 0x80)
7191 	cc = utf_fold(c);
7192     else
7193 #endif
7194 	 if (MB_ISUPPER(c))
7195 	cc = MB_TOLOWER(c);
7196     else if (MB_ISLOWER(c))
7197 	cc = MB_TOUPPER(c);
7198     else
7199 	return vim_strchr(s, c);
7200 
7201 #ifdef FEAT_MBYTE
7202     if (has_mbyte)
7203     {
7204 	for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
7205 	{
7206 	    if (enc_utf8 && c > 0x80)
7207 	    {
7208 		if (utf_fold(utf_ptr2char(p)) == cc)
7209 		    return p;
7210 	    }
7211 	    else if (*p == c || *p == cc)
7212 		return p;
7213 	}
7214     }
7215     else
7216 #endif
7217 	/* Faster version for when there are no multi-byte characters. */
7218 	for (p = s; *p != NUL; ++p)
7219 	    if (*p == c || *p == cc)
7220 		return p;
7221 
7222     return NULL;
7223 }
7224 
7225 /***************************************************************
7226  *		      regsub stuff			       *
7227  ***************************************************************/
7228 
7229 /* This stuff below really confuses cc on an SGI -- webb */
7230 #ifdef __sgi
7231 # undef __ARGS
7232 # define __ARGS(x)  ()
7233 #endif
7234 
/*
 * We should define fptr_T as a pointer to a function returning a pointer to
 * a function returning a pointer to a function ...
 * This is impossible, so we declare a pointer to a function returning a
 * pointer to a function returning void. This should work for all compilers.
 */
typedef void (*(*fptr_T) __ARGS((int *, int)))();

/* Case conversion functions for the replacement string, each stores the
 * converted character in its first argument. */
static fptr_T do_upper __ARGS((int *, int));
static fptr_T do_Upper __ARGS((int *, int));
static fptr_T do_lower __ARGS((int *, int));
static fptr_T do_Lower __ARGS((int *, int));

/* The work for both vim_regsub() and vim_regsub_multi(). */
static int vim_regsub_both __ARGS((char_u *source, char_u *dest, int copy, int magic, int backslash));
7249 
7250     static fptr_T
7251 do_upper(d, c)
7252     int		*d;
7253     int		c;
7254 {
7255     *d = MB_TOUPPER(c);
7256 
7257     return (fptr_T)NULL;
7258 }
7259 
7260     static fptr_T
7261 do_Upper(d, c)
7262     int		*d;
7263     int		c;
7264 {
7265     *d = MB_TOUPPER(c);
7266 
7267     return (fptr_T)do_Upper;
7268 }
7269 
7270     static fptr_T
7271 do_lower(d, c)
7272     int		*d;
7273     int		c;
7274 {
7275     *d = MB_TOLOWER(c);
7276 
7277     return (fptr_T)NULL;
7278 }
7279 
7280     static fptr_T
7281 do_Lower(d, c)
7282     int		*d;
7283     int		c;
7284 {
7285     *d = MB_TOLOWER(c);
7286 
7287     return (fptr_T)do_Lower;
7288 }
7289 
7290 /*
7291  * regtilde(): Replace tildes in the pattern by the old pattern.
7292  *
7293  * Short explanation of the tilde: It stands for the previous replacement
7294  * pattern.  If that previous pattern also contains a ~ we should go back a
7295  * step further...  But we insert the previous pattern into the current one
7296  * and remember that.
7297  * This still does not handle the case where "magic" changes.  So require the
7298  * user to keep his hands off of "magic".
7299  *
7300  * The tildes are parsed once before the first call to vim_regsub().
7301  */
7302     char_u *
7303 regtilde(source, magic)
7304     char_u	*source;
7305     int		magic;
7306 {
7307     char_u	*newsub = source;
7308     char_u	*tmpsub;
7309     char_u	*p;
7310     int		len;
7311     int		prevlen;
7312 
7313     for (p = newsub; *p; ++p)
7314     {
7315 	if ((*p == '~' && magic) || (*p == '\\' && *(p + 1) == '~' && !magic))
7316 	{
7317 	    if (reg_prev_sub != NULL)
7318 	    {
7319 		/* length = len(newsub) - 1 + len(prev_sub) + 1 */
7320 		prevlen = (int)STRLEN(reg_prev_sub);
7321 		tmpsub = alloc((unsigned)(STRLEN(newsub) + prevlen));
7322 		if (tmpsub != NULL)
7323 		{
7324 		    /* copy prefix */
7325 		    len = (int)(p - newsub);	/* not including ~ */
7326 		    mch_memmove(tmpsub, newsub, (size_t)len);
7327 		    /* interpret tilde */
7328 		    mch_memmove(tmpsub + len, reg_prev_sub, (size_t)prevlen);
7329 		    /* copy postfix */
7330 		    if (!magic)
7331 			++p;			/* back off \ */
7332 		    STRCPY(tmpsub + len + prevlen, p + 1);
7333 
7334 		    if (newsub != source)	/* already allocated newsub */
7335 			vim_free(newsub);
7336 		    newsub = tmpsub;
7337 		    p = newsub + len + prevlen;
7338 		}
7339 	    }
7340 	    else if (magic)
7341 		STRMOVE(p, p + 1);	/* remove '~' */
7342 	    else
7343 		STRMOVE(p, p + 2);	/* remove '\~' */
7344 	    --p;
7345 	}
7346 	else
7347 	{
7348 	    if (*p == '\\' && p[1])		/* skip escaped characters */
7349 		++p;
7350 #ifdef FEAT_MBYTE
7351 	    if (has_mbyte)
7352 		p += (*mb_ptr2len)(p) - 1;
7353 #endif
7354 	}
7355     }
7356 
7357     vim_free(reg_prev_sub);
7358     if (newsub != source)	/* newsub was allocated, just keep it */
7359 	reg_prev_sub = newsub;
7360     else			/* no ~ found, need to save newsub  */
7361 	reg_prev_sub = vim_strsave(newsub);
7362     return newsub;
7363 }
7364 
7365 #ifdef FEAT_EVAL
static int can_f_submatch = FALSE;	/* TRUE when submatch() can be used */

/* These pointers are used instead of reg_match and reg_mmatch for
 * reg_submatch().  Needed for when the substitution string is an expression
 * that contains a call to substitute() and submatch(). */
static regmatch_T	*submatch_match;
static regmmatch_T	*submatch_mmatch;
/* Copies of reg_firstlnum, reg_maxline and reg_line_lbr, taken before the
 * expression is evaluated and put back afterwards. */
static linenr_T		submatch_firstlnum;
static linenr_T		submatch_maxline;
static int		submatch_line_lbr;
7376 #endif
7377 
7378 #if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) || defined(PROTO)
7379 /*
7380  * vim_regsub() - perform substitutions after a vim_regexec() or
7381  * vim_regexec_multi() match.
7382  *
7383  * If "copy" is TRUE really copy into "dest".
7384  * If "copy" is FALSE nothing is copied, this is just to find out the length
7385  * of the result.
7386  *
7387  * If "backslash" is TRUE, a backslash will be removed later, need to double
7388  * them to keep them, and insert a backslash before a CR to avoid it being
7389  * replaced with a line break later.
7390  *
7391  * Note: The matched text must not change between the call of
7392  * vim_regexec()/vim_regexec_multi() and vim_regsub()!  It would make the back
7393  * references invalid!
7394  *
7395  * Returns the size of the replacement, including terminating NUL.
7396  */
7397     int
7398 vim_regsub(rmp, source, dest, copy, magic, backslash)
7399     regmatch_T	*rmp;
7400     char_u	*source;
7401     char_u	*dest;
7402     int		copy;
7403     int		magic;
7404     int		backslash;
7405 {
7406     reg_match = rmp;
7407     reg_mmatch = NULL;
7408     reg_maxline = 0;
7409     reg_buf = curbuf;
7410     reg_line_lbr = TRUE;
7411     return vim_regsub_both(source, dest, copy, magic, backslash);
7412 }
7413 #endif
7414 
7415     int
7416 vim_regsub_multi(rmp, lnum, source, dest, copy, magic, backslash)
7417     regmmatch_T	*rmp;
7418     linenr_T	lnum;
7419     char_u	*source;
7420     char_u	*dest;
7421     int		copy;
7422     int		magic;
7423     int		backslash;
7424 {
7425     reg_match = NULL;
7426     reg_mmatch = rmp;
7427     reg_buf = curbuf;		/* always works on the current buffer! */
7428     reg_firstlnum = lnum;
7429     reg_maxline = curbuf->b_ml.ml_line_count - lnum;
7430     reg_line_lbr = FALSE;
7431     return vim_regsub_both(source, dest, copy, magic, backslash);
7432 }
7433 
7434     static int
7435 vim_regsub_both(source, dest, copy, magic, backslash)
7436     char_u	*source;
7437     char_u	*dest;
7438     int		copy;
7439     int		magic;
7440     int		backslash;
7441 {
7442     char_u	*src;
7443     char_u	*dst;
7444     char_u	*s;
7445     int		c;
7446     int		cc;
7447     int		no = -1;
7448     fptr_T	func_all = (fptr_T)NULL;
7449     fptr_T	func_one = (fptr_T)NULL;
7450     linenr_T	clnum = 0;	/* init for GCC */
7451     int		len = 0;	/* init for GCC */
7452 #ifdef FEAT_EVAL
7453     static char_u *eval_result = NULL;
7454 #endif
7455 
7456     /* Be paranoid... */
7457     if (source == NULL || dest == NULL)
7458     {
7459 	EMSG(_(e_null));
7460 	return 0;
7461     }
7462     if (prog_magic_wrong())
7463 	return 0;
7464     src = source;
7465     dst = dest;
7466 
7467     /*
7468      * When the substitute part starts with "\=" evaluate it as an expression.
7469      */
7470     if (source[0] == '\\' && source[1] == '='
7471 #ifdef FEAT_EVAL
7472 	    && !can_f_submatch	    /* can't do this recursively */
7473 #endif
7474 	    )
7475     {
7476 #ifdef FEAT_EVAL
7477 	/* To make sure that the length doesn't change between checking the
7478 	 * length and copying the string, and to speed up things, the
7479 	 * resulting string is saved from the call with "copy" == FALSE to the
7480 	 * call with "copy" == TRUE. */
7481 	if (copy)
7482 	{
7483 	    if (eval_result != NULL)
7484 	    {
7485 		STRCPY(dest, eval_result);
7486 		dst += STRLEN(eval_result);
7487 		vim_free(eval_result);
7488 		eval_result = NULL;
7489 	    }
7490 	}
7491 	else
7492 	{
7493 	    win_T	*save_reg_win;
7494 	    int		save_ireg_ic;
7495 
7496 	    vim_free(eval_result);
7497 
7498 	    /* The expression may contain substitute(), which calls us
7499 	     * recursively.  Make sure submatch() gets the text from the first
7500 	     * level.  Don't need to save "reg_buf", because
7501 	     * vim_regexec_multi() can't be called recursively. */
7502 	    submatch_match = reg_match;
7503 	    submatch_mmatch = reg_mmatch;
7504 	    submatch_firstlnum = reg_firstlnum;
7505 	    submatch_maxline = reg_maxline;
7506 	    submatch_line_lbr = reg_line_lbr;
7507 	    save_reg_win = reg_win;
7508 	    save_ireg_ic = ireg_ic;
7509 	    can_f_submatch = TRUE;
7510 
7511 	    eval_result = eval_to_string(source + 2, NULL, TRUE);
7512 	    if (eval_result != NULL)
7513 	    {
7514 		int had_backslash = FALSE;
7515 
7516 		for (s = eval_result; *s != NUL; mb_ptr_adv(s))
7517 		{
7518 		    /* Change NL to CR, so that it becomes a line break,
7519 		     * unless called from vim_regexec_nl().
7520 		     * Skip over a backslashed character. */
7521 		    if (*s == NL && !submatch_line_lbr)
7522 			*s = CAR;
7523 		    else if (*s == '\\' && s[1] != NUL)
7524 		    {
7525 			++s;
7526 			/* Change NL to CR here too, so that this works:
7527 			 * :s/abc\\\ndef/\="aaa\\\nbbb"/  on text:
7528 			 *   abc\
7529 			 *   def
7530 			 * Not when called from vim_regexec_nl().
7531 			 */
7532 			if (*s == NL && !submatch_line_lbr)
7533 			    *s = CAR;
7534 			had_backslash = TRUE;
7535 		    }
7536 		}
7537 		if (had_backslash && backslash)
7538 		{
7539 		    /* Backslashes will be consumed, need to double them. */
7540 		    s = vim_strsave_escaped(eval_result, (char_u *)"\\");
7541 		    if (s != NULL)
7542 		    {
7543 			vim_free(eval_result);
7544 			eval_result = s;
7545 		    }
7546 		}
7547 
7548 		dst += STRLEN(eval_result);
7549 	    }
7550 
7551 	    reg_match = submatch_match;
7552 	    reg_mmatch = submatch_mmatch;
7553 	    reg_firstlnum = submatch_firstlnum;
7554 	    reg_maxline = submatch_maxline;
7555 	    reg_line_lbr = submatch_line_lbr;
7556 	    reg_win = save_reg_win;
7557 	    ireg_ic = save_ireg_ic;
7558 	    can_f_submatch = FALSE;
7559 	}
7560 #endif
7561     }
7562     else
7563       while ((c = *src++) != NUL)
7564       {
7565 	if (c == '&' && magic)
7566 	    no = 0;
7567 	else if (c == '\\' && *src != NUL)
7568 	{
7569 	    if (*src == '&' && !magic)
7570 	    {
7571 		++src;
7572 		no = 0;
7573 	    }
7574 	    else if ('0' <= *src && *src <= '9')
7575 	    {
7576 		no = *src++ - '0';
7577 	    }
7578 	    else if (vim_strchr((char_u *)"uUlLeE", *src))
7579 	    {
7580 		switch (*src++)
7581 		{
7582 		case 'u':   func_one = (fptr_T)do_upper;
7583 			    continue;
7584 		case 'U':   func_all = (fptr_T)do_Upper;
7585 			    continue;
7586 		case 'l':   func_one = (fptr_T)do_lower;
7587 			    continue;
7588 		case 'L':   func_all = (fptr_T)do_Lower;
7589 			    continue;
7590 		case 'e':
7591 		case 'E':   func_one = func_all = (fptr_T)NULL;
7592 			    continue;
7593 		}
7594 	    }
7595 	}
7596 	if (no < 0)	      /* Ordinary character. */
7597 	{
7598 	    if (c == K_SPECIAL && src[0] != NUL && src[1] != NUL)
7599 	    {
7600 		/* Copy a special key as-is. */
7601 		if (copy)
7602 		{
7603 		    *dst++ = c;
7604 		    *dst++ = *src++;
7605 		    *dst++ = *src++;
7606 		}
7607 		else
7608 		{
7609 		    dst += 3;
7610 		    src += 2;
7611 		}
7612 		continue;
7613 	    }
7614 
7615 	    if (c == '\\' && *src != NUL)
7616 	    {
7617 		/* Check for abbreviations -- webb */
7618 		switch (*src)
7619 		{
7620 		    case 'r':	c = CAR;	++src;	break;
7621 		    case 'n':	c = NL;		++src;	break;
7622 		    case 't':	c = TAB;	++src;	break;
7623 		 /* Oh no!  \e already has meaning in subst pat :-( */
7624 		 /* case 'e':   c = ESC;	++src;	break; */
7625 		    case 'b':	c = Ctrl_H;	++src;	break;
7626 
7627 		    /* If "backslash" is TRUE the backslash will be removed
7628 		     * later.  Used to insert a literal CR. */
7629 		    default:	if (backslash)
7630 				{
7631 				    if (copy)
7632 					*dst = '\\';
7633 				    ++dst;
7634 				}
7635 				c = *src++;
7636 		}
7637 	    }
7638 #ifdef FEAT_MBYTE
7639 	    else if (has_mbyte)
7640 		c = mb_ptr2char(src - 1);
7641 #endif
7642 
7643 	    /* Write to buffer, if copy is set. */
7644 	    if (func_one != (fptr_T)NULL)
7645 		/* Turbo C complains without the typecast */
7646 		func_one = (fptr_T)(func_one(&cc, c));
7647 	    else if (func_all != (fptr_T)NULL)
7648 		/* Turbo C complains without the typecast */
7649 		func_all = (fptr_T)(func_all(&cc, c));
7650 	    else /* just copy */
7651 		cc = c;
7652 
7653 #ifdef FEAT_MBYTE
7654 	    if (has_mbyte)
7655 	    {
7656 		int totlen = mb_ptr2len(src - 1);
7657 
7658 		if (copy)
7659 		    mb_char2bytes(cc, dst);
7660 		dst += mb_char2len(cc) - 1;
7661 		if (enc_utf8)
7662 		{
7663 		    int clen = utf_ptr2len(src - 1);
7664 
7665 		    /* If the character length is shorter than "totlen", there
7666 		     * are composing characters; copy them as-is. */
7667 		    if (clen < totlen)
7668 		    {
7669 			if (copy)
7670 			    mch_memmove(dst + 1, src - 1 + clen,
7671 						     (size_t)(totlen - clen));
7672 			dst += totlen - clen;
7673 		    }
7674 		}
7675 		src += totlen - 1;
7676 	    }
7677 	    else
7678 #endif
7679 		if (copy)
7680 		    *dst = cc;
7681 	    dst++;
7682 	}
7683 	else
7684 	{
7685 	    if (REG_MULTI)
7686 	    {
7687 		clnum = reg_mmatch->startpos[no].lnum;
7688 		if (clnum < 0 || reg_mmatch->endpos[no].lnum < 0)
7689 		    s = NULL;
7690 		else
7691 		{
7692 		    s = reg_getline(clnum) + reg_mmatch->startpos[no].col;
7693 		    if (reg_mmatch->endpos[no].lnum == clnum)
7694 			len = reg_mmatch->endpos[no].col
7695 					       - reg_mmatch->startpos[no].col;
7696 		    else
7697 			len = (int)STRLEN(s);
7698 		}
7699 	    }
7700 	    else
7701 	    {
7702 		s = reg_match->startp[no];
7703 		if (reg_match->endp[no] == NULL)
7704 		    s = NULL;
7705 		else
7706 		    len = (int)(reg_match->endp[no] - s);
7707 	    }
7708 	    if (s != NULL)
7709 	    {
7710 		for (;;)
7711 		{
7712 		    if (len == 0)
7713 		    {
7714 			if (REG_MULTI)
7715 			{
7716 			    if (reg_mmatch->endpos[no].lnum == clnum)
7717 				break;
7718 			    if (copy)
7719 				*dst = CAR;
7720 			    ++dst;
7721 			    s = reg_getline(++clnum);
7722 			    if (reg_mmatch->endpos[no].lnum == clnum)
7723 				len = reg_mmatch->endpos[no].col;
7724 			    else
7725 				len = (int)STRLEN(s);
7726 			}
7727 			else
7728 			    break;
7729 		    }
7730 		    else if (*s == NUL) /* we hit NUL. */
7731 		    {
7732 			if (copy)
7733 			    EMSG(_(e_re_damg));
7734 			goto exit;
7735 		    }
7736 		    else
7737 		    {
7738 			if (backslash && (*s == CAR || *s == '\\'))
7739 			{
7740 			    /*
7741 			     * Insert a backslash in front of a CR, otherwise
7742 			     * it will be replaced by a line break.
7743 			     * Number of backslashes will be halved later,
7744 			     * double them here.
7745 			     */
7746 			    if (copy)
7747 			    {
7748 				dst[0] = '\\';
7749 				dst[1] = *s;
7750 			    }
7751 			    dst += 2;
7752 			}
7753 			else
7754 			{
7755 #ifdef FEAT_MBYTE
7756 			    if (has_mbyte)
7757 				c = mb_ptr2char(s);
7758 			    else
7759 #endif
7760 				c = *s;
7761 
7762 			    if (func_one != (fptr_T)NULL)
7763 				/* Turbo C complains without the typecast */
7764 				func_one = (fptr_T)(func_one(&cc, c));
7765 			    else if (func_all != (fptr_T)NULL)
7766 				/* Turbo C complains without the typecast */
7767 				func_all = (fptr_T)(func_all(&cc, c));
7768 			    else /* just copy */
7769 				cc = c;
7770 
7771 #ifdef FEAT_MBYTE
7772 			    if (has_mbyte)
7773 			    {
7774 				int l;
7775 
7776 				/* Copy composing characters separately, one
7777 				 * at a time. */
7778 				if (enc_utf8)
7779 				    l = utf_ptr2len(s) - 1;
7780 				else
7781 				    l = mb_ptr2len(s) - 1;
7782 
7783 				s += l;
7784 				len -= l;
7785 				if (copy)
7786 				    mb_char2bytes(cc, dst);
7787 				dst += mb_char2len(cc) - 1;
7788 			    }
7789 			    else
7790 #endif
7791 				if (copy)
7792 				    *dst = cc;
7793 			    dst++;
7794 			}
7795 
7796 			++s;
7797 			--len;
7798 		    }
7799 		}
7800 	    }
7801 	    no = -1;
7802 	}
7803       }
7804     if (copy)
7805 	*dst = NUL;
7806 
7807 exit:
7808     return (int)((dst - dest) + 1);
7809 }
7810 
7811 #ifdef FEAT_EVAL
7812 static char_u *reg_getline_submatch __ARGS((linenr_T lnum));
7813 
7814 /*
7815  * Call reg_getline() with the line numbers from the submatch.  If a
7816  * substitute() was used the reg_maxline and other values have been
7817  * overwritten.
7818  */
7819     static char_u *
7820 reg_getline_submatch(lnum)
7821     linenr_T	lnum;
7822 {
7823     char_u *s;
7824     linenr_T save_first = reg_firstlnum;
7825     linenr_T save_max = reg_maxline;
7826 
7827     reg_firstlnum = submatch_firstlnum;
7828     reg_maxline = submatch_maxline;
7829 
7830     s = reg_getline(lnum);
7831 
7832     reg_firstlnum = save_first;
7833     reg_maxline = save_max;
7834     return s;
7835 }
7836 
7837 /*
7838  * Used for the submatch() function: get the string from the n'th submatch in
7839  * allocated memory.
7840  * Returns NULL when not in a ":s" command and for a non-existing submatch.
7841  */
7842     char_u *
7843 reg_submatch(no)
7844     int		no;
7845 {
7846     char_u	*retval = NULL;
7847     char_u	*s;
7848     int		len;
7849     int		round;
7850     linenr_T	lnum;
7851 
7852     if (!can_f_submatch || no < 0)
7853 	return NULL;
7854 
7855     if (submatch_match == NULL)
7856     {
7857 	/*
7858 	 * First round: compute the length and allocate memory.
7859 	 * Second round: copy the text.
7860 	 */
7861 	for (round = 1; round <= 2; ++round)
7862 	{
7863 	    lnum = submatch_mmatch->startpos[no].lnum;
7864 	    if (lnum < 0 || submatch_mmatch->endpos[no].lnum < 0)
7865 		return NULL;
7866 
7867 	    s = reg_getline_submatch(lnum) + submatch_mmatch->startpos[no].col;
7868 	    if (s == NULL)  /* anti-crash check, cannot happen? */
7869 		break;
7870 	    if (submatch_mmatch->endpos[no].lnum == lnum)
7871 	    {
7872 		/* Within one line: take form start to end col. */
7873 		len = submatch_mmatch->endpos[no].col
7874 					  - submatch_mmatch->startpos[no].col;
7875 		if (round == 2)
7876 		    vim_strncpy(retval, s, len);
7877 		++len;
7878 	    }
7879 	    else
7880 	    {
7881 		/* Multiple lines: take start line from start col, middle
7882 		 * lines completely and end line up to end col. */
7883 		len = (int)STRLEN(s);
7884 		if (round == 2)
7885 		{
7886 		    STRCPY(retval, s);
7887 		    retval[len] = '\n';
7888 		}
7889 		++len;
7890 		++lnum;
7891 		while (lnum < submatch_mmatch->endpos[no].lnum)
7892 		{
7893 		    s = reg_getline_submatch(lnum++);
7894 		    if (round == 2)
7895 			STRCPY(retval + len, s);
7896 		    len += (int)STRLEN(s);
7897 		    if (round == 2)
7898 			retval[len] = '\n';
7899 		    ++len;
7900 		}
7901 		if (round == 2)
7902 		    STRNCPY(retval + len, reg_getline_submatch(lnum),
7903 					     submatch_mmatch->endpos[no].col);
7904 		len += submatch_mmatch->endpos[no].col;
7905 		if (round == 2)
7906 		    retval[len] = NUL;
7907 		++len;
7908 	    }
7909 
7910 	    if (retval == NULL)
7911 	    {
7912 		retval = lalloc((long_u)len, TRUE);
7913 		if (retval == NULL)
7914 		    return NULL;
7915 	    }
7916 	}
7917     }
7918     else
7919     {
7920 	s = submatch_match->startp[no];
7921 	if (s == NULL || submatch_match->endp[no] == NULL)
7922 	    retval = NULL;
7923 	else
7924 	    retval = vim_strnsave(s, (int)(submatch_match->endp[no] - s));
7925     }
7926 
7927     return retval;
7928 }
7929 
7930 /*
7931  * Used for the submatch() function with the optional non-zero argument: get
7932  * the list of strings from the n'th submatch in allocated memory with NULs
7933  * represented in NLs.
7934  * Returns a list of allocated strings.  Returns NULL when not in a ":s"
7935  * command, for a non-existing submatch and for any error.
7936  */
7937     list_T *
7938 reg_submatch_list(no)
7939     int		no;
7940 {
7941     char_u	*s;
7942     linenr_T	slnum;
7943     linenr_T	elnum;
7944     colnr_T	scol;
7945     colnr_T	ecol;
7946     int		i;
7947     list_T	*list;
7948     int		error = FALSE;
7949 
7950     if (!can_f_submatch || no < 0)
7951 	return NULL;
7952 
7953     if (submatch_match == NULL)
7954     {
7955 	slnum = submatch_mmatch->startpos[no].lnum;
7956 	elnum = submatch_mmatch->endpos[no].lnum;
7957 	if (slnum < 0 || elnum < 0)
7958 	    return NULL;
7959 
7960 	scol = submatch_mmatch->startpos[no].col;
7961 	ecol = submatch_mmatch->endpos[no].col;
7962 
7963 	list = list_alloc();
7964 	if (list == NULL)
7965 	    return NULL;
7966 
7967 	s = reg_getline_submatch(slnum) + scol;
7968 	if (slnum == elnum)
7969 	{
7970 	    if (list_append_string(list, s, ecol - scol) == FAIL)
7971 		error = TRUE;
7972 	}
7973 	else
7974 	{
7975 	    if (list_append_string(list, s, -1) == FAIL)
7976 		error = TRUE;
7977 	    for (i = 1; i < elnum - slnum; i++)
7978 	    {
7979 		s = reg_getline_submatch(slnum + i);
7980 		if (list_append_string(list, s, -1) == FAIL)
7981 		    error = TRUE;
7982 	    }
7983 	    s = reg_getline_submatch(elnum);
7984 	    if (list_append_string(list, s, ecol) == FAIL)
7985 		error = TRUE;
7986 	}
7987     }
7988     else
7989     {
7990 	s = submatch_match->startp[no];
7991 	if (s == NULL || submatch_match->endp[no] == NULL)
7992 	    return NULL;
7993 	list = list_alloc();
7994 	if (list == NULL)
7995 	    return NULL;
7996 	if (list_append_string(list, s,
7997 				 (int)(submatch_match->endp[no] - s)) == FAIL)
7998 	    error = TRUE;
7999     }
8000 
8001     if (error)
8002     {
8003 	list_free(list, TRUE);
8004 	return NULL;
8005     }
8006     return list;
8007 }
8008 #endif
8009 
8010 static regengine_T bt_regengine =
8011 {
8012     bt_regcomp,
8013     bt_regfree,
8014     bt_regexec_nl,
8015     bt_regexec_multi,
8016     (char_u *)""
8017 };
8018 
8019 #include "regexp_nfa.c"
8020 
8021 static regengine_T nfa_regengine =
8022 {
8023     nfa_regcomp,
8024     nfa_regfree,
8025     nfa_regexec_nl,
8026     nfa_regexec_multi,
8027     (char_u *)""
8028 };
8029 
/*
 * The regexp engine that vim_regcomp() is using for the pattern it compiles.
 * The value must match with 'regexpengine'.
 */
static int regexp_engine = 0;
8033 
#ifdef DEBUG
/* Engine names for the debug message in vim_regcomp(), indexed by the engine
 * number given after "\%#=". */
static char_u regname[][30] =
{
    "AUTOMATIC Regexp Engine",
    "BACKTRACKING Regexp Engine",
    "NFA Regexp Engine"
};
#endif
8041 
8042 /*
8043  * Compile a regular expression into internal code.
8044  * Returns the program in allocated memory.
8045  * Use vim_regfree() to free the memory.
8046  * Returns NULL for an error.
8047  */
8048     regprog_T *
8049 vim_regcomp(expr_arg, re_flags)
8050     char_u	*expr_arg;
8051     int		re_flags;
8052 {
8053     regprog_T   *prog = NULL;
8054     char_u	*expr = expr_arg;
8055 
8056     regexp_engine = p_re;
8057 
8058     /* Check for prefix "\%#=", that sets the regexp engine */
8059     if (STRNCMP(expr, "\\%#=", 4) == 0)
8060     {
8061 	int newengine = expr[4] - '0';
8062 
8063 	if (newengine == AUTOMATIC_ENGINE
8064 	    || newengine == BACKTRACKING_ENGINE
8065 	    || newengine == NFA_ENGINE)
8066 	{
8067 	    regexp_engine = expr[4] - '0';
8068 	    expr += 5;
8069 #ifdef DEBUG
8070 	    smsg((char_u *)"New regexp mode selected (%d): %s",
8071 					   regexp_engine, regname[newengine]);
8072 #endif
8073 	}
8074 	else
8075 	{
8076 	    EMSG(_("E864: \\%#= can only be followed by 0, 1, or 2. The automatic engine will be used "));
8077 	    regexp_engine = AUTOMATIC_ENGINE;
8078 	}
8079     }
8080     bt_regengine.expr = expr;
8081     nfa_regengine.expr = expr;
8082 
8083     /*
8084      * First try the NFA engine, unless backtracking was requested.
8085      */
8086     if (regexp_engine != BACKTRACKING_ENGINE)
8087         prog = nfa_regengine.regcomp(expr,
8088 		re_flags + (regexp_engine == AUTOMATIC_ENGINE ? RE_AUTO : 0));
8089     else
8090 	prog = bt_regengine.regcomp(expr, re_flags);
8091 
8092     /* Check for error compiling regexp with initial engine. */
8093     if (prog == NULL)
8094     {
8095 #ifdef BT_REGEXP_DEBUG_LOG
8096 	if (regexp_engine != BACKTRACKING_ENGINE)   /* debugging log for NFA */
8097 	{
8098 	    FILE *f;
8099 	    f = fopen(BT_REGEXP_DEBUG_LOG_NAME, "a");
8100 	    if (f)
8101 	    {
8102 		fprintf(f, "Syntax error in \"%s\"\n", expr);
8103 		fclose(f);
8104 	    }
8105 	    else
8106 		EMSG2("(NFA) Could not open \"%s\" to write !!!",
8107                         BT_REGEXP_DEBUG_LOG_NAME);
8108 	}
8109 #endif
8110 	/*
8111 	 * If the NFA engine failed, try the backtracking engine.
8112 	 * The NFA engine also fails for patterns that it can't handle well
8113 	 * but are still valid patterns, thus a retry should work.
8114 	 */
8115 	if (regexp_engine == AUTOMATIC_ENGINE)
8116 	{
8117 	    regexp_engine = BACKTRACKING_ENGINE;
8118 	    prog = bt_regengine.regcomp(expr, re_flags);
8119 	}
8120     }
8121 
8122     if (prog != NULL)
8123     {
8124 	/* Store the info needed to call regcomp() again when the engine turns
8125 	 * out to be very slow when executing it. */
8126 	prog->re_engine = regexp_engine;
8127 	prog->re_flags  = re_flags;
8128     }
8129 
8130     return prog;
8131 }
8132 
8133 /*
8134  * Free a compiled regexp program, returned by vim_regcomp().
8135  */
8136     void
8137 vim_regfree(prog)
8138     regprog_T   *prog;
8139 {
8140     if (prog != NULL)
8141 	prog->engine->regfree(prog);
8142 }
8143 
#ifdef FEAT_EVAL
static void report_re_switch __ARGS((char_u *pat));

/*
 * When "p_verbose" is above zero, give a message that the backtracking
 * engine takes over for pattern "pat".
 */
    static void
report_re_switch(pat)
    char_u *pat;
{
    if (p_verbose <= 0)
	return;

    verbose_enter();
    MSG_PUTS(_("Switching to backtracking RE engine for pattern: "));
    MSG_PUTS(pat);
    verbose_leave();
}
#endif
8160 
8161 static int vim_regexec_both __ARGS((regmatch_T *rmp, char_u *line, colnr_T col, int nl));
8162 
8163 /*
8164  * Match a regexp against a string.
8165  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8166  * Note: "rmp->regprog" may be freed and changed.
8167  * Uses curbuf for line count and 'iskeyword'.
8168  * When "nl" is TRUE consider a "\n" in "line" to be a line break.
8169  *
8170  * Return TRUE if there is a match, FALSE if not.
8171  */
8172     static int
8173 vim_regexec_both(rmp, line, col, nl)
8174     regmatch_T	*rmp;
8175     char_u	*line;  /* string to match against */
8176     colnr_T	col;    /* column to start looking for match */
8177     int		nl;
8178 {
8179     int result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8180 
8181     /* NFA engine aborted because it's very slow. */
8182     if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8183 					       && result == NFA_TOO_EXPENSIVE)
8184     {
8185 	int    save_p_re = p_re;
8186 	int    re_flags = rmp->regprog->re_flags;
8187 	char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8188 
8189 	p_re = BACKTRACKING_ENGINE;
8190 	vim_regfree(rmp->regprog);
8191 	if (pat != NULL)
8192 	{
8193 #ifdef FEAT_EVAL
8194 	    report_re_switch(pat);
8195 #endif
8196 	    rmp->regprog = vim_regcomp(pat, re_flags);
8197 	    if (rmp->regprog != NULL)
8198 		result = rmp->regprog->engine->regexec_nl(rmp, line, col, nl);
8199 	    vim_free(pat);
8200 	}
8201 
8202 	p_re = save_p_re;
8203     }
8204     return result > 0;
8205 }
8206 
8207 /*
8208  * Note: "*prog" may be freed and changed.
8209  * Return TRUE if there is a match, FALSE if not.
8210  */
8211     int
8212 vim_regexec_prog(prog, ignore_case, line, col)
8213     regprog_T	**prog;
8214     int		ignore_case;
8215     char_u	*line;
8216     colnr_T	col;
8217 {
8218     int r;
8219     regmatch_T regmatch;
8220 
8221     regmatch.regprog = *prog;
8222     regmatch.rm_ic = ignore_case;
8223     r = vim_regexec_both(&regmatch, line, col, FALSE);
8224     *prog = regmatch.regprog;
8225     return r;
8226 }
8227 
8228 /*
8229  * Note: "rmp->regprog" may be freed and changed.
8230  * Return TRUE if there is a match, FALSE if not.
8231  */
8232     int
8233 vim_regexec(rmp, line, col)
8234     regmatch_T	*rmp;
8235     char_u	*line;
8236     colnr_T	col;
8237 {
8238     return vim_regexec_both(rmp, line, col, FALSE);
8239 }
8240 
#if defined(FEAT_MODIFY_FNAME) || defined(FEAT_EVAL) \
	|| defined(FIND_REPLACE_DIALOG) || defined(PROTO)
/*
 * Match "rmp->regprog" against "line", starting at column "col", where a
 * "\n" in "line" is considered to be a line break.  Otherwise like
 * vim_regexec().
 * Note: "rmp->regprog" may be freed and changed.
 * Return TRUE if there is a match, FALSE if not.
 */
    int
vim_regexec_nl(rmp, line, col)
    regmatch_T	*rmp;
    char_u	*line;
    colnr_T	col;
{
    int matched = vim_regexec_both(rmp, line, col, TRUE);

    return matched;
}
#endif
8257 
8258 /*
8259  * Match a regexp against multiple lines.
8260  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
8261  * Note: "rmp->regprog" may be freed and changed.
8262  * Uses curbuf for line count and 'iskeyword'.
8263  *
8264  * Return zero if there is no match.  Return number of lines contained in the
8265  * match otherwise.
8266  */
8267     long
8268 vim_regexec_multi(rmp, win, buf, lnum, col, tm)
8269     regmmatch_T *rmp;
8270     win_T       *win;           /* window in which to search or NULL */
8271     buf_T       *buf;           /* buffer in which to search */
8272     linenr_T    lnum;           /* nr of line to start looking for match */
8273     colnr_T     col;            /* column to start looking for match */
8274     proftime_T	*tm;		/* timeout limit or NULL */
8275 {
8276     int result = rmp->regprog->engine->regexec_multi(
8277 						rmp, win, buf, lnum, col, tm);
8278 
8279     /* NFA engine aborted because it's very slow. */
8280     if (rmp->regprog->re_engine == AUTOMATIC_ENGINE
8281 					       && result == NFA_TOO_EXPENSIVE)
8282     {
8283 	int    save_p_re = p_re;
8284 	int    re_flags = rmp->regprog->re_flags;
8285 	char_u *pat = vim_strsave(((nfa_regprog_T *)rmp->regprog)->pattern);
8286 
8287 	p_re = BACKTRACKING_ENGINE;
8288 	vim_regfree(rmp->regprog);
8289 	if (pat != NULL)
8290 	{
8291 #ifdef FEAT_EVAL
8292 	    report_re_switch(pat);
8293 #endif
8294 	    rmp->regprog = vim_regcomp(pat, re_flags);
8295 	    if (rmp->regprog != NULL)
8296 		result = rmp->regprog->engine->regexec_multi(
8297 						rmp, win, buf, lnum, col, tm);
8298 	    vim_free(pat);
8299 	}
8300 	p_re = save_p_re;
8301     }
8302 
8303     return result <= 0 ? 0 : result;
8304 }
8305