xref: /vim-8.2.3635/src/regexp_bt.c (revision d8e44476)
1 /* vi:set ts=8 sts=4 sw=4 noet:
2  *
3  * Backtracking regular expression implementation.
4  *
5  * This file is included in "regexp.c".
6  *
7  * NOTICE:
8  *
9  * This is NOT the original regular expression code as written by Henry
10  * Spencer.  This code has been modified specifically for use with the VIM
11  * editor, and should not be used separately from Vim.  If you want a good
12  * regular expression library, get the original code.  The copyright notice
13  * that follows is from the original.
14  *
15  * END NOTICE
16  *
17  *	Copyright (c) 1986 by University of Toronto.
18  *	Written by Henry Spencer.  Not derived from licensed software.
19  *
20  *	Permission is granted to anyone to use this software for any
21  *	purpose on any computer system, and to redistribute it freely,
22  *	subject to the following restrictions:
23  *
24  *	1. The author is not responsible for the consequences of use of
25  *		this software, no matter how awful, even if they arise
26  *		from defects in it.
27  *
28  *	2. The origin of this software must not be misrepresented, either
29  *		by explicit claim or by omission.
30  *
31  *	3. Altered versions must be plainly marked as such, and must not
32  *		be misrepresented as being the original software.
33  *
34  * Beware that some of this code is subtly aware of the way operator
35  * precedence is structured in regular expressions.  Serious changes in
36  * regular-expression syntax might require a total rethink.
37  *
38  * Changes have been made by Tony Andrews, Olaf 'Rhialto' Seibert, Robert
39  * Webb, Ciaran McCreesh and Bram Moolenaar.
40  * Named character class support added by Walter Briscoe (1998 Jul 01)
41  */
42 
43 /*
44  * The "internal use only" fields in regexp.h are present to pass info from
45  * compile to execute that permits the execute phase to run lots faster on
46  * simple cases.  They are:
47  *
48  * regstart	char that must begin a match; NUL if none obvious; Can be a
49  *		multi-byte character.
50  * reganch	is the match anchored (at beginning-of-line only)?
51  * regmust	string (pointer into program) that match must include, or NULL
52  * regmlen	length of regmust string
53  * regflags	RF_ values or'ed together
54  *
55  * Regstart and reganch permit very fast decisions on suitable starting points
56  * for a match, cutting down the work a lot.  Regmust permits fast rejection
57  * of lines that cannot possibly match.  The regmust tests are costly enough
58  * that vim_regcomp() supplies a regmust only if the r.e. contains something
59  * potentially expensive (at present, the only such thing detected is * or +
60  * at the start of the r.e., which can involve a lot of backup).  Regmlen is
61  * supplied because the test in vim_regexec() needs it and vim_regcomp() is
62  * computing it anyway.
63  */
64 
65 /*
66  * Structure for regexp "program".  This is essentially a linear encoding
67  * of a nondeterministic finite-state machine (aka syntax charts or
68  * "railroad normal form" in parsing technology).  Each node is an opcode
69  * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
70  * all nodes except BRANCH and BRACES_COMPLEX implement concatenation; a "next"
71  * pointer with a BRANCH on both ends of it is connecting two alternatives.
72  * (Here we have one of the subtle syntax dependencies:	an individual BRANCH
73  * (as opposed to a collection of them) is never concatenated with anything
74  * because of operator precedence).  The "next" pointer of a BRACES_COMPLEX
75  * node points to the node after the stuff to be repeated.
76  * The operand of some types of node is a literal string; for others, it is a
77  * node leading into a sub-FSM.  In particular, the operand of a BRANCH node
78  * is the first node of the branch.
79  * (NB this is *not* a tree structure: the tail of the branch connects to the
80  * thing following the set of BRANCHes.)
81  *
82  * pattern	is coded like:
83  *
84  *			  +-----------------+
85  *			  |		    V
86  * <aa>\|<bb>	BRANCH <aa> BRANCH <bb> --> END
87  *		     |	    ^	 |	    ^
88  *		     +------+	 +----------+
89  *
90  *
91  *		       +------------------+
92  *		       V		  |
93  * <aa>*	BRANCH BRANCH <aa> --> BACK BRANCH --> NOTHING --> END
94  *		     |	    |		    ^			   ^
95  *		     |	    +---------------+			   |
96  *		     +---------------------------------------------+
97  *
98  *
99  *		       +----------------------+
100  *		       V		      |
101  * <aa>\+	BRANCH <aa> --> BRANCH --> BACK  BRANCH --> NOTHING --> END
102  *		     |		     |		 ^			^
103  *		     |		     +-----------+			|
104  *		     +--------------------------------------------------+
105  *
106  *
107  *					+-------------------------+
108  *					V			  |
109  * <aa>\{}	BRANCH BRACE_LIMITS --> BRACE_COMPLEX <aa> --> BACK  END
110  *		     |				    |		     ^
111  *		     |				    +----------------+
112  *		     +-----------------------------------------------+
113  *
114  *
115  * <aa>\@!<bb>	BRANCH NOMATCH <aa> --> END  <bb> --> END
116  *		     |	     |		      ^       ^
117  *		     |	     +----------------+       |
118  *		     +--------------------------------+
119  *
120  *						      +---------+
121  *						      |		V
122  * \z[abc]	BRANCH BRANCH  a  BRANCH  b  BRANCH  c	BRANCH	NOTHING --> END
123  *		     |	    |	       |	  |	^		    ^
124  *		     |	    |	       |	  +-----+		    |
125  *		     |	    |	       +----------------+		    |
126  *		     |	    +---------------------------+		    |
127  *		     +------------------------------------------------------+
128  *
129  * They all start with a BRANCH for "\|" alternatives, even when there is only
130  * one alternative.
131  */
132 
133 /*
134  * The opcodes are:
135  */
136 
137 // definition	number		   opnd?    meaning
138 #define END		0	//	End of program or NOMATCH operand.
139 #define BOL		1	//	Match "" at beginning of line.
140 #define EOL		2	//	Match "" at end of line.
141 #define BRANCH		3	// node Match this alternative, or the
142 				//	next...
143 #define BACK		4	//	Match "", "next" ptr points backward.
144 #define EXACTLY		5	// str	Match this string.
145 #define NOTHING		6	//	Match empty string.
146 #define STAR		7	// node Match this (simple) thing 0 or more
147 				//	times.
148 #define PLUS		8	// node Match this (simple) thing 1 or more
149 				//	times.
150 #define MATCH		9	// node match the operand zero-width
151 #define NOMATCH		10	// node check for no match with operand
152 #define BEHIND		11	// node look behind for a match with operand
153 #define NOBEHIND	12	// node look behind for no match with operand
154 #define SUBPAT		13	// node match the operand here
155 #define BRACE_SIMPLE	14	// node Match this (simple) thing between m and
156 				//	n times (\{m,n\}).
157 #define BOW		15	//	Match "" after [^a-zA-Z0-9_]
158 #define EOW		16	//	Match "" at    [^a-zA-Z0-9_]
159 #define BRACE_LIMITS	17	// nr nr  define the min & max for BRACE_SIMPLE
160 				//	and BRACE_COMPLEX.
161 #define NEWL		18	//	Match line-break
162 #define BHPOS		19	//	End position for BEHIND or NOBEHIND
163 
164 
165 // character classes: 20-48 normal, 50-78 include a line-break
166 #define ADD_NL		30
167 #define FIRST_NL	ANY + ADD_NL
168 #define ANY		20	//	Match any one character.
169 #define ANYOF		21	// str	Match any character in this string.
170 #define ANYBUT		22	// str	Match any character not in this
171 				//	string.
172 #define IDENT		23	//	Match identifier char
173 #define SIDENT		24	//	Match identifier char but no digit
174 #define KWORD		25	//	Match keyword char
175 #define SKWORD		26	//	Match word char but no digit
176 #define FNAME		27	//	Match file name char
177 #define SFNAME		28	//	Match file name char but no digit
178 #define PRINT		29	//	Match printable char
179 #define SPRINT		30	//	Match printable char but no digit
180 #define WHITE		31	//	Match whitespace char
181 #define NWHITE		32	//	Match non-whitespace char
182 #define DIGIT		33	//	Match digit char
183 #define NDIGIT		34	//	Match non-digit char
184 #define HEX		35	//	Match hex char
185 #define NHEX		36	//	Match non-hex char
186 #define OCTAL		37	//	Match octal char
187 #define NOCTAL		38	//	Match non-octal char
188 #define WORD		39	//	Match word char
189 #define NWORD		40	//	Match non-word char
190 #define HEAD		41	//	Match head char
191 #define NHEAD		42	//	Match non-head char
192 #define ALPHA		43	//	Match alpha char
193 #define NALPHA		44	//	Match non-alpha char
194 #define LOWER		45	//	Match lowercase char
195 #define NLOWER		46	//	Match non-lowercase char
196 #define UPPER		47	//	Match uppercase char
197 #define NUPPER		48	//	Match non-uppercase char
198 #define LAST_NL		NUPPER + ADD_NL
199 #define WITH_NL(op)	((op) >= FIRST_NL && (op) <= LAST_NL)
200 
201 #define MOPEN		80  // -89	 Mark this point in input as start of
202 				//	 \( subexpr.  MOPEN + 0 marks start of
203 				//	 match.
204 #define MCLOSE		90  // -99	 Analogous to MOPEN.  MCLOSE + 0 marks
205 				//	 end of match.
206 #define BACKREF		100 // -109 node Match same string again \1-\9
207 
208 #ifdef FEAT_SYN_HL
209 # define ZOPEN		110 // -119	 Mark this point in input as start of
210 				//	 \z( subexpr.
211 # define ZCLOSE		120 // -129	 Analogous to ZOPEN.
212 # define ZREF		130 // -139 node Match external submatch \z1-\z9
213 #endif
214 
215 #define BRACE_COMPLEX	140 // -149 node Match nodes between m & n times
216 
217 #define NOPEN		150	//	Mark this point in input as start of
218 				//	\%( subexpr.
219 #define NCLOSE		151	//	Analogous to NOPEN.
220 
221 #define MULTIBYTECODE	200	// mbc	Match one multi-byte character
222 #define RE_BOF		201	//	Match "" at beginning of file.
223 #define RE_EOF		202	//	Match "" at end of file.
224 #define CURSOR		203	//	Match location of cursor.
225 
226 #define RE_LNUM		204	// nr cmp  Match line number
227 #define RE_COL		205	// nr cmp  Match column number
228 #define RE_VCOL		206	// nr cmp  Match virtual column number
229 
230 #define RE_MARK		207	// mark cmp  Match mark position
231 #define RE_VISUAL	208	//	Match Visual area
232 #define RE_COMPOSING	209	// any composing characters
233 
234 /*
235  * Flags to be passed up and down.
236  */
237 #define HASWIDTH	0x1	// Known never to match null string.
238 #define SIMPLE		0x2	// Simple enough to be STAR/PLUS operand.
239 #define SPSTART		0x4	// Starts with * or +.
240 #define HASNL		0x8	// Contains some \n.
241 #define HASLOOKBH	0x10	// Contains "\@<=" or "\@<!".
242 #define WORST		0	// Worst case.
243 
244 static int	num_complex_braces; // Complex \{...} count
245 static char_u	*regcode;	// Code-emit pointer, or JUST_CALC_SIZE
246 static long	regsize;	// Code size.
247 static int	reg_toolong;	// TRUE when offset out of range
248 static char_u	had_endbrace[NSUBEXP];	// flags, TRUE if end of () found
249 static long	brace_min[10];	// Minimums for complex brace repeats
250 static long	brace_max[10];	// Maximums for complex brace repeats
251 static int	brace_count[10]; // Current counts for complex brace repeats
252 static int	one_exactly = FALSE;	// only do one char for EXACTLY
253 
254 // When making changes to classchars also change nfa_classcodes.
255 static char_u	*classchars = (char_u *)".iIkKfFpPsSdDxXoOwWhHaAlLuU";
256 static int	classcodes[] = {
257     ANY, IDENT, SIDENT, KWORD, SKWORD,
258     FNAME, SFNAME, PRINT, SPRINT,
259     WHITE, NWHITE, DIGIT, NDIGIT,
260     HEX, NHEX, OCTAL, NOCTAL,
261     WORD, NWORD, HEAD, NHEAD,
262     ALPHA, NALPHA, LOWER, NLOWER,
263     UPPER, NUPPER
264 };
265 
266 /*
267  * When regcode is set to this value, code is not emitted and size is computed
268  * instead.
269  */
270 #define JUST_CALC_SIZE	((char_u *) -1)
271 
272 // Values for rs_state in regitem_T.
273 typedef enum regstate_E
274 {
275     RS_NOPEN = 0	// NOPEN and NCLOSE
276     , RS_MOPEN		// MOPEN + [0-9]
277     , RS_MCLOSE		// MCLOSE + [0-9]
278 #ifdef FEAT_SYN_HL
279     , RS_ZOPEN		// ZOPEN + [0-9]
280     , RS_ZCLOSE		// ZCLOSE + [0-9]
281 #endif
282     , RS_BRANCH		// BRANCH
283     , RS_BRCPLX_MORE	// BRACE_COMPLEX and trying one more match
284     , RS_BRCPLX_LONG	// BRACE_COMPLEX and trying longest match
285     , RS_BRCPLX_SHORT	// BRACE_COMPLEX and trying shortest match
286     , RS_NOMATCH	// NOMATCH
287     , RS_BEHIND1	// BEHIND / NOBEHIND matching rest
288     , RS_BEHIND2	// BEHIND / NOBEHIND matching behind part
289     , RS_STAR_LONG	// STAR/PLUS/BRACE_SIMPLE longest match
290     , RS_STAR_SHORT	// STAR/PLUS/BRACE_SIMPLE shortest match
291 } regstate_T;
292 
293 /*
294  * Structure used to save the current input state, when it needs to be
295  * restored after trying a match.  Used by reg_save() and reg_restore().
296  * Also stores the length of "backpos".
297  */
298 typedef struct
299 {
300     union
301     {
302 	char_u	*ptr;	// rex.input pointer, for single-line regexp
303 	lpos_T	pos;	// rex.input pos, for multi-line regexp
304     } rs_u;
305     int		rs_len;
306 } regsave_T;
307 
308 // struct to save start/end pointer/position in for \(\)
309 typedef struct
310 {
311     union
312     {
313 	char_u	*ptr;
314 	lpos_T	pos;
315     } se_u;
316 } save_se_T;
317 
318 // used for BEHIND and NOBEHIND matching
319 typedef struct regbehind_S
320 {
321     regsave_T	save_after;
322     regsave_T	save_behind;
323     int		save_need_clear_subexpr;
324     save_se_T   save_start[NSUBEXP];
325     save_se_T   save_end[NSUBEXP];
326 } regbehind_T;
327 
328 /*
329  * When there are alternatives a regstate_T is put on the regstack to remember
330  * what we are doing.
331  * Before it may be another type of item, depending on rs_state, to remember
332  * more things.
333  */
334 typedef struct regitem_S
335 {
336     regstate_T	rs_state;	// what we are doing, one of RS_ above
337     short	rs_no;		// submatch nr or BEHIND/NOBEHIND
338     char_u	*rs_scan;	// current node in program
339     union
340     {
341 	save_se_T  sesave;
342 	regsave_T  regsave;
343     } rs_un;			// room for saving rex.input
344 } regitem_T;
345 
346 
347 // used for STAR, PLUS and BRACE_SIMPLE matching
348 typedef struct regstar_S
349 {
350     int		nextb;		// next byte
351     int		nextb_ic;	// next byte reverse case
352     long	count;
353     long	minval;
354     long	maxval;
355 } regstar_T;
356 
357 // used to store input position when a BACK was encountered, so that we now if
358 // we made any progress since the last time.
359 typedef struct backpos_S
360 {
361     char_u	*bp_scan;	// "scan" where BACK was encountered
362     regsave_T	bp_pos;		// last input position
363 } backpos_T;
364 
365 /*
366  * "regstack" and "backpos" are used by regmatch().  They are kept over calls
367  * to avoid invoking malloc() and free() often.
368  * "regstack" is a stack with regitem_T items, sometimes preceded by regstar_T
369  * or regbehind_T.
370  * "backpos_T" is a table with backpos_T for BACK
371  */
372 static garray_T	regstack = {0, 0, 0, 0, NULL};
373 static garray_T	backpos = {0, 0, 0, 0, NULL};
374 
375 static regsave_T behind_pos;
376 
377 /*
378  * Both for regstack and backpos tables we use the following strategy of
379  * allocation (to reduce malloc/free calls):
380  * - Initial size is fairly small.
381  * - When needed, the tables are grown bigger (8 times at first, double after
382  *   that).
383  * - After executing the match we free the memory only if the array has grown.
384  *   Thus the memory is kept allocated when it's at the initial size.
385  * This makes it fast while not keeping a lot of memory allocated.
386  * A three times speed increase was observed when using many simple patterns.
387  */
388 #define REGSTACK_INITIAL	2048
389 #define BACKPOS_INITIAL		64
390 
391 /*
392  * Opcode notes:
393  *
394  * BRANCH	The set of branches constituting a single choice are hooked
395  *		together with their "next" pointers, since precedence prevents
396  *		anything being concatenated to any individual branch.  The
397  *		"next" pointer of the last BRANCH in a choice points to the
398  *		thing following the whole choice.  This is also where the
399  *		final "next" pointer of each individual branch points; each
400  *		branch starts with the operand node of a BRANCH node.
401  *
402  * BACK		Normal "next" pointers all implicitly point forward; BACK
403  *		exists to make loop structures possible.
404  *
405  * STAR,PLUS	'=', and complex '*' and '+', are implemented as circular
406  *		BRANCH structures using BACK.  Simple cases (one character
407  *		per match) are implemented with STAR and PLUS for speed
408  *		and to minimize recursive plunges.
409  *
410  * BRACE_LIMITS	This is always followed by a BRACE_SIMPLE or BRACE_COMPLEX
411  *		node, and defines the min and max limits to be used for that
412  *		node.
413  *
414  * MOPEN,MCLOSE	...are numbered at compile time.
415  * ZOPEN,ZCLOSE	...ditto
416  */
417 
418 /*
419  * A node is one char of opcode followed by two chars of "next" pointer.
420  * "Next" pointers are stored as two 8-bit bytes, high order first.  The
421  * value is a positive offset from the opcode of the node containing it.
422  * An operand, if any, simply follows the node.  (Note that much of the
423  * code generation knows about this implicit relationship.)
424  *
425  * Using two bytes for the "next" pointer is vast overkill for most things,
426  * but allows patterns to get big without disasters.
427  */
428 #define OP(p)		((int)*(p))
429 #define NEXT(p)		(((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
430 #define OPERAND(p)	((p) + 3)
431 // Obtain an operand that was stored as four bytes, MSB first.
432 #define OPERAND_MIN(p)	(((long)(p)[3] << 24) + ((long)(p)[4] << 16) \
433 			+ ((long)(p)[5] << 8) + (long)(p)[6])
434 // Obtain a second operand stored as four bytes.
435 #define OPERAND_MAX(p)	OPERAND_MIN((p) + 4)
436 // Obtain a second single-byte operand stored after a four bytes operand.
437 #define OPERAND_CMP(p)	(p)[7]
438 
439 static char_u *reg(int paren, int *flagp);
440 
441 #ifdef BT_REGEXP_DUMP
442 static void	regdump(char_u *, bt_regprog_T *);
443 #endif
444 
445 static int	re_num_cmp(long_u val, char_u *scan);
446 
447 #ifdef DEBUG
448 static char_u	*regprop(char_u *);
449 
450 static int	regnarrate = 0;
451 #endif
452 
453 
454 /*
455  * Setup to parse the regexp.  Used once to get the length and once to do it.
456  */
457     static void
regcomp_start(char_u * expr,int re_flags)458 regcomp_start(
459     char_u	*expr,
460     int		re_flags)	    // see vim_regcomp()
461 {
462     initchr(expr);
463     if (re_flags & RE_MAGIC)
464 	reg_magic = MAGIC_ON;
465     else
466 	reg_magic = MAGIC_OFF;
467     reg_string = (re_flags & RE_STRING);
468     reg_strict = (re_flags & RE_STRICT);
469     get_cpo_flags();
470 
471     num_complex_braces = 0;
472     regnpar = 1;
473     CLEAR_FIELD(had_endbrace);
474 #ifdef FEAT_SYN_HL
475     regnzpar = 1;
476     re_has_z = 0;
477 #endif
478     regsize = 0L;
479     reg_toolong = FALSE;
480     regflags = 0;
481 #if defined(FEAT_SYN_HL) || defined(PROTO)
482     had_eol = FALSE;
483 #endif
484 }
485 
486 /*
487  * Return TRUE if MULTIBYTECODE should be used instead of EXACTLY for
488  * character "c".
489  */
490     static int
use_multibytecode(int c)491 use_multibytecode(int c)
492 {
493     return has_mbyte && (*mb_char2len)(c) > 1
494 		     && (re_multi_type(peekchr()) != NOT_MULTI
495 			     || (enc_utf8 && utf_iscomposing(c)));
496 }
497 
498 /*
499  * Emit (if appropriate) a byte of code
500  */
501     static void
regc(int b)502 regc(int b)
503 {
504     if (regcode == JUST_CALC_SIZE)
505 	regsize++;
506     else
507 	*regcode++ = b;
508 }
509 
510 /*
511  * Emit (if appropriate) a multi-byte character of code
512  */
513     static void
regmbc(int c)514 regmbc(int c)
515 {
516     if (!has_mbyte && c > 0xff)
517 	return;
518     if (regcode == JUST_CALC_SIZE)
519 	regsize += (*mb_char2len)(c);
520     else
521 	regcode += (*mb_char2bytes)(c, regcode);
522 }
523 
524 
525 /*
526  * Produce the bytes for equivalence class "c".
527  * Currently only handles latin1, latin9 and utf-8.
528  * NOTE: When changing this function, also change nfa_emit_equi_class()
529  */
530     static void
reg_equi_class(int c)531 reg_equi_class(int c)
532 {
533     if (enc_utf8 || STRCMP(p_enc, "latin1") == 0
534 					 || STRCMP(p_enc, "iso-8859-15") == 0)
535     {
536 #ifdef EBCDIC
537 	int i;
538 
539 	// This might be slower than switch/case below.
540 	for (i = 0; i < 16; i++)
541 	{
542 	    if (vim_strchr(EQUIVAL_CLASS_C[i], c) != NULL)
543 	    {
544 		char *p = EQUIVAL_CLASS_C[i];
545 
546 		while (*p != 0)
547 		    regmbc(*p++);
548 		return;
549 	    }
550 	}
551 #else
552 	switch (c)
553 	{
554 	    // Do not use '\300' style, it results in a negative number.
555 	    case 'A': case 0xc0: case 0xc1: case 0xc2: case 0xc3: case 0xc4:
556 	    case 0xc5: case 0x100: case 0x102: case 0x104: case 0x1cd:
557 	    case 0x1de: case 0x1e0: case 0x1fa: case 0x202: case 0x226:
558 	    case 0x23a: case 0x1e00: case 0x1ea0: case 0x1ea2: case 0x1ea4:
559 	    case 0x1ea6: case 0x1ea8: case 0x1eaa: case 0x1eac: case 0x1eae:
560 	    case 0x1eb0: case 0x1eb2: case 0x1eb4: case 0x1eb6:
561 		      regmbc('A'); regmbc(0xc0); regmbc(0xc1); regmbc(0xc2);
562 		      regmbc(0xc3); regmbc(0xc4); regmbc(0xc5);
563 		      regmbc(0x100); regmbc(0x102); regmbc(0x104);
564 		      regmbc(0x1cd); regmbc(0x1de); regmbc(0x1e0);
565 		      regmbc(0x1fa); regmbc(0x202); regmbc(0x226);
566 		      regmbc(0x23a); regmbc(0x1e00); regmbc(0x1ea0);
567 		      regmbc(0x1ea2); regmbc(0x1ea4); regmbc(0x1ea6);
568 		      regmbc(0x1ea8); regmbc(0x1eaa); regmbc(0x1eac);
569 		      regmbc(0x1eae); regmbc(0x1eb0); regmbc(0x1eb2);
570 		      regmbc(0x1eb4); regmbc(0x1eb6);
571 		      return;
572 	    case 'B': case 0x181: case 0x243: case 0x1e02:
573 	    case 0x1e04: case 0x1e06:
574 		      regmbc('B');
575 		      regmbc(0x181); regmbc(0x243); regmbc(0x1e02);
576 		      regmbc(0x1e04); regmbc(0x1e06);
577 		      return;
578 	    case 'C': case 0xc7:
579 	    case 0x106: case 0x108: case 0x10a: case 0x10c: case 0x187:
580 	    case 0x23b: case 0x1e08: case 0xa792:
581 		      regmbc('C'); regmbc(0xc7);
582 		      regmbc(0x106); regmbc(0x108); regmbc(0x10a);
583 		      regmbc(0x10c); regmbc(0x187); regmbc(0x23b);
584 		      regmbc(0x1e08); regmbc(0xa792);
585 		      return;
586 	    case 'D': case 0x10e: case 0x110: case 0x18a:
587 	    case 0x1e0a: case 0x1e0c: case 0x1e0e: case 0x1e10:
588 	    case 0x1e12:
589 		      regmbc('D'); regmbc(0x10e); regmbc(0x110);
590 		      regmbc(0x18a); regmbc(0x1e0a); regmbc(0x1e0c);
591 		      regmbc(0x1e0e); regmbc(0x1e10); regmbc(0x1e12);
592 		      return;
593 	    case 'E': case 0xc8: case 0xc9: case 0xca: case 0xcb:
594 	    case 0x112: case 0x114: case 0x116: case 0x118: case 0x11a:
595 	    case 0x204: case 0x206: case 0x228: case 0x246: case 0x1e14:
596 	    case 0x1e16: case 0x1e18: case 0x1e1a: case 0x1e1c:
597 	    case 0x1eb8: case 0x1eba: case 0x1ebc: case 0x1ebe:
598 	    case 0x1ec0: case 0x1ec2: case 0x1ec4: case 0x1ec6:
599 		      regmbc('E'); regmbc(0xc8); regmbc(0xc9);
600 		      regmbc(0xca); regmbc(0xcb); regmbc(0x112);
601 		      regmbc(0x114); regmbc(0x116); regmbc(0x118);
602 		      regmbc(0x11a); regmbc(0x204); regmbc(0x206);
603 		      regmbc(0x228); regmbc(0x246); regmbc(0x1e14);
604 		      regmbc(0x1e16); regmbc(0x1e18); regmbc(0x1e1a);
605 		      regmbc(0x1e1c); regmbc(0x1eb8); regmbc(0x1eba);
606 		      regmbc(0x1ebc); regmbc(0x1ebe); regmbc(0x1ec0);
607 		      regmbc(0x1ec2); regmbc(0x1ec4); regmbc(0x1ec6);
608 		      return;
609 	    case 'F': case 0x191: case 0x1e1e: case 0xa798:
610 		      regmbc('F'); regmbc(0x191); regmbc(0x1e1e);
611 		      regmbc(0xa798);
612 		      return;
613 	    case 'G': case 0x11c: case 0x11e: case 0x120:
614 	    case 0x122: case 0x193: case 0x1e4: case 0x1e6:
615 	    case 0x1f4: case 0x1e20: case 0xa7a0:
616 		      regmbc('G'); regmbc(0x11c); regmbc(0x11e);
617 		      regmbc(0x120); regmbc(0x122); regmbc(0x193);
618 		      regmbc(0x1e4); regmbc(0x1e6); regmbc(0x1f4);
619 		      regmbc(0x1e20); regmbc(0xa7a0);
620 		      return;
621 	    case 'H': case 0x124: case 0x126: case 0x21e:
622 	    case 0x1e22: case 0x1e24: case 0x1e26:
623 	    case 0x1e28: case 0x1e2a: case 0x2c67:
624 		      regmbc('H'); regmbc(0x124); regmbc(0x126);
625 		      regmbc(0x21e); regmbc(0x1e22); regmbc(0x1e24);
626 		      regmbc(0x1e26); regmbc(0x1e28); regmbc(0x1e2a);
627 		      regmbc(0x2c67);
628 		      return;
629 	    case 'I': case 0xcc: case 0xcd: case 0xce: case 0xcf:
630 	    case 0x128: case 0x12a: case 0x12c: case 0x12e:
631 	    case 0x130: case 0x197: case 0x1cf: case 0x208:
632 	    case 0x20a: case 0x1e2c: case 0x1e2e: case 0x1ec8:
633 	    case 0x1eca:
634 		      regmbc('I'); regmbc(0xcc); regmbc(0xcd);
635 		      regmbc(0xce); regmbc(0xcf); regmbc(0x128);
636 		      regmbc(0x12a); regmbc(0x12c); regmbc(0x12e);
637 		      regmbc(0x130); regmbc(0x197); regmbc(0x1cf);
638 		      regmbc(0x208); regmbc(0x20a); regmbc(0x1e2c);
639 		      regmbc(0x1e2e); regmbc(0x1ec8); regmbc(0x1eca);
640 		      return;
641 	    case 'J': case 0x134: case 0x248:
642 		      regmbc('J'); regmbc(0x134); regmbc(0x248);
643 		      return;
644 	    case 'K': case 0x136: case 0x198: case 0x1e8: case 0x1e30:
645 	    case 0x1e32: case 0x1e34: case 0x2c69: case 0xa740:
646 		      regmbc('K'); regmbc(0x136); regmbc(0x198);
647 		      regmbc(0x1e8); regmbc(0x1e30); regmbc(0x1e32);
648 		      regmbc(0x1e34); regmbc(0x2c69); regmbc(0xa740);
649 		      return;
650 	    case 'L': case 0x139: case 0x13b: case 0x13d: case 0x13f:
651 	    case 0x141: case 0x23d: case 0x1e36: case 0x1e38:
652 	    case 0x1e3a: case 0x1e3c: case 0x2c60:
653 		      regmbc('L'); regmbc(0x139); regmbc(0x13b);
654 		      regmbc(0x13d); regmbc(0x13f); regmbc(0x141);
655 		      regmbc(0x23d); regmbc(0x1e36); regmbc(0x1e38);
656 		      regmbc(0x1e3a); regmbc(0x1e3c); regmbc(0x2c60);
657 		      return;
658 	    case 'M': case 0x1e3e: case 0x1e40: case 0x1e42:
659 		      regmbc('M'); regmbc(0x1e3e); regmbc(0x1e40);
660 		      regmbc(0x1e42);
661 		      return;
662 	    case 'N': case 0xd1:
663 	    case 0x143: case 0x145: case 0x147: case 0x1f8:
664 	    case 0x1e44: case 0x1e46: case 0x1e48: case 0x1e4a:
665 	    case 0xa7a4:
666 		      regmbc('N'); regmbc(0xd1);
667 		      regmbc(0x143); regmbc(0x145); regmbc(0x147);
668 		      regmbc(0x1f8); regmbc(0x1e44); regmbc(0x1e46);
669 		      regmbc(0x1e48); regmbc(0x1e4a); regmbc(0xa7a4);
670 		      return;
671 	    case 'O': case 0xd2: case 0xd3: case 0xd4: case 0xd5: case 0xd6:
672 	    case 0xd8: case 0x14c: case 0x14e: case 0x150: case 0x19f:
673 	    case 0x1a0: case 0x1d1: case 0x1ea: case 0x1ec: case 0x1fe:
674 	    case 0x20c: case 0x20e: case 0x22a: case 0x22c: case 0x22e:
675 	    case 0x230: case 0x1e4c: case 0x1e4e: case 0x1e50: case 0x1e52:
676 	    case 0x1ecc: case 0x1ece: case 0x1ed0: case 0x1ed2: case 0x1ed4:
677 	    case 0x1ed6: case 0x1ed8: case 0x1eda: case 0x1edc: case 0x1ede:
678 	    case 0x1ee0: case 0x1ee2:
679 		      regmbc('O'); regmbc(0xd2); regmbc(0xd3); regmbc(0xd4);
680 		      regmbc(0xd5); regmbc(0xd6); regmbc(0xd8);
681 		      regmbc(0x14c); regmbc(0x14e); regmbc(0x150);
682 		      regmbc(0x19f); regmbc(0x1a0); regmbc(0x1d1);
683 		      regmbc(0x1ea); regmbc(0x1ec); regmbc(0x1fe);
684 		      regmbc(0x20c); regmbc(0x20e); regmbc(0x22a);
685 		      regmbc(0x22c); regmbc(0x22e); regmbc(0x230);
686 		      regmbc(0x1e4c); regmbc(0x1e4e); regmbc(0x1e50);
687 		      regmbc(0x1e52); regmbc(0x1ecc); regmbc(0x1ece);
688 		      regmbc(0x1ed0); regmbc(0x1ed2); regmbc(0x1ed4);
689 		      regmbc(0x1ed6); regmbc(0x1ed8); regmbc(0x1eda);
690 		      regmbc(0x1edc); regmbc(0x1ede); regmbc(0x1ee0);
691 		      regmbc(0x1ee2);
692 		      return;
693 	    case 'P': case 0x1a4: case 0x1e54: case 0x1e56: case 0x2c63:
694 		      regmbc('P'); regmbc(0x1a4); regmbc(0x1e54);
695 		      regmbc(0x1e56); regmbc(0x2c63);
696 		      return;
697 	    case 'Q': case 0x24a:
698 		      regmbc('Q'); regmbc(0x24a);
699 		      return;
700 	    case 'R': case 0x154: case 0x156: case 0x158: case 0x210:
701 	    case 0x212: case 0x24c: case 0x1e58: case 0x1e5a:
702 	    case 0x1e5c: case 0x1e5e: case 0x2c64: case 0xa7a6:
703 		      regmbc('R'); regmbc(0x154); regmbc(0x156);
704 		      regmbc(0x210); regmbc(0x212); regmbc(0x158);
705 		      regmbc(0x24c); regmbc(0x1e58); regmbc(0x1e5a);
706 		      regmbc(0x1e5c); regmbc(0x1e5e); regmbc(0x2c64);
707 		      regmbc(0xa7a6);
708 		      return;
709 	    case 'S': case 0x15a: case 0x15c: case 0x15e: case 0x160:
710 	    case 0x218: case 0x1e60: case 0x1e62: case 0x1e64:
711 	    case 0x1e66: case 0x1e68: case 0x2c7e: case 0xa7a8:
712 		      regmbc('S'); regmbc(0x15a); regmbc(0x15c);
713 		      regmbc(0x15e); regmbc(0x160); regmbc(0x218);
714 		      regmbc(0x1e60); regmbc(0x1e62); regmbc(0x1e64);
715 		      regmbc(0x1e66); regmbc(0x1e68); regmbc(0x2c7e);
716 			  regmbc(0xa7a8);
717 		      return;
718 	    case 'T': case 0x162: case 0x164: case 0x166: case 0x1ac:
719 	    case 0x1ae: case 0x21a: case 0x23e: case 0x1e6a: case 0x1e6c:
720 	    case 0x1e6e: case 0x1e70:
721 		      regmbc('T'); regmbc(0x162); regmbc(0x164);
722 		      regmbc(0x166); regmbc(0x1ac); regmbc(0x23e);
723 		      regmbc(0x1ae); regmbc(0x21a); regmbc(0x1e6a);
724 		      regmbc(0x1e6c); regmbc(0x1e6e); regmbc(0x1e70);
725 		      return;
726 	    case 'U': case 0xd9: case 0xda: case 0xdb: case 0xdc:
727 	    case 0x168: case 0x16a: case 0x16c: case 0x16e:
728 	    case 0x170: case 0x172: case 0x1af: case 0x1d3:
729 	    case 0x1d5: case 0x1d7: case 0x1d9: case 0x1db:
730 	    case 0x214: case 0x216: case 0x244: case 0x1e72:
731 	    case 0x1e74: case 0x1e76: case 0x1e78: case 0x1e7a:
732 	    case 0x1ee4: case 0x1ee6: case 0x1ee8: case 0x1eea:
733 	    case 0x1eec: case 0x1eee: case 0x1ef0:
734 		      regmbc('U'); regmbc(0xd9); regmbc(0xda);
735 		      regmbc(0xdb); regmbc(0xdc); regmbc(0x168);
736 		      regmbc(0x16a); regmbc(0x16c); regmbc(0x16e);
737 		      regmbc(0x170); regmbc(0x172); regmbc(0x1af);
738 		      regmbc(0x1d3); regmbc(0x1d5); regmbc(0x1d7);
739 		      regmbc(0x1d9); regmbc(0x1db); regmbc(0x214);
740 		      regmbc(0x216); regmbc(0x244); regmbc(0x1e72);
741 		      regmbc(0x1e74); regmbc(0x1e76); regmbc(0x1e78);
742 		      regmbc(0x1e7a); regmbc(0x1ee4); regmbc(0x1ee6);
743 		      regmbc(0x1ee8); regmbc(0x1eea); regmbc(0x1eec);
744 		      regmbc(0x1eee); regmbc(0x1ef0);
745 		      return;
746 	    case 'V': case 0x1b2: case 0x1e7c: case 0x1e7e:
747 		      regmbc('V'); regmbc(0x1b2); regmbc(0x1e7c);
748 		      regmbc(0x1e7e);
749 		      return;
750 	    case 'W': case 0x174: case 0x1e80: case 0x1e82:
751 	    case 0x1e84: case 0x1e86: case 0x1e88:
752 		      regmbc('W'); regmbc(0x174); regmbc(0x1e80);
753 		      regmbc(0x1e82); regmbc(0x1e84); regmbc(0x1e86);
754 		      regmbc(0x1e88);
755 		      return;
756 	    case 'X': case 0x1e8a: case 0x1e8c:
757 		      regmbc('X'); regmbc(0x1e8a); regmbc(0x1e8c);
758 		      return;
759 	    case 'Y': case 0xdd:
760 	    case 0x176: case 0x178: case 0x1b3: case 0x232: case 0x24e:
761 	    case 0x1e8e: case 0x1ef2: case 0x1ef6: case 0x1ef4: case 0x1ef8:
762 		      regmbc('Y'); regmbc(0xdd); regmbc(0x176);
763 		      regmbc(0x178); regmbc(0x1b3); regmbc(0x232);
764 		      regmbc(0x24e); regmbc(0x1e8e); regmbc(0x1ef2);
765 		      regmbc(0x1ef4); regmbc(0x1ef6); regmbc(0x1ef8);
766 		      return;
767 	    case 'Z': case 0x179: case 0x17b: case 0x17d: case 0x1b5:
768 	    case 0x1e90: case 0x1e92: case 0x1e94: case 0x2c6b:
769 		      regmbc('Z'); regmbc(0x179); regmbc(0x17b);
770 		      regmbc(0x17d); regmbc(0x1b5); regmbc(0x1e90);
771 		      regmbc(0x1e92); regmbc(0x1e94); regmbc(0x2c6b);
772 		      return;
773 	    case 'a': case 0xe0: case 0xe1: case 0xe2:
774 	    case 0xe3: case 0xe4: case 0xe5: case 0x101: case 0x103:
775 	    case 0x105: case 0x1ce: case 0x1df: case 0x1e1: case 0x1fb:
776 	    case 0x201: case 0x203: case 0x227: case 0x1d8f: case 0x1e01:
777 	    case 0x1e9a: case 0x1ea1: case 0x1ea3: case 0x1ea5:
778 	    case 0x1ea7: case 0x1ea9: case 0x1eab: case 0x1ead:
779 	    case 0x1eaf: case 0x1eb1: case 0x1eb3: case 0x1eb5:
780 	    case 0x1eb7: case 0x2c65:
781 		      regmbc('a'); regmbc(0xe0); regmbc(0xe1);
782 		      regmbc(0xe2); regmbc(0xe3); regmbc(0xe4);
783 		      regmbc(0xe5); regmbc(0x101); regmbc(0x103);
784 		      regmbc(0x105); regmbc(0x1ce); regmbc(0x1df);
785 		      regmbc(0x1e1); regmbc(0x1fb); regmbc(0x201);
786 		      regmbc(0x203); regmbc(0x227); regmbc(0x1d8f);
787 		      regmbc(0x1e01); regmbc(0x1e9a); regmbc(0x1ea1);
788 		      regmbc(0x1ea3); regmbc(0x1ea5); regmbc(0x1ea7);
789 		      regmbc(0x1ea9); regmbc(0x1eab); regmbc(0x1ead);
790 		      regmbc(0x1eaf); regmbc(0x1eb1); regmbc(0x1eb3);
791 		      regmbc(0x1eb5); regmbc(0x1eb7); regmbc(0x2c65);
792 		      return;
793 	    case 'b': case 0x180: case 0x253: case 0x1d6c: case 0x1d80:
794 	    case 0x1e03: case 0x1e05: case 0x1e07:
795 		      regmbc('b');
796 		      regmbc(0x180); regmbc(0x253); regmbc(0x1d6c);
797 		      regmbc(0x1d80); regmbc(0x1e03); regmbc(0x1e05);
798 		      regmbc(0x1e07);
799 		      return;
800 	    case 'c': case 0xe7:
801 	    case 0x107: case 0x109: case 0x10b: case 0x10d: case 0x188:
802 	    case 0x23c: case 0x1e09: case 0xa793: case 0xa794:
803 		      regmbc('c'); regmbc(0xe7); regmbc(0x107);
804 		      regmbc(0x109); regmbc(0x10b); regmbc(0x10d);
805 		      regmbc(0x188); regmbc(0x23c); regmbc(0x1e09);
806 		      regmbc(0xa793); regmbc(0xa794);
807 		      return;
808 	    case 'd': case 0x10f: case 0x111: case 0x257: case 0x1d6d:
809 	    case 0x1d81: case 0x1d91: case 0x1e0b: case 0x1e0d:
810 	    case 0x1e0f: case 0x1e11: case 0x1e13:
811 		      regmbc('d'); regmbc(0x10f); regmbc(0x111);
812 		      regmbc(0x257); regmbc(0x1d6d); regmbc(0x1d81);
813 		      regmbc(0x1d91); regmbc(0x1e0b); regmbc(0x1e0d);
814 		      regmbc(0x1e0f); regmbc(0x1e11); regmbc(0x1e13);
815 		      return;
816 	    case 'e': case 0xe8: case 0xe9: case 0xea: case 0xeb:
817 	    case 0x113: case 0x115: case 0x117: case 0x119:
818 	    case 0x11b: case 0x205: case 0x207: case 0x229:
819 	    case 0x247: case 0x1d92: case 0x1e15: case 0x1e17:
820 	    case 0x1e19: case 0x1e1b: case 0x1eb9: case 0x1ebb:
821 	    case 0x1e1d: case 0x1ebd: case 0x1ebf: case 0x1ec1:
822 	    case 0x1ec3: case 0x1ec5: case 0x1ec7:
823 		      regmbc('e'); regmbc(0xe8); regmbc(0xe9);
824 		      regmbc(0xea); regmbc(0xeb); regmbc(0x113);
825 		      regmbc(0x115); regmbc(0x117); regmbc(0x119);
826 		      regmbc(0x11b); regmbc(0x205); regmbc(0x207);
827 		      regmbc(0x229); regmbc(0x247); regmbc(0x1d92);
828 		      regmbc(0x1e15); regmbc(0x1e17); regmbc(0x1e19);
829 		      regmbc(0x1e1b); regmbc(0x1e1d); regmbc(0x1eb9);
830 		      regmbc(0x1ebb); regmbc(0x1ebd); regmbc(0x1ebf);
831 		      regmbc(0x1ec1); regmbc(0x1ec3); regmbc(0x1ec5);
832 		      regmbc(0x1ec7);
833 		      return;
834 	    case 'f': case 0x192: case 0x1d6e: case 0x1d82:
835 	    case 0x1e1f: case 0xa799:
836 		     regmbc('f'); regmbc(0x192); regmbc(0x1d6e);
837 		     regmbc(0x1d82); regmbc(0x1e1f); regmbc(0xa799);
838 		     return;
839 	    case 'g': case 0x11d: case 0x11f: case 0x121: case 0x123:
840 	    case 0x1e5: case 0x1e7: case 0x260: case 0x1f5: case 0x1d83:
841 	    case 0x1e21: case 0xa7a1:
842 		      regmbc('g'); regmbc(0x11d); regmbc(0x11f);
843 		      regmbc(0x121); regmbc(0x123); regmbc(0x1e5);
844 		      regmbc(0x1e7); regmbc(0x1f5); regmbc(0x260);
845 		      regmbc(0x1d83); regmbc(0x1e21); regmbc(0xa7a1);
846 		      return;
847 	    case 'h': case 0x125: case 0x127: case 0x21f: case 0x1e23:
848 	    case 0x1e25: case 0x1e27: case 0x1e29: case 0x1e2b:
849 	    case 0x1e96: case 0x2c68: case 0xa795:
850 		      regmbc('h'); regmbc(0x125); regmbc(0x127);
851 		      regmbc(0x21f); regmbc(0x1e23); regmbc(0x1e25);
852 		      regmbc(0x1e27); regmbc(0x1e29); regmbc(0x1e2b);
853 		      regmbc(0x1e96); regmbc(0x2c68); regmbc(0xa795);
854 		      return;
855 	    case 'i': case 0xec: case 0xed: case 0xee: case 0xef:
856 	    case 0x129: case 0x12b: case 0x12d: case 0x12f:
857 	    case 0x1d0: case 0x209: case 0x20b: case 0x268:
858 	    case 0x1d96: case 0x1e2d: case 0x1e2f: case 0x1ec9:
859 	    case 0x1ecb:
860 		      regmbc('i'); regmbc(0xec); regmbc(0xed);
861 		      regmbc(0xee); regmbc(0xef); regmbc(0x129);
862 		      regmbc(0x12b); regmbc(0x12d); regmbc(0x12f);
863 		      regmbc(0x1d0); regmbc(0x209); regmbc(0x20b);
864 		      regmbc(0x268); regmbc(0x1d96); regmbc(0x1e2d);
865 		      regmbc(0x1e2f); regmbc(0x1ec9); regmbc(0x1ecb);
866 		      return;
867 	    case 'j': case 0x135: case 0x1f0: case 0x249:
868 		      regmbc('j'); regmbc(0x135); regmbc(0x1f0);
869 		      regmbc(0x249);
870 		      return;
871 	    case 'k': case 0x137: case 0x199: case 0x1e9:
872 	    case 0x1d84: case 0x1e31: case 0x1e33: case 0x1e35:
873 	    case 0x2c6a: case 0xa741:
874 		      regmbc('k'); regmbc(0x137); regmbc(0x199);
875 		      regmbc(0x1e9); regmbc(0x1d84); regmbc(0x1e31);
876 		      regmbc(0x1e33); regmbc(0x1e35); regmbc(0x2c6a);
877 		      regmbc(0xa741);
878 		      return;
879 	    case 'l': case 0x13a: case 0x13c: case 0x13e:
880 	    case 0x140: case 0x142: case 0x19a: case 0x1e37:
881 	    case 0x1e39: case 0x1e3b: case 0x1e3d: case 0x2c61:
882 		      regmbc('l'); regmbc(0x13a); regmbc(0x13c);
883 		      regmbc(0x13e); regmbc(0x140); regmbc(0x142);
884 		      regmbc(0x19a); regmbc(0x1e37); regmbc(0x1e39);
885 		      regmbc(0x1e3b); regmbc(0x1e3d); regmbc(0x2c61);
886 		      return;
887 	    case 'm': case 0x1d6f: case 0x1e3f: case 0x1e41: case 0x1e43:
888 		      regmbc('m'); regmbc(0x1d6f); regmbc(0x1e3f);
889 		      regmbc(0x1e41); regmbc(0x1e43);
890 		      return;
891 	    case 'n': case 0xf1: case 0x144: case 0x146: case 0x148:
892 	    case 0x149: case 0x1f9: case 0x1d70: case 0x1d87:
893 	    case 0x1e45: case 0x1e47: case 0x1e49: case 0x1e4b:
894 	    case 0xa7a5:
895 		      regmbc('n'); regmbc(0xf1); regmbc(0x144);
896 		      regmbc(0x146); regmbc(0x148); regmbc(0x149);
897 		      regmbc(0x1f9); regmbc(0x1d70); regmbc(0x1d87);
898 		      regmbc(0x1e45); regmbc(0x1e47); regmbc(0x1e49);
899 		      regmbc(0x1e4b); regmbc(0xa7a5);
900 		      return;
901 	    case 'o': case 0xf2: case 0xf3: case 0xf4: case 0xf5:
902 	    case 0xf6: case 0xf8: case 0x14d: case 0x14f: case 0x151:
903 	    case 0x1a1: case 0x1d2: case 0x1eb: case 0x1ed: case 0x1ff:
904 	    case 0x20d: case 0x20f: case 0x22b: case 0x22d: case 0x22f:
905 	    case 0x231: case 0x275: case 0x1e4d: case 0x1e4f:
906 	    case 0x1e51: case 0x1e53: case 0x1ecd: case 0x1ecf:
907 	    case 0x1ed1: case 0x1ed3: case 0x1ed5: case 0x1ed7:
908 	    case 0x1ed9: case 0x1edb: case 0x1edd: case 0x1edf:
909 	    case 0x1ee1: case 0x1ee3:
910 		      regmbc('o'); regmbc(0xf2); regmbc(0xf3);
911 		      regmbc(0xf4); regmbc(0xf5); regmbc(0xf6);
912 		      regmbc(0xf8); regmbc(0x14d); regmbc(0x14f);
913 		      regmbc(0x151); regmbc(0x1a1); regmbc(0x1d2);
914 		      regmbc(0x1eb); regmbc(0x1ed); regmbc(0x1ff);
915 		      regmbc(0x20d); regmbc(0x20f); regmbc(0x22b);
916 		      regmbc(0x22d); regmbc(0x22f); regmbc(0x231);
917 		      regmbc(0x275); regmbc(0x1e4d); regmbc(0x1e4f);
918 		      regmbc(0x1e51); regmbc(0x1e53); regmbc(0x1ecd);
919 		      regmbc(0x1ecf); regmbc(0x1ed1); regmbc(0x1ed3);
920 		      regmbc(0x1ed5); regmbc(0x1ed7); regmbc(0x1ed9);
921 		      regmbc(0x1edb); regmbc(0x1edd); regmbc(0x1edf);
922 		      regmbc(0x1ee1); regmbc(0x1ee3);
923 		      return;
924 	    case 'p': case 0x1a5: case 0x1d71: case 0x1d88: case 0x1d7d:
925 	    case 0x1e55: case 0x1e57:
926 		      regmbc('p'); regmbc(0x1a5); regmbc(0x1d71);
927 		      regmbc(0x1d7d); regmbc(0x1d88); regmbc(0x1e55);
928 		      regmbc(0x1e57);
929 		      return;
930 	    case 'q': case 0x24b: case 0x2a0:
931 		      regmbc('q'); regmbc(0x24b); regmbc(0x2a0);
932 		      return;
933 	    case 'r': case 0x155: case 0x157: case 0x159: case 0x211:
934 	    case 0x213: case 0x24d: case 0x27d: case 0x1d72: case 0x1d73:
935 	    case 0x1d89: case 0x1e59: case 0x1e5b: case 0x1e5d: case 0x1e5f:
936 	    case 0xa7a7:
937 		      regmbc('r'); regmbc(0x155); regmbc(0x157);
938 		      regmbc(0x159); regmbc(0x211); regmbc(0x213);
939 		      regmbc(0x24d); regmbc(0x1d72); regmbc(0x1d73);
940 		      regmbc(0x1d89); regmbc(0x1e59); regmbc(0x27d);
941 		      regmbc(0x1e5b); regmbc(0x1e5d); regmbc(0x1e5f);
942 		      regmbc(0xa7a7);
943 		      return;
944 	    case 's': case 0x15b: case 0x15d: case 0x15f: case 0x161:
945 	    case 0x1e61: case 0x219: case 0x23f: case 0x1d74: case 0x1d8a:
946 	    case 0x1e63: case 0x1e65: case 0x1e67: case 0x1e69: case 0xa7a9:
947 		      regmbc('s'); regmbc(0x15b); regmbc(0x15d);
948 		      regmbc(0x15f); regmbc(0x161); regmbc(0x23f);
949 		      regmbc(0x219); regmbc(0x1d74); regmbc(0x1d8a);
950 		      regmbc(0x1e61); regmbc(0x1e63); regmbc(0x1e65);
951 		      regmbc(0x1e67); regmbc(0x1e69); regmbc(0xa7a9);
952 		      return;
953 	    case 't': case 0x163: case 0x165: case 0x167: case 0x1ab:
954 	    case 0x1ad: case 0x21b: case 0x288: case 0x1d75: case 0x1e6b:
955 	    case 0x1e6d: case 0x1e6f: case 0x1e71: case 0x1e97: case 0x2c66:
956 		      regmbc('t'); regmbc(0x163); regmbc(0x165);
957 		      regmbc(0x167); regmbc(0x1ab); regmbc(0x21b);
958 		      regmbc(0x1ad); regmbc(0x288); regmbc(0x1d75);
959 		      regmbc(0x1e6b); regmbc(0x1e6d); regmbc(0x1e6f);
960 		      regmbc(0x1e71); regmbc(0x1e97); regmbc(0x2c66);
961 		      return;
962 	    case 'u': case 0xf9: case 0xfa: case 0xfb: case 0xfc:
963 	    case 0x169: case 0x16b: case 0x16d: case 0x16f:
964 	    case 0x171: case 0x173: case 0x1b0: case 0x1d4:
965 	    case 0x1d6: case 0x1d8: case 0x1da: case 0x1dc:
966 	    case 0x215: case 0x217: case 0x289: case 0x1e73:
967 	    case 0x1d7e: case 0x1d99: case 0x1e75: case 0x1e77:
968 	    case 0x1e79: case 0x1e7b: case 0x1ee5: case 0x1ee7:
969 	    case 0x1ee9: case 0x1eeb: case 0x1eed: case 0x1eef:
970 	    case 0x1ef1:
971 		      regmbc('u'); regmbc(0xf9); regmbc(0xfa);
972 		      regmbc(0xfb); regmbc(0xfc); regmbc(0x169);
973 		      regmbc(0x16b); regmbc(0x16d); regmbc(0x16f);
974 		      regmbc(0x171); regmbc(0x173); regmbc(0x1d6);
975 		      regmbc(0x1d8); regmbc(0x1da); regmbc(0x1dc);
976 		      regmbc(0x215); regmbc(0x217); regmbc(0x1b0);
977 		      regmbc(0x1d4); regmbc(0x289); regmbc(0x1d7e);
978 		      regmbc(0x1d99); regmbc(0x1e73); regmbc(0x1e75);
979 		      regmbc(0x1e77); regmbc(0x1e79); regmbc(0x1e7b);
980 		      regmbc(0x1ee5); regmbc(0x1ee7); regmbc(0x1ee9);
981 		      regmbc(0x1eeb); regmbc(0x1eed); regmbc(0x1eef);
982 		      regmbc(0x1ef1);
983 		      return;
984 	    case 'v': case 0x28b: case 0x1d8c: case 0x1e7d: case 0x1e7f:
985 		      regmbc('v'); regmbc(0x28b); regmbc(0x1d8c);
986 		      regmbc(0x1e7d); regmbc(0x1e7f);
987 		      return;
988 	    case 'w': case 0x175: case 0x1e81: case 0x1e83:
989 	    case 0x1e85: case 0x1e87: case 0x1e89: case 0x1e98:
990 		      regmbc('w'); regmbc(0x175); regmbc(0x1e81);
991 		      regmbc(0x1e83); regmbc(0x1e85); regmbc(0x1e87);
992 		      regmbc(0x1e89); regmbc(0x1e98);
993 		      return;
994 	    case 'x': case 0x1e8b: case 0x1e8d:
995 		      regmbc('x'); regmbc(0x1e8b); regmbc(0x1e8d);
996 		      return;
997 	    case 'y': case 0xfd: case 0xff: case 0x177: case 0x1b4:
998 	    case 0x233: case 0x24f: case 0x1e8f: case 0x1e99: case 0x1ef3:
999 	    case 0x1ef5: case 0x1ef7: case 0x1ef9:
1000 		      regmbc('y'); regmbc(0xfd); regmbc(0xff);
1001 		      regmbc(0x177); regmbc(0x1b4); regmbc(0x233);
1002 		      regmbc(0x24f); regmbc(0x1e8f); regmbc(0x1e99);
1003 		      regmbc(0x1ef3); regmbc(0x1ef5); regmbc(0x1ef7);
1004 		      regmbc(0x1ef9);
1005 		      return;
1006 	    case 'z': case 0x17a: case 0x17c: case 0x17e: case 0x1b6:
1007 	    case 0x1d76: case 0x1d8e: case 0x1e91: case 0x1e93:
1008 	    case 0x1e95: case 0x2c6c:
1009 		      regmbc('z'); regmbc(0x17a); regmbc(0x17c);
1010 		      regmbc(0x17e); regmbc(0x1b6); regmbc(0x1d76);
1011 		      regmbc(0x1d8e); regmbc(0x1e91); regmbc(0x1e93);
1012 		      regmbc(0x1e95); regmbc(0x2c6c);
1013 		      return;
1014 	}
1015 #endif
1016     }
1017     regmbc(c);
1018 }
1019 
1020 /*
1021  * Emit a node.
1022  * Return pointer to generated code.
1023  */
1024     static char_u *
regnode(int op)1025 regnode(int op)
1026 {
1027     char_u  *ret;
1028 
1029     ret = regcode;
1030     if (ret == JUST_CALC_SIZE)
1031 	regsize += 3;
1032     else
1033     {
1034 	*regcode++ = op;
1035 	*regcode++ = NUL;		// Null "next" pointer.
1036 	*regcode++ = NUL;
1037     }
1038     return ret;
1039 }
1040 
1041 /*
1042  * Write a long as four bytes at "p" and return pointer to the next char.
1043  */
1044     static char_u *
re_put_long(char_u * p,long_u val)1045 re_put_long(char_u *p, long_u val)
1046 {
1047     *p++ = (char_u) ((val >> 24) & 0377);
1048     *p++ = (char_u) ((val >> 16) & 0377);
1049     *p++ = (char_u) ((val >> 8) & 0377);
1050     *p++ = (char_u) (val & 0377);
1051     return p;
1052 }
1053 
1054 /*
1055  * regnext - dig the "next" pointer out of a node
1056  * Returns NULL when calculating size, when there is no next item and when
1057  * there is an error.
1058  */
1059     static char_u *
regnext(char_u * p)1060 regnext(char_u *p)
1061 {
1062     int	    offset;
1063 
1064     if (p == JUST_CALC_SIZE || reg_toolong)
1065 	return NULL;
1066 
1067     offset = NEXT(p);
1068     if (offset == 0)
1069 	return NULL;
1070 
1071     if (OP(p) == BACK)
1072 	return p - offset;
1073     else
1074 	return p + offset;
1075 }
1076 
1077 /*
1078  * Set the next-pointer at the end of a node chain.
1079  */
1080     static void
regtail(char_u * p,char_u * val)1081 regtail(char_u *p, char_u *val)
1082 {
1083     char_u	*scan;
1084     char_u	*temp;
1085     int		offset;
1086 
1087     if (p == JUST_CALC_SIZE)
1088 	return;
1089 
1090     // Find last node.
1091     scan = p;
1092     for (;;)
1093     {
1094 	temp = regnext(scan);
1095 	if (temp == NULL)
1096 	    break;
1097 	scan = temp;
1098     }
1099 
1100     if (OP(scan) == BACK)
1101 	offset = (int)(scan - val);
1102     else
1103 	offset = (int)(val - scan);
1104     // When the offset uses more than 16 bits it can no longer fit in the two
1105     // bytes available.  Use a global flag to avoid having to check return
1106     // values in too many places.
1107     if (offset > 0xffff)
1108 	reg_toolong = TRUE;
1109     else
1110     {
1111 	*(scan + 1) = (char_u) (((unsigned)offset >> 8) & 0377);
1112 	*(scan + 2) = (char_u) (offset & 0377);
1113     }
1114 }
1115 
1116 /*
1117  * Like regtail, on item after a BRANCH; nop if none.
1118  */
1119     static void
regoptail(char_u * p,char_u * val)1120 regoptail(char_u *p, char_u *val)
1121 {
1122     // When op is neither BRANCH nor BRACE_COMPLEX0-9, it is "operandless"
1123     if (p == NULL || p == JUST_CALC_SIZE
1124 	    || (OP(p) != BRANCH
1125 		&& (OP(p) < BRACE_COMPLEX || OP(p) > BRACE_COMPLEX + 9)))
1126 	return;
1127     regtail(OPERAND(p), val);
1128 }
1129 
1130 /*
1131  * Insert an operator in front of already-emitted operand
1132  *
1133  * Means relocating the operand.
1134  */
1135     static void
reginsert(int op,char_u * opnd)1136 reginsert(int op, char_u *opnd)
1137 {
1138     char_u	*src;
1139     char_u	*dst;
1140     char_u	*place;
1141 
1142     if (regcode == JUST_CALC_SIZE)
1143     {
1144 	regsize += 3;
1145 	return;
1146     }
1147     src = regcode;
1148     regcode += 3;
1149     dst = regcode;
1150     while (src > opnd)
1151 	*--dst = *--src;
1152 
1153     place = opnd;		// Op node, where operand used to be.
1154     *place++ = op;
1155     *place++ = NUL;
1156     *place = NUL;
1157 }
1158 
1159 /*
1160  * Insert an operator in front of already-emitted operand.
1161  * Add a number to the operator.
1162  */
1163     static void
reginsert_nr(int op,long val,char_u * opnd)1164 reginsert_nr(int op, long val, char_u *opnd)
1165 {
1166     char_u	*src;
1167     char_u	*dst;
1168     char_u	*place;
1169 
1170     if (regcode == JUST_CALC_SIZE)
1171     {
1172 	regsize += 7;
1173 	return;
1174     }
1175     src = regcode;
1176     regcode += 7;
1177     dst = regcode;
1178     while (src > opnd)
1179 	*--dst = *--src;
1180 
1181     place = opnd;		// Op node, where operand used to be.
1182     *place++ = op;
1183     *place++ = NUL;
1184     *place++ = NUL;
1185     re_put_long(place, (long_u)val);
1186 }
1187 
1188 /*
1189  * Insert an operator in front of already-emitted operand.
1190  * The operator has the given limit values as operands.  Also set next pointer.
1191  *
1192  * Means relocating the operand.
1193  */
1194     static void
reginsert_limits(int op,long minval,long maxval,char_u * opnd)1195 reginsert_limits(
1196     int		op,
1197     long	minval,
1198     long	maxval,
1199     char_u	*opnd)
1200 {
1201     char_u	*src;
1202     char_u	*dst;
1203     char_u	*place;
1204 
1205     if (regcode == JUST_CALC_SIZE)
1206     {
1207 	regsize += 11;
1208 	return;
1209     }
1210     src = regcode;
1211     regcode += 11;
1212     dst = regcode;
1213     while (src > opnd)
1214 	*--dst = *--src;
1215 
1216     place = opnd;		// Op node, where operand used to be.
1217     *place++ = op;
1218     *place++ = NUL;
1219     *place++ = NUL;
1220     place = re_put_long(place, (long_u)minval);
1221     place = re_put_long(place, (long_u)maxval);
1222     regtail(opnd, place);
1223 }
1224 
1225 /*
1226  * Return TRUE if the back reference is legal. We must have seen the close
1227  * brace.
1228  * TODO: Should also check that we don't refer to something that is repeated
1229  * (+*=): what instance of the repetition should we match?
1230  */
1231     static int
seen_endbrace(int refnum)1232 seen_endbrace(int refnum)
1233 {
1234     if (!had_endbrace[refnum])
1235     {
1236 	char_u *p;
1237 
1238 	// Trick: check if "@<=" or "@<!" follows, in which case
1239 	// the \1 can appear before the referenced match.
1240 	for (p = regparse; *p != NUL; ++p)
1241 	    if (p[0] == '@' && p[1] == '<' && (p[2] == '!' || p[2] == '='))
1242 		break;
1243 	if (*p == NUL)
1244 	{
1245 	    emsg(_("E65: Illegal back reference"));
1246 	    rc_did_emsg = TRUE;
1247 	    return FALSE;
1248 	}
1249     }
1250     return TRUE;
1251 }
1252 
1253 /*
1254  * Parse the lowest level.
1255  *
1256  * Optimization:  gobbles an entire sequence of ordinary characters so that
1257  * it can turn them into a single node, which is smaller to store and
1258  * faster to run.  Don't do this when one_exactly is set.
1259  */
1260     static char_u *
regatom(int * flagp)1261 regatom(int *flagp)
1262 {
1263     char_u	    *ret;
1264     int		    flags;
1265     int		    c;
1266     char_u	    *p;
1267     int		    extra = 0;
1268     int		    save_prev_at_start = prev_at_start;
1269 
1270     *flagp = WORST;		// Tentatively.
1271 
1272     c = getchr();
1273     switch (c)
1274     {
1275       case Magic('^'):
1276 	ret = regnode(BOL);
1277 	break;
1278 
1279       case Magic('$'):
1280 	ret = regnode(EOL);
1281 #if defined(FEAT_SYN_HL) || defined(PROTO)
1282 	had_eol = TRUE;
1283 #endif
1284 	break;
1285 
1286       case Magic('<'):
1287 	ret = regnode(BOW);
1288 	break;
1289 
1290       case Magic('>'):
1291 	ret = regnode(EOW);
1292 	break;
1293 
1294       case Magic('_'):
1295 	c = no_Magic(getchr());
1296 	if (c == '^')		// "\_^" is start-of-line
1297 	{
1298 	    ret = regnode(BOL);
1299 	    break;
1300 	}
1301 	if (c == '$')		// "\_$" is end-of-line
1302 	{
1303 	    ret = regnode(EOL);
1304 #if defined(FEAT_SYN_HL) || defined(PROTO)
1305 	    had_eol = TRUE;
1306 #endif
1307 	    break;
1308 	}
1309 
1310 	extra = ADD_NL;
1311 	*flagp |= HASNL;
1312 
1313 	// "\_[" is character range plus newline
1314 	if (c == '[')
1315 	    goto collection;
1316 
1317 	// "\_x" is character class plus newline
1318 	// FALLTHROUGH
1319 
1320 	// Character classes.
1321       case Magic('.'):
1322       case Magic('i'):
1323       case Magic('I'):
1324       case Magic('k'):
1325       case Magic('K'):
1326       case Magic('f'):
1327       case Magic('F'):
1328       case Magic('p'):
1329       case Magic('P'):
1330       case Magic('s'):
1331       case Magic('S'):
1332       case Magic('d'):
1333       case Magic('D'):
1334       case Magic('x'):
1335       case Magic('X'):
1336       case Magic('o'):
1337       case Magic('O'):
1338       case Magic('w'):
1339       case Magic('W'):
1340       case Magic('h'):
1341       case Magic('H'):
1342       case Magic('a'):
1343       case Magic('A'):
1344       case Magic('l'):
1345       case Magic('L'):
1346       case Magic('u'):
1347       case Magic('U'):
1348 	p = vim_strchr(classchars, no_Magic(c));
1349 	if (p == NULL)
1350 	    EMSG_RET_NULL(_("E63: invalid use of \\_"));
1351 
1352 	// When '.' is followed by a composing char ignore the dot, so that
1353 	// the composing char is matched here.
1354 	if (enc_utf8 && c == Magic('.') && utf_iscomposing(peekchr()))
1355 	{
1356 	    c = getchr();
1357 	    goto do_multibyte;
1358 	}
1359 	ret = regnode(classcodes[p - classchars] + extra);
1360 	*flagp |= HASWIDTH | SIMPLE;
1361 	break;
1362 
1363       case Magic('n'):
1364 	if (reg_string)
1365 	{
1366 	    // In a string "\n" matches a newline character.
1367 	    ret = regnode(EXACTLY);
1368 	    regc(NL);
1369 	    regc(NUL);
1370 	    *flagp |= HASWIDTH | SIMPLE;
1371 	}
1372 	else
1373 	{
1374 	    // In buffer text "\n" matches the end of a line.
1375 	    ret = regnode(NEWL);
1376 	    *flagp |= HASWIDTH | HASNL;
1377 	}
1378 	break;
1379 
1380       case Magic('('):
1381 	if (one_exactly)
1382 	    EMSG_ONE_RET_NULL;
1383 	ret = reg(REG_PAREN, &flags);
1384 	if (ret == NULL)
1385 	    return NULL;
1386 	*flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1387 	break;
1388 
1389       case NUL:
1390       case Magic('|'):
1391       case Magic('&'):
1392       case Magic(')'):
1393 	if (one_exactly)
1394 	    EMSG_ONE_RET_NULL;
1395 	IEMSG_RET_NULL(_(e_internal));	// Supposed to be caught earlier.
1396 	// NOTREACHED
1397 
1398       case Magic('='):
1399       case Magic('?'):
1400       case Magic('+'):
1401       case Magic('@'):
1402       case Magic('{'):
1403       case Magic('*'):
1404 	c = no_Magic(c);
1405 	EMSG3_RET_NULL(_("E64: %s%c follows nothing"),
1406 		(c == '*' ? reg_magic >= MAGIC_ON : reg_magic == MAGIC_ALL), c);
1407 	// NOTREACHED
1408 
1409       case Magic('~'):		// previous substitute pattern
1410 	    if (reg_prev_sub != NULL)
1411 	    {
1412 		char_u	    *lp;
1413 
1414 		ret = regnode(EXACTLY);
1415 		lp = reg_prev_sub;
1416 		while (*lp != NUL)
1417 		    regc(*lp++);
1418 		regc(NUL);
1419 		if (*reg_prev_sub != NUL)
1420 		{
1421 		    *flagp |= HASWIDTH;
1422 		    if ((lp - reg_prev_sub) == 1)
1423 			*flagp |= SIMPLE;
1424 		}
1425 	    }
1426 	    else
1427 		EMSG_RET_NULL(_(e_no_previous_substitute_regular_expression));
1428 	    break;
1429 
1430       case Magic('1'):
1431       case Magic('2'):
1432       case Magic('3'):
1433       case Magic('4'):
1434       case Magic('5'):
1435       case Magic('6'):
1436       case Magic('7'):
1437       case Magic('8'):
1438       case Magic('9'):
1439 	    {
1440 		int		    refnum;
1441 
1442 		refnum = c - Magic('0');
1443 		if (!seen_endbrace(refnum))
1444 		    return NULL;
1445 		ret = regnode(BACKREF + refnum);
1446 	    }
1447 	    break;
1448 
1449       case Magic('z'):
1450 	{
1451 	    c = no_Magic(getchr());
1452 	    switch (c)
1453 	    {
1454 #ifdef FEAT_SYN_HL
1455 		case '(': if ((reg_do_extmatch & REX_SET) == 0)
1456 			      EMSG_RET_NULL(_(e_z_not_allowed));
1457 			  if (one_exactly)
1458 			      EMSG_ONE_RET_NULL;
1459 			  ret = reg(REG_ZPAREN, &flags);
1460 			  if (ret == NULL)
1461 			      return NULL;
1462 			  *flagp |= flags & (HASWIDTH|SPSTART|HASNL|HASLOOKBH);
1463 			  re_has_z = REX_SET;
1464 			  break;
1465 
1466 		case '1':
1467 		case '2':
1468 		case '3':
1469 		case '4':
1470 		case '5':
1471 		case '6':
1472 		case '7':
1473 		case '8':
1474 		case '9': if ((reg_do_extmatch & REX_USE) == 0)
1475 			      EMSG_RET_NULL(_(e_z1_not_allowed));
1476 			  ret = regnode(ZREF + c - '0');
1477 			  re_has_z = REX_USE;
1478 			  break;
1479 #endif
1480 
1481 		case 's': ret = regnode(MOPEN + 0);
1482 			  if (re_mult_next("\\zs") == FAIL)
1483 			      return NULL;
1484 			  break;
1485 
1486 		case 'e': ret = regnode(MCLOSE + 0);
1487 			  if (re_mult_next("\\ze") == FAIL)
1488 			      return NULL;
1489 			  break;
1490 
1491 		default:  EMSG_RET_NULL(_("E68: Invalid character after \\z"));
1492 	    }
1493 	}
1494 	break;
1495 
1496       case Magic('%'):
1497 	{
1498 	    c = no_Magic(getchr());
1499 	    switch (c)
1500 	    {
1501 		// () without a back reference
1502 		case '(':
1503 		    if (one_exactly)
1504 			EMSG_ONE_RET_NULL;
1505 		    ret = reg(REG_NPAREN, &flags);
1506 		    if (ret == NULL)
1507 			return NULL;
1508 		    *flagp |= flags & (HASWIDTH | SPSTART | HASNL | HASLOOKBH);
1509 		    break;
1510 
1511 		// Catch \%^ and \%$ regardless of where they appear in the
1512 		// pattern -- regardless of whether or not it makes sense.
1513 		case '^':
1514 		    ret = regnode(RE_BOF);
1515 		    break;
1516 
1517 		case '$':
1518 		    ret = regnode(RE_EOF);
1519 		    break;
1520 
1521 		case '#':
1522 		    ret = regnode(CURSOR);
1523 		    break;
1524 
1525 		case 'V':
1526 		    ret = regnode(RE_VISUAL);
1527 		    break;
1528 
1529 		case 'C':
1530 		    ret = regnode(RE_COMPOSING);
1531 		    break;
1532 
1533 		// \%[abc]: Emit as a list of branches, all ending at the last
1534 		// branch which matches nothing.
1535 		case '[':
1536 			  if (one_exactly)	// doesn't nest
1537 			      EMSG_ONE_RET_NULL;
1538 			  {
1539 			      char_u	*lastbranch;
1540 			      char_u	*lastnode = NULL;
1541 			      char_u	*br;
1542 
1543 			      ret = NULL;
1544 			      while ((c = getchr()) != ']')
1545 			      {
1546 				  if (c == NUL)
1547 				      EMSG2_RET_NULL(_(e_missing_sb),
1548 						      reg_magic == MAGIC_ALL);
1549 				  br = regnode(BRANCH);
1550 				  if (ret == NULL)
1551 				      ret = br;
1552 				  else
1553 				  {
1554 				      regtail(lastnode, br);
1555 				      if (reg_toolong)
1556 					  return NULL;
1557 				  }
1558 
1559 				  ungetchr();
1560 				  one_exactly = TRUE;
1561 				  lastnode = regatom(flagp);
1562 				  one_exactly = FALSE;
1563 				  if (lastnode == NULL)
1564 				      return NULL;
1565 			      }
1566 			      if (ret == NULL)
1567 				  EMSG2_RET_NULL(_(e_empty_sb),
1568 						      reg_magic == MAGIC_ALL);
1569 			      lastbranch = regnode(BRANCH);
1570 			      br = regnode(NOTHING);
1571 			      if (ret != JUST_CALC_SIZE)
1572 			      {
1573 				  regtail(lastnode, br);
1574 				  regtail(lastbranch, br);
1575 				  // connect all branches to the NOTHING
1576 				  // branch at the end
1577 				  for (br = ret; br != lastnode; )
1578 				  {
1579 				      if (OP(br) == BRANCH)
1580 				      {
1581 					  regtail(br, lastbranch);
1582 					  if (reg_toolong)
1583 					      return NULL;
1584 					  br = OPERAND(br);
1585 				      }
1586 				      else
1587 					  br = regnext(br);
1588 				  }
1589 			      }
1590 			      *flagp &= ~(HASWIDTH | SIMPLE);
1591 			      break;
1592 			  }
1593 
1594 		case 'd':   // %d123 decimal
1595 		case 'o':   // %o123 octal
1596 		case 'x':   // %xab hex 2
1597 		case 'u':   // %uabcd hex 4
1598 		case 'U':   // %U1234abcd hex 8
1599 			  {
1600 			      long i;
1601 
1602 			      switch (c)
1603 			      {
1604 				  case 'd': i = getdecchrs(); break;
1605 				  case 'o': i = getoctchrs(); break;
1606 				  case 'x': i = gethexchrs(2); break;
1607 				  case 'u': i = gethexchrs(4); break;
1608 				  case 'U': i = gethexchrs(8); break;
1609 				  default:  i = -1; break;
1610 			      }
1611 
1612 			      if (i < 0 || i > INT_MAX)
1613 				  EMSG2_RET_NULL(
1614 					_("E678: Invalid character after %s%%[dxouU]"),
1615 					reg_magic == MAGIC_ALL);
1616 			      if (use_multibytecode(i))
1617 				  ret = regnode(MULTIBYTECODE);
1618 			      else
1619 				  ret = regnode(EXACTLY);
1620 			      if (i == 0)
1621 				  regc(0x0a);
1622 			      else
1623 				  regmbc(i);
1624 			      regc(NUL);
1625 			      *flagp |= HASWIDTH;
1626 			      break;
1627 			  }
1628 
1629 		default:
1630 			  if (VIM_ISDIGIT(c) || c == '<' || c == '>'
1631 						|| c == '\'' || c == '.')
1632 			  {
1633 			      long_u	n = 0;
1634 			      int	cmp;
1635 			      int	cur = FALSE;
1636 
1637 			      cmp = c;
1638 			      if (cmp == '<' || cmp == '>')
1639 				  c = getchr();
1640 			      if (no_Magic(c) == '.')
1641 			      {
1642 				  cur = TRUE;
1643 				  c = getchr();
1644 			      }
1645 			      while (VIM_ISDIGIT(c))
1646 			      {
1647 				  n = n * 10 + (c - '0');
1648 				  c = getchr();
1649 			      }
1650 			      if (c == '\'' && n == 0)
1651 			      {
1652 				  // "\%'m", "\%<'m" and "\%>'m": Mark
1653 				  c = getchr();
1654 				  ret = regnode(RE_MARK);
1655 				  if (ret == JUST_CALC_SIZE)
1656 				      regsize += 2;
1657 				  else
1658 				  {
1659 				      *regcode++ = c;
1660 				      *regcode++ = cmp;
1661 				  }
1662 				  break;
1663 			      }
1664 			      else if (c == 'l' || c == 'c' || c == 'v')
1665 			      {
1666 				  if (cur && n)
1667 				  {
1668 				    semsg(_(e_regexp_number_after_dot_pos_search), no_Magic(c));
1669 				    rc_did_emsg = TRUE;
1670 				    return NULL;
1671 				  }
1672 				  if (c == 'l')
1673 				  {
1674 				      if (cur)
1675 					  n = curwin->w_cursor.lnum;
1676 				      ret = regnode(RE_LNUM);
1677 				      if (save_prev_at_start)
1678 					  at_start = TRUE;
1679 				  }
1680 				  else if (c == 'c')
1681 				  {
1682 				      if (cur)
1683 				      {
1684 					  n = curwin->w_cursor.col;
1685 					  n++;
1686 				      }
1687 				      ret = regnode(RE_COL);
1688 				  }
1689 				  else
1690 				  {
1691 				      if (cur)
1692 				      {
1693 					  colnr_T vcol = 0;
1694 
1695 					  getvvcol(curwin, &curwin->w_cursor,
1696 							    NULL, NULL, &vcol);
1697 					  ++vcol;
1698 					  n = vcol;
1699 				      }
1700 				      ret = regnode(RE_VCOL);
1701 				  }
1702 				  if (ret == JUST_CALC_SIZE)
1703 				      regsize += 5;
1704 				  else
1705 				  {
1706 				      // put the number and the optional
1707 				      // comparator after the opcode
1708 				      regcode = re_put_long(regcode, n);
1709 				      *regcode++ = cmp;
1710 				  }
1711 				  break;
1712 			      }
1713 			  }
1714 
1715 			  EMSG2_RET_NULL(_("E71: Invalid character after %s%%"),
1716 						      reg_magic == MAGIC_ALL);
1717 	    }
1718 	}
1719 	break;
1720 
1721       case Magic('['):
1722 collection:
1723 	{
1724 	    char_u	*lp;
1725 
1726 	    // If there is no matching ']', we assume the '[' is a normal
1727 	    // character.  This makes 'incsearch' and ":help [" work.
1728 	    lp = skip_anyof(regparse);
1729 	    if (*lp == ']')	// there is a matching ']'
1730 	    {
1731 		int	startc = -1;	// > 0 when next '-' is a range
1732 		int	endc;
1733 
1734 		// In a character class, different parsing rules apply.
1735 		// Not even \ is special anymore, nothing is.
1736 		if (*regparse == '^')	    // Complement of range.
1737 		{
1738 		    ret = regnode(ANYBUT + extra);
1739 		    regparse++;
1740 		}
1741 		else
1742 		    ret = regnode(ANYOF + extra);
1743 
1744 		// At the start ']' and '-' mean the literal character.
1745 		if (*regparse == ']' || *regparse == '-')
1746 		{
1747 		    startc = *regparse;
1748 		    regc(*regparse++);
1749 		}
1750 
1751 		while (*regparse != NUL && *regparse != ']')
1752 		{
1753 		    if (*regparse == '-')
1754 		    {
1755 			++regparse;
1756 			// The '-' is not used for a range at the end and
1757 			// after or before a '\n'.
1758 			if (*regparse == ']' || *regparse == NUL
1759 				|| startc == -1
1760 				|| (regparse[0] == '\\' && regparse[1] == 'n'))
1761 			{
1762 			    regc('-');
1763 			    startc = '-';	// [--x] is a range
1764 			}
1765 			else
1766 			{
1767 			    // Also accept "a-[.z.]"
1768 			    endc = 0;
1769 			    if (*regparse == '[')
1770 				endc = get_coll_element(&regparse);
1771 			    if (endc == 0)
1772 			    {
1773 				if (has_mbyte)
1774 				    endc = mb_ptr2char_adv(&regparse);
1775 				else
1776 				    endc = *regparse++;
1777 			    }
1778 
1779 			    // Handle \o40, \x20 and \u20AC style sequences
1780 			    if (endc == '\\' && !reg_cpo_lit && !reg_cpo_bsl)
1781 				endc = coll_get_char();
1782 
1783 			    if (startc > endc)
1784 				EMSG_RET_NULL(_(e_reverse_range));
1785 			    if (has_mbyte && ((*mb_char2len)(startc) > 1
1786 						 || (*mb_char2len)(endc) > 1))
1787 			    {
1788 				// Limit to a range of 256 chars.
1789 				if (endc > startc + 256)
1790 				    EMSG_RET_NULL(_(e_large_class));
1791 				while (++startc <= endc)
1792 				    regmbc(startc);
1793 			    }
1794 			    else
1795 			    {
1796 #ifdef EBCDIC
1797 				int	alpha_only = FALSE;
1798 
1799 				// for alphabetical range skip the gaps
1800 				// 'i'-'j', 'r'-'s', 'I'-'J' and 'R'-'S'.
1801 				if (isalpha(startc) && isalpha(endc))
1802 				    alpha_only = TRUE;
1803 #endif
1804 				while (++startc <= endc)
1805 #ifdef EBCDIC
1806 				    if (!alpha_only || isalpha(startc))
1807 #endif
1808 					regc(startc);
1809 			    }
1810 			    startc = -1;
1811 			}
1812 		    }
1813 		    // Only "\]", "\^", "\]" and "\\" are special in Vi.  Vim
1814 		    // accepts "\t", "\e", etc., but only when the 'l' flag in
1815 		    // 'cpoptions' is not included.
1816 		    // Posix doesn't recognize backslash at all.
1817 		    else if (*regparse == '\\'
1818 			    && !reg_cpo_bsl
1819 			    && (vim_strchr(REGEXP_INRANGE, regparse[1]) != NULL
1820 				|| (!reg_cpo_lit
1821 				    && vim_strchr(REGEXP_ABBR,
1822 						       regparse[1]) != NULL)))
1823 		    {
1824 			regparse++;
1825 			if (*regparse == 'n')
1826 			{
1827 			    // '\n' in range: also match NL
1828 			    if (ret != JUST_CALC_SIZE)
1829 			    {
1830 				// Using \n inside [^] does not change what
1831 				// matches. "[^\n]" is the same as ".".
1832 				if (*ret == ANYOF)
1833 				{
1834 				    *ret = ANYOF + ADD_NL;
1835 				    *flagp |= HASNL;
1836 				}
1837 				// else: must have had a \n already
1838 			    }
1839 			    regparse++;
1840 			    startc = -1;
1841 			}
1842 			else if (*regparse == 'd'
1843 				|| *regparse == 'o'
1844 				|| *regparse == 'x'
1845 				|| *regparse == 'u'
1846 				|| *regparse == 'U')
1847 			{
1848 			    startc = coll_get_char();
1849 			    if (startc == 0)
1850 				regc(0x0a);
1851 			    else
1852 				regmbc(startc);
1853 			}
1854 			else
1855 			{
1856 			    startc = backslash_trans(*regparse++);
1857 			    regc(startc);
1858 			}
1859 		    }
1860 		    else if (*regparse == '[')
1861 		    {
1862 			int c_class;
1863 			int cu;
1864 
1865 			c_class = get_char_class(&regparse);
1866 			startc = -1;
1867 			// Characters assumed to be 8 bits!
1868 			switch (c_class)
1869 			{
1870 			    case CLASS_NONE:
1871 				c_class = get_equi_class(&regparse);
1872 				if (c_class != 0)
1873 				{
1874 				    // produce equivalence class
1875 				    reg_equi_class(c_class);
1876 				}
1877 				else if ((c_class =
1878 					    get_coll_element(&regparse)) != 0)
1879 				{
1880 				    // produce a collating element
1881 				    regmbc(c_class);
1882 				}
1883 				else
1884 				{
1885 				    // literal '[', allow [[-x] as a range
1886 				    startc = *regparse++;
1887 				    regc(startc);
1888 				}
1889 				break;
1890 			    case CLASS_ALNUM:
1891 				for (cu = 1; cu < 128; cu++)
1892 				    if (isalnum(cu))
1893 					regmbc(cu);
1894 				break;
1895 			    case CLASS_ALPHA:
1896 				for (cu = 1; cu < 128; cu++)
1897 				    if (isalpha(cu))
1898 					regmbc(cu);
1899 				break;
1900 			    case CLASS_BLANK:
1901 				regc(' ');
1902 				regc('\t');
1903 				break;
1904 			    case CLASS_CNTRL:
1905 				for (cu = 1; cu <= 127; cu++)
1906 				    if (iscntrl(cu))
1907 					regmbc(cu);
1908 				break;
1909 			    case CLASS_DIGIT:
1910 				for (cu = 1; cu <= 127; cu++)
1911 				    if (VIM_ISDIGIT(cu))
1912 					regmbc(cu);
1913 				break;
1914 			    case CLASS_GRAPH:
1915 				for (cu = 1; cu <= 127; cu++)
1916 				    if (isgraph(cu))
1917 					regmbc(cu);
1918 				break;
1919 			    case CLASS_LOWER:
1920 				for (cu = 1; cu <= 255; cu++)
1921 				    if (MB_ISLOWER(cu) && cu != 170
1922 								 && cu != 186)
1923 					regmbc(cu);
1924 				break;
1925 			    case CLASS_PRINT:
1926 				for (cu = 1; cu <= 255; cu++)
1927 				    if (vim_isprintc(cu))
1928 					regmbc(cu);
1929 				break;
1930 			    case CLASS_PUNCT:
1931 				for (cu = 1; cu < 128; cu++)
1932 				    if (ispunct(cu))
1933 					regmbc(cu);
1934 				break;
1935 			    case CLASS_SPACE:
1936 				for (cu = 9; cu <= 13; cu++)
1937 				    regc(cu);
1938 				regc(' ');
1939 				break;
1940 			    case CLASS_UPPER:
1941 				for (cu = 1; cu <= 255; cu++)
1942 				    if (MB_ISUPPER(cu))
1943 					regmbc(cu);
1944 				break;
1945 			    case CLASS_XDIGIT:
1946 				for (cu = 1; cu <= 255; cu++)
1947 				    if (vim_isxdigit(cu))
1948 					regmbc(cu);
1949 				break;
1950 			    case CLASS_TAB:
1951 				regc('\t');
1952 				break;
1953 			    case CLASS_RETURN:
1954 				regc('\r');
1955 				break;
1956 			    case CLASS_BACKSPACE:
1957 				regc('\b');
1958 				break;
1959 			    case CLASS_ESCAPE:
1960 				regc('\033');
1961 				break;
1962 			    case CLASS_IDENT:
1963 				for (cu = 1; cu <= 255; cu++)
1964 				    if (vim_isIDc(cu))
1965 					regmbc(cu);
1966 				break;
1967 			    case CLASS_KEYWORD:
1968 				for (cu = 1; cu <= 255; cu++)
1969 				    if (reg_iswordc(cu))
1970 					regmbc(cu);
1971 				break;
1972 			    case CLASS_FNAME:
1973 				for (cu = 1; cu <= 255; cu++)
1974 				    if (vim_isfilec(cu))
1975 					regmbc(cu);
1976 				break;
1977 			}
1978 		    }
1979 		    else
1980 		    {
1981 			if (has_mbyte)
1982 			{
1983 			    int	len;
1984 
1985 			    // produce a multibyte character, including any
1986 			    // following composing characters
1987 			    startc = mb_ptr2char(regparse);
1988 			    len = (*mb_ptr2len)(regparse);
1989 			    if (enc_utf8 && utf_char2len(startc) != len)
1990 				startc = -1;	// composing chars
1991 			    while (--len >= 0)
1992 				regc(*regparse++);
1993 			}
1994 			else
1995 			{
1996 			    startc = *regparse++;
1997 			    regc(startc);
1998 			}
1999 		    }
2000 		}
2001 		regc(NUL);
2002 		prevchr_len = 1;	// last char was the ']'
2003 		if (*regparse != ']')
2004 		    EMSG_RET_NULL(_(e_toomsbra));	// Cannot happen?
2005 		skipchr();	    // let's be friends with the lexer again
2006 		*flagp |= HASWIDTH | SIMPLE;
2007 		break;
2008 	    }
2009 	    else if (reg_strict)
2010 		EMSG2_RET_NULL(_(e_missingbracket), reg_magic > MAGIC_OFF);
2011 	}
2012 	// FALLTHROUGH
2013 
2014       default:
2015 	{
2016 	    int		len;
2017 
2018 	    // A multi-byte character is handled as a separate atom if it's
2019 	    // before a multi and when it's a composing char.
2020 	    if (use_multibytecode(c))
2021 	    {
2022 do_multibyte:
2023 		ret = regnode(MULTIBYTECODE);
2024 		regmbc(c);
2025 		*flagp |= HASWIDTH | SIMPLE;
2026 		break;
2027 	    }
2028 
2029 	    ret = regnode(EXACTLY);
2030 
2031 	    // Append characters as long as:
2032 	    // - there is no following multi, we then need the character in
2033 	    //   front of it as a single character operand
2034 	    // - not running into a Magic character
2035 	    // - "one_exactly" is not set
2036 	    // But always emit at least one character.  Might be a Multi,
2037 	    // e.g., a "[" without matching "]".
2038 	    for (len = 0; c != NUL && (len == 0
2039 			|| (re_multi_type(peekchr()) == NOT_MULTI
2040 			    && !one_exactly
2041 			    && !is_Magic(c))); ++len)
2042 	    {
2043 		c = no_Magic(c);
2044 		if (has_mbyte)
2045 		{
2046 		    regmbc(c);
2047 		    if (enc_utf8)
2048 		    {
2049 			int	l;
2050 
2051 			// Need to get composing character too.
2052 			for (;;)
2053 			{
2054 			    l = utf_ptr2len(regparse);
2055 			    if (!UTF_COMPOSINGLIKE(regparse, regparse + l))
2056 				break;
2057 			    regmbc(utf_ptr2char(regparse));
2058 			    skipchr();
2059 			}
2060 		    }
2061 		}
2062 		else
2063 		    regc(c);
2064 		c = getchr();
2065 	    }
2066 	    ungetchr();
2067 
2068 	    regc(NUL);
2069 	    *flagp |= HASWIDTH;
2070 	    if (len == 1)
2071 		*flagp |= SIMPLE;
2072 	}
2073 	break;
2074     }
2075 
2076     return ret;
2077 }
2078 
2079 /*
2080  * Parse something followed by possible [*+=].
2081  *
2082  * Note that the branching code sequences used for = and the general cases
2083  * of * and + are somewhat optimized:  they use the same NOTHING node as
2084  * both the endmarker for their branch list and the body of the last branch.
2085  * It might seem that this node could be dispensed with entirely, but the
2086  * endmarker role is not redundant.
2087  */
2088     static char_u *
regpiece(int * flagp)2089 regpiece(int *flagp)
2090 {
2091     char_u	    *ret;
2092     int		    op;
2093     char_u	    *next;
2094     int		    flags;
2095     long	    minval;
2096     long	    maxval;
2097 
2098     ret = regatom(&flags);
2099     if (ret == NULL)
2100 	return NULL;
2101 
2102     op = peekchr();
2103     if (re_multi_type(op) == NOT_MULTI)
2104     {
2105 	*flagp = flags;
2106 	return ret;
2107     }
2108     // default flags
2109     *flagp = (WORST | SPSTART | (flags & (HASNL | HASLOOKBH)));
2110 
2111     skipchr();
2112     switch (op)
2113     {
2114 	case Magic('*'):
2115 	    if (flags & SIMPLE)
2116 		reginsert(STAR, ret);
2117 	    else
2118 	    {
2119 		// Emit x* as (x&|), where & means "self".
2120 		reginsert(BRANCH, ret); // Either x
2121 		regoptail(ret, regnode(BACK));	// and loop
2122 		regoptail(ret, ret);	// back
2123 		regtail(ret, regnode(BRANCH));	// or
2124 		regtail(ret, regnode(NOTHING)); // null.
2125 	    }
2126 	    break;
2127 
2128 	case Magic('+'):
2129 	    if (flags & SIMPLE)
2130 		reginsert(PLUS, ret);
2131 	    else
2132 	    {
2133 		// Emit x+ as x(&|), where & means "self".
2134 		next = regnode(BRANCH); // Either
2135 		regtail(ret, next);
2136 		regtail(regnode(BACK), ret);	// loop back
2137 		regtail(next, regnode(BRANCH)); // or
2138 		regtail(ret, regnode(NOTHING)); // null.
2139 	    }
2140 	    *flagp = (WORST | HASWIDTH | (flags & (HASNL | HASLOOKBH)));
2141 	    break;
2142 
2143 	case Magic('@'):
2144 	    {
2145 		int	lop = END;
2146 		long	nr;
2147 
2148 		nr = getdecchrs();
2149 		switch (no_Magic(getchr()))
2150 		{
2151 		    case '=': lop = MATCH; break;		  // \@=
2152 		    case '!': lop = NOMATCH; break;		  // \@!
2153 		    case '>': lop = SUBPAT; break;		  // \@>
2154 		    case '<': switch (no_Magic(getchr()))
2155 			      {
2156 				  case '=': lop = BEHIND; break;   // \@<=
2157 				  case '!': lop = NOBEHIND; break; // \@<!
2158 			      }
2159 		}
2160 		if (lop == END)
2161 		    EMSG2_RET_NULL(_(e_invalid_character_after_str_at),
2162 						      reg_magic == MAGIC_ALL);
2163 		// Look behind must match with behind_pos.
2164 		if (lop == BEHIND || lop == NOBEHIND)
2165 		{
2166 		    regtail(ret, regnode(BHPOS));
2167 		    *flagp |= HASLOOKBH;
2168 		}
2169 		regtail(ret, regnode(END)); // operand ends
2170 		if (lop == BEHIND || lop == NOBEHIND)
2171 		{
2172 		    if (nr < 0)
2173 			nr = 0; // no limit is same as zero limit
2174 		    reginsert_nr(lop, nr, ret);
2175 		}
2176 		else
2177 		    reginsert(lop, ret);
2178 		break;
2179 	    }
2180 
2181 	case Magic('?'):
2182 	case Magic('='):
2183 	    // Emit x= as (x|)
2184 	    reginsert(BRANCH, ret);		// Either x
2185 	    regtail(ret, regnode(BRANCH));	// or
2186 	    next = regnode(NOTHING);		// null.
2187 	    regtail(ret, next);
2188 	    regoptail(ret, next);
2189 	    break;
2190 
2191 	case Magic('{'):
2192 	    if (!read_limits(&minval, &maxval))
2193 		return NULL;
2194 	    if (flags & SIMPLE)
2195 	    {
2196 		reginsert(BRACE_SIMPLE, ret);
2197 		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
2198 	    }
2199 	    else
2200 	    {
2201 		if (num_complex_braces >= 10)
2202 		    EMSG2_RET_NULL(_(e_too_many_complex_str_curly),
2203 						      reg_magic == MAGIC_ALL);
2204 		reginsert(BRACE_COMPLEX + num_complex_braces, ret);
2205 		regoptail(ret, regnode(BACK));
2206 		regoptail(ret, ret);
2207 		reginsert_limits(BRACE_LIMITS, minval, maxval, ret);
2208 		++num_complex_braces;
2209 	    }
2210 	    if (minval > 0 && maxval > 0)
2211 		*flagp = (HASWIDTH | (flags & (HASNL | HASLOOKBH)));
2212 	    break;
2213     }
2214     if (re_multi_type(peekchr()) != NOT_MULTI)
2215     {
2216 	// Can't have a multi follow a multi.
2217 	if (peekchr() == Magic('*'))
2218 	    EMSG2_RET_NULL(_("E61: Nested %s*"), reg_magic >= MAGIC_ON);
2219 	EMSG3_RET_NULL(_("E62: Nested %s%c"), reg_magic == MAGIC_ALL,
2220 							  no_Magic(peekchr()));
2221     }
2222 
2223     return ret;
2224 }
2225 
2226 /*
2227  * Parse one alternative of an | or & operator.
2228  * Implements the concatenation operator.
2229  */
2230     static char_u *
regconcat(int * flagp)2231 regconcat(int *flagp)
2232 {
2233     char_u	*first = NULL;
2234     char_u	*chain = NULL;
2235     char_u	*latest;
2236     int		flags;
2237     int		cont = TRUE;
2238 
2239     *flagp = WORST;		// Tentatively.
2240 
2241     while (cont)
2242     {
2243 	switch (peekchr())
2244 	{
2245 	    case NUL:
2246 	    case Magic('|'):
2247 	    case Magic('&'):
2248 	    case Magic(')'):
2249 			    cont = FALSE;
2250 			    break;
2251 	    case Magic('Z'):
2252 			    regflags |= RF_ICOMBINE;
2253 			    skipchr_keepstart();
2254 			    break;
2255 	    case Magic('c'):
2256 			    regflags |= RF_ICASE;
2257 			    skipchr_keepstart();
2258 			    break;
2259 	    case Magic('C'):
2260 			    regflags |= RF_NOICASE;
2261 			    skipchr_keepstart();
2262 			    break;
2263 	    case Magic('v'):
2264 			    reg_magic = MAGIC_ALL;
2265 			    skipchr_keepstart();
2266 			    curchr = -1;
2267 			    break;
2268 	    case Magic('m'):
2269 			    reg_magic = MAGIC_ON;
2270 			    skipchr_keepstart();
2271 			    curchr = -1;
2272 			    break;
2273 	    case Magic('M'):
2274 			    reg_magic = MAGIC_OFF;
2275 			    skipchr_keepstart();
2276 			    curchr = -1;
2277 			    break;
2278 	    case Magic('V'):
2279 			    reg_magic = MAGIC_NONE;
2280 			    skipchr_keepstart();
2281 			    curchr = -1;
2282 			    break;
2283 	    default:
2284 			    latest = regpiece(&flags);
2285 			    if (latest == NULL || reg_toolong)
2286 				return NULL;
2287 			    *flagp |= flags & (HASWIDTH | HASNL | HASLOOKBH);
2288 			    if (chain == NULL)	// First piece.
2289 				*flagp |= flags & SPSTART;
2290 			    else
2291 				regtail(chain, latest);
2292 			    chain = latest;
2293 			    if (first == NULL)
2294 				first = latest;
2295 			    break;
2296 	}
2297     }
2298     if (first == NULL)		// Loop ran zero times.
2299 	first = regnode(NOTHING);
2300     return first;
2301 }
2302 
2303 /*
2304  * Parse one alternative of an | operator.
2305  * Implements the & operator.
2306  */
2307     static char_u *
regbranch(int * flagp)2308 regbranch(int *flagp)
2309 {
2310     char_u	*ret;
2311     char_u	*chain = NULL;
2312     char_u	*latest;
2313     int		flags;
2314 
2315     *flagp = WORST | HASNL;		// Tentatively.
2316 
2317     ret = regnode(BRANCH);
2318     for (;;)
2319     {
2320 	latest = regconcat(&flags);
2321 	if (latest == NULL)
2322 	    return NULL;
2323 	// If one of the branches has width, the whole thing has.  If one of
2324 	// the branches anchors at start-of-line, the whole thing does.
2325 	// If one of the branches uses look-behind, the whole thing does.
2326 	*flagp |= flags & (HASWIDTH | SPSTART | HASLOOKBH);
2327 	// If one of the branches doesn't match a line-break, the whole thing
2328 	// doesn't.
2329 	*flagp &= ~HASNL | (flags & HASNL);
2330 	if (chain != NULL)
2331 	    regtail(chain, latest);
2332 	if (peekchr() != Magic('&'))
2333 	    break;
2334 	skipchr();
2335 	regtail(latest, regnode(END)); // operand ends
2336 	if (reg_toolong)
2337 	    break;
2338 	reginsert(MATCH, latest);
2339 	chain = latest;
2340     }
2341 
2342     return ret;
2343 }
2344 
2345 /*
2346  * Parse regular expression, i.e. main body or parenthesized thing.
2347  *
2348  * Caller must absorb opening parenthesis.
2349  *
2350  * Combining parenthesis handling with the base level of regular expression
2351  * is a trifle forced, but the need to tie the tails of the branches to what
2352  * follows makes it hard to avoid.
2353  */
2354     static char_u *
reg(int paren,int * flagp)2355 reg(
2356     int		paren,	// REG_NOPAREN, REG_PAREN, REG_NPAREN or REG_ZPAREN
2357     int		*flagp)
2358 {
2359     char_u	*ret;
2360     char_u	*br;
2361     char_u	*ender;
2362     int		parno = 0;
2363     int		flags;
2364 
2365     *flagp = HASWIDTH;		// Tentatively.
2366 
2367 #ifdef FEAT_SYN_HL
2368     if (paren == REG_ZPAREN)
2369     {
2370 	// Make a ZOPEN node.
2371 	if (regnzpar >= NSUBEXP)
2372 	    EMSG_RET_NULL(_(e_too_many_z));
2373 	parno = regnzpar;
2374 	regnzpar++;
2375 	ret = regnode(ZOPEN + parno);
2376     }
2377     else
2378 #endif
2379 	if (paren == REG_PAREN)
2380     {
2381 	// Make a MOPEN node.
2382 	if (regnpar >= NSUBEXP)
2383 	    EMSG2_RET_NULL(_(e_too_many_str_open), reg_magic == MAGIC_ALL);
2384 	parno = regnpar;
2385 	++regnpar;
2386 	ret = regnode(MOPEN + parno);
2387     }
2388     else if (paren == REG_NPAREN)
2389     {
2390 	// Make a NOPEN node.
2391 	ret = regnode(NOPEN);
2392     }
2393     else
2394 	ret = NULL;
2395 
2396     // Pick up the branches, linking them together.
2397     br = regbranch(&flags);
2398     if (br == NULL)
2399 	return NULL;
2400     if (ret != NULL)
2401 	regtail(ret, br);	// [MZ]OPEN -> first.
2402     else
2403 	ret = br;
2404     // If one of the branches can be zero-width, the whole thing can.
2405     // If one of the branches has * at start or matches a line-break, the
2406     // whole thing can.
2407     if (!(flags & HASWIDTH))
2408 	*flagp &= ~HASWIDTH;
2409     *flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
2410     while (peekchr() == Magic('|'))
2411     {
2412 	skipchr();
2413 	br = regbranch(&flags);
2414 	if (br == NULL || reg_toolong)
2415 	    return NULL;
2416 	regtail(ret, br);	// BRANCH -> BRANCH.
2417 	if (!(flags & HASWIDTH))
2418 	    *flagp &= ~HASWIDTH;
2419 	*flagp |= flags & (SPSTART | HASNL | HASLOOKBH);
2420     }
2421 
2422     // Make a closing node, and hook it on the end.
2423     ender = regnode(
2424 #ifdef FEAT_SYN_HL
2425 	    paren == REG_ZPAREN ? ZCLOSE + parno :
2426 #endif
2427 	    paren == REG_PAREN ? MCLOSE + parno :
2428 	    paren == REG_NPAREN ? NCLOSE : END);
2429     regtail(ret, ender);
2430 
2431     // Hook the tails of the branches to the closing node.
2432     for (br = ret; br != NULL; br = regnext(br))
2433 	regoptail(br, ender);
2434 
2435     // Check for proper termination.
2436     if (paren != REG_NOPAREN && getchr() != Magic(')'))
2437     {
2438 #ifdef FEAT_SYN_HL
2439 	if (paren == REG_ZPAREN)
2440 	    EMSG_RET_NULL(_(e_unmatched_z));
2441 	else
2442 #endif
2443 	    if (paren == REG_NPAREN)
2444 	    EMSG2_RET_NULL(_(e_unmatched_str_percent_open), reg_magic == MAGIC_ALL);
2445 	else
2446 	    EMSG2_RET_NULL(_(e_unmatched_str_open), reg_magic == MAGIC_ALL);
2447     }
2448     else if (paren == REG_NOPAREN && peekchr() != NUL)
2449     {
2450 	if (curchr == Magic(')'))
2451 	    EMSG2_RET_NULL(_(e_unmatched_str_close), reg_magic == MAGIC_ALL);
2452 	else
2453 	    EMSG_RET_NULL(_(e_trailing));	// "Can't happen".
2454 	// NOTREACHED
2455     }
2456     // Here we set the flag allowing back references to this set of
2457     // parentheses.
2458     if (paren == REG_PAREN)
2459 	had_endbrace[parno] = TRUE;	// have seen the close paren
2460     return ret;
2461 }
2462 
2463 /*
2464  * bt_regcomp() - compile a regular expression into internal code for the
2465  * traditional back track matcher.
2466  * Returns the program in allocated space.  Returns NULL for an error.
2467  *
2468  * We can't allocate space until we know how big the compiled form will be,
2469  * but we can't compile it (and thus know how big it is) until we've got a
2470  * place to put the code.  So we cheat:  we compile it twice, once with code
2471  * generation turned off and size counting turned on, and once "for real".
2472  * This also means that we don't allocate space until we are sure that the
2473  * thing really will compile successfully, and we never have to move the
2474  * code and thus invalidate pointers into it.  (Note that it has to be in
2475  * one piece because vim_free() must be able to free it all.)
2476  *
2477  * Whether upper/lower case is to be ignored is decided when executing the
2478  * program, it does not matter here.
2479  *
2480  * Beware that the optimization-preparation code in here knows about some
2481  * of the structure of the compiled regexp.
2482  * "re_flags": RE_MAGIC and/or RE_STRING.
2483  */
2484     static regprog_T *
bt_regcomp(char_u * expr,int re_flags)2485 bt_regcomp(char_u *expr, int re_flags)
2486 {
2487     bt_regprog_T    *r;
2488     char_u	*scan;
2489     char_u	*longest;
2490     int		len;
2491     int		flags;
2492 
2493     if (expr == NULL)
2494 	IEMSG_RET_NULL(_(e_null_argument));
2495 
2496     init_class_tab();
2497 
2498     // First pass: determine size, legality.
2499     regcomp_start(expr, re_flags);
2500     regcode = JUST_CALC_SIZE;
2501     regc(REGMAGIC);
2502     if (reg(REG_NOPAREN, &flags) == NULL)
2503 	return NULL;
2504 
2505     // Allocate space.
2506     r = alloc(offsetof(bt_regprog_T, program) + regsize);
2507     if (r == NULL)
2508 	return NULL;
2509     r->re_in_use = FALSE;
2510 
2511     // Second pass: emit code.
2512     regcomp_start(expr, re_flags);
2513     regcode = r->program;
2514     regc(REGMAGIC);
2515     if (reg(REG_NOPAREN, &flags) == NULL || reg_toolong)
2516     {
2517 	vim_free(r);
2518 	if (reg_toolong)
2519 	    EMSG_RET_NULL(_("E339: Pattern too long"));
2520 	return NULL;
2521     }
2522 
2523     // Dig out information for optimizations.
2524     r->regstart = NUL;		// Worst-case defaults.
2525     r->reganch = 0;
2526     r->regmust = NULL;
2527     r->regmlen = 0;
2528     r->regflags = regflags;
2529     if (flags & HASNL)
2530 	r->regflags |= RF_HASNL;
2531     if (flags & HASLOOKBH)
2532 	r->regflags |= RF_LOOKBH;
2533 #ifdef FEAT_SYN_HL
2534     // Remember whether this pattern has any \z specials in it.
2535     r->reghasz = re_has_z;
2536 #endif
2537     scan = r->program + 1;	// First BRANCH.
2538     if (OP(regnext(scan)) == END)   // Only one top-level choice.
2539     {
2540 	scan = OPERAND(scan);
2541 
2542 	// Starting-point info.
2543 	if (OP(scan) == BOL || OP(scan) == RE_BOF)
2544 	{
2545 	    r->reganch++;
2546 	    scan = regnext(scan);
2547 	}
2548 
2549 	if (OP(scan) == EXACTLY)
2550 	{
2551 	    if (has_mbyte)
2552 		r->regstart = (*mb_ptr2char)(OPERAND(scan));
2553 	    else
2554 		r->regstart = *OPERAND(scan);
2555 	}
2556 	else if ((OP(scan) == BOW
2557 		    || OP(scan) == EOW
2558 		    || OP(scan) == NOTHING
2559 		    || OP(scan) == MOPEN + 0 || OP(scan) == NOPEN
2560 		    || OP(scan) == MCLOSE + 0 || OP(scan) == NCLOSE)
2561 		 && OP(regnext(scan)) == EXACTLY)
2562 	{
2563 	    if (has_mbyte)
2564 		r->regstart = (*mb_ptr2char)(OPERAND(regnext(scan)));
2565 	    else
2566 		r->regstart = *OPERAND(regnext(scan));
2567 	}
2568 
2569 	// If there's something expensive in the r.e., find the longest
2570 	// literal string that must appear and make it the regmust.  Resolve
2571 	// ties in favor of later strings, since the regstart check works
2572 	// with the beginning of the r.e. and avoiding duplication
2573 	// strengthens checking.  Not a strong reason, but sufficient in the
2574 	// absence of others.
2575 
2576 	// When the r.e. starts with BOW, it is faster to look for a regmust
2577 	// first. Used a lot for "#" and "*" commands. (Added by mool).
2578 	if ((flags & SPSTART || OP(scan) == BOW || OP(scan) == EOW)
2579 							  && !(flags & HASNL))
2580 	{
2581 	    longest = NULL;
2582 	    len = 0;
2583 	    for (; scan != NULL; scan = regnext(scan))
2584 		if (OP(scan) == EXACTLY && STRLEN(OPERAND(scan)) >= (size_t)len)
2585 		{
2586 		    longest = OPERAND(scan);
2587 		    len = (int)STRLEN(OPERAND(scan));
2588 		}
2589 	    r->regmust = longest;
2590 	    r->regmlen = len;
2591 	}
2592     }
2593 #ifdef BT_REGEXP_DUMP
2594     regdump(expr, r);
2595 #endif
2596     r->engine = &bt_regengine;
2597     return (regprog_T *)r;
2598 }
2599 
2600 #if defined(FEAT_SYN_HL) || defined(PROTO)
2601 /*
2602  * Check if during the previous call to vim_regcomp the EOL item "$" has been
2603  * found.  This is messy, but it works fine.
2604  */
2605     int
vim_regcomp_had_eol(void)2606 vim_regcomp_had_eol(void)
2607 {
2608     return had_eol;
2609 }
2610 #endif
2611 
2612 /*
2613  * Get a number after a backslash that is inside [].
2614  * When nothing is recognized return a backslash.
2615  */
2616     static int
coll_get_char(void)2617 coll_get_char(void)
2618 {
2619     long	nr = -1;
2620 
2621     switch (*regparse++)
2622     {
2623 	case 'd': nr = getdecchrs(); break;
2624 	case 'o': nr = getoctchrs(); break;
2625 	case 'x': nr = gethexchrs(2); break;
2626 	case 'u': nr = gethexchrs(4); break;
2627 	case 'U': nr = gethexchrs(8); break;
2628     }
2629     if (nr < 0 || nr > INT_MAX)
2630     {
2631 	// If getting the number fails be backwards compatible: the character
2632 	// is a backslash.
2633 	--regparse;
2634 	nr = '\\';
2635     }
2636     return nr;
2637 }
2638 
2639 /*
2640  * Free a compiled regexp program, returned by bt_regcomp().
2641  */
2642     static void
bt_regfree(regprog_T * prog)2643 bt_regfree(regprog_T *prog)
2644 {
2645     vim_free(prog);
2646 }
2647 
2648 #define ADVANCE_REGINPUT() MB_PTR_ADV(rex.input)
2649 
2650 /*
2651  * The arguments from BRACE_LIMITS are stored here.  They are actually local
2652  * to regmatch(), but they are here to reduce the amount of stack space used
2653  * (it can be called recursively many times).
2654  */
2655 static long	bl_minval;
2656 static long	bl_maxval;
2657 
2658 /*
2659  * Save the input line and position in a regsave_T.
2660  */
2661     static void
reg_save(regsave_T * save,garray_T * gap)2662 reg_save(regsave_T *save, garray_T *gap)
2663 {
2664     if (REG_MULTI)
2665     {
2666 	save->rs_u.pos.col = (colnr_T)(rex.input - rex.line);
2667 	save->rs_u.pos.lnum = rex.lnum;
2668     }
2669     else
2670 	save->rs_u.ptr = rex.input;
2671     save->rs_len = gap->ga_len;
2672 }
2673 
2674 /*
2675  * Restore the input line and position from a regsave_T.
2676  */
2677     static void
reg_restore(regsave_T * save,garray_T * gap)2678 reg_restore(regsave_T *save, garray_T *gap)
2679 {
2680     if (REG_MULTI)
2681     {
2682 	if (rex.lnum != save->rs_u.pos.lnum)
2683 	{
2684 	    // only call reg_getline() when the line number changed to save
2685 	    // a bit of time
2686 	    rex.lnum = save->rs_u.pos.lnum;
2687 	    rex.line = reg_getline(rex.lnum);
2688 	}
2689 	rex.input = rex.line + save->rs_u.pos.col;
2690     }
2691     else
2692 	rex.input = save->rs_u.ptr;
2693     gap->ga_len = save->rs_len;
2694 }
2695 
2696 /*
2697  * Return TRUE if current position is equal to saved position.
2698  */
2699     static int
reg_save_equal(regsave_T * save)2700 reg_save_equal(regsave_T *save)
2701 {
2702     if (REG_MULTI)
2703 	return rex.lnum == save->rs_u.pos.lnum
2704 				  && rex.input == rex.line + save->rs_u.pos.col;
2705     return rex.input == save->rs_u.ptr;
2706 }
2707 
2708 // Save the sub-expressions before attempting a match.
2709 #define save_se(savep, posp, pp) \
2710     REG_MULTI ? save_se_multi((savep), (posp)) : save_se_one((savep), (pp))
2711 
2712 // After a failed match restore the sub-expressions.
2713 #define restore_se(savep, posp, pp) { \
2714     if (REG_MULTI) \
2715 	*(posp) = (savep)->se_u.pos; \
2716     else \
2717 	*(pp) = (savep)->se_u.ptr; }
2718 
2719 /*
2720  * Tentatively set the sub-expression start to the current position (after
2721  * calling regmatch() they will have changed).  Need to save the existing
2722  * values for when there is no match.
2723  * Use se_save() to use pointer (save_se_multi()) or position (save_se_one()),
2724  * depending on REG_MULTI.
2725  */
2726     static void
save_se_multi(save_se_T * savep,lpos_T * posp)2727 save_se_multi(save_se_T *savep, lpos_T *posp)
2728 {
2729     savep->se_u.pos = *posp;
2730     posp->lnum = rex.lnum;
2731     posp->col = (colnr_T)(rex.input - rex.line);
2732 }
2733 
2734     static void
save_se_one(save_se_T * savep,char_u ** pp)2735 save_se_one(save_se_T *savep, char_u **pp)
2736 {
2737     savep->se_u.ptr = *pp;
2738     *pp = rex.input;
2739 }
2740 
2741 /*
2742  * regrepeat - repeatedly match something simple, return how many.
2743  * Advances rex.input (and rex.lnum) to just after the matched chars.
2744  */
2745     static int
regrepeat(char_u * p,long maxcount)2746 regrepeat(
2747     char_u	*p,
2748     long	maxcount)   // maximum number of matches allowed
2749 {
2750     long	count = 0;
2751     char_u	*scan;
2752     char_u	*opnd;
2753     int		mask;
2754     int		testval = 0;
2755 
2756     scan = rex.input;	    // Make local copy of rex.input for speed.
2757     opnd = OPERAND(p);
2758     switch (OP(p))
2759     {
2760       case ANY:
2761       case ANY + ADD_NL:
2762 	while (count < maxcount)
2763 	{
2764 	    // Matching anything means we continue until end-of-line (or
2765 	    // end-of-file for ANY + ADD_NL), only limited by maxcount.
2766 	    while (*scan != NUL && count < maxcount)
2767 	    {
2768 		++count;
2769 		MB_PTR_ADV(scan);
2770 	    }
2771 	    if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2772 				      || rex.reg_line_lbr || count == maxcount)
2773 		break;
2774 	    ++count;		// count the line-break
2775 	    reg_nextline();
2776 	    scan = rex.input;
2777 	    if (got_int)
2778 		break;
2779 	}
2780 	break;
2781 
2782       case IDENT:
2783       case IDENT + ADD_NL:
2784 	testval = TRUE;
2785 	// FALLTHROUGH
2786       case SIDENT:
2787       case SIDENT + ADD_NL:
2788 	while (count < maxcount)
2789 	{
2790 	    if (vim_isIDc(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
2791 	    {
2792 		MB_PTR_ADV(scan);
2793 	    }
2794 	    else if (*scan == NUL)
2795 	    {
2796 		if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2797 							   || rex.reg_line_lbr)
2798 		    break;
2799 		reg_nextline();
2800 		scan = rex.input;
2801 		if (got_int)
2802 		    break;
2803 	    }
2804 	    else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2805 		++scan;
2806 	    else
2807 		break;
2808 	    ++count;
2809 	}
2810 	break;
2811 
2812       case KWORD:
2813       case KWORD + ADD_NL:
2814 	testval = TRUE;
2815 	// FALLTHROUGH
2816       case SKWORD:
2817       case SKWORD + ADD_NL:
2818 	while (count < maxcount)
2819 	{
2820 	    if (vim_iswordp_buf(scan, rex.reg_buf)
2821 					  && (testval || !VIM_ISDIGIT(*scan)))
2822 	    {
2823 		MB_PTR_ADV(scan);
2824 	    }
2825 	    else if (*scan == NUL)
2826 	    {
2827 		if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2828 							   || rex.reg_line_lbr)
2829 		    break;
2830 		reg_nextline();
2831 		scan = rex.input;
2832 		if (got_int)
2833 		    break;
2834 	    }
2835 	    else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2836 		++scan;
2837 	    else
2838 		break;
2839 	    ++count;
2840 	}
2841 	break;
2842 
2843       case FNAME:
2844       case FNAME + ADD_NL:
2845 	testval = TRUE;
2846 	// FALLTHROUGH
2847       case SFNAME:
2848       case SFNAME + ADD_NL:
2849 	while (count < maxcount)
2850 	{
2851 	    if (vim_isfilec(PTR2CHAR(scan)) && (testval || !VIM_ISDIGIT(*scan)))
2852 	    {
2853 		MB_PTR_ADV(scan);
2854 	    }
2855 	    else if (*scan == NUL)
2856 	    {
2857 		if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2858 							   || rex.reg_line_lbr)
2859 		    break;
2860 		reg_nextline();
2861 		scan = rex.input;
2862 		if (got_int)
2863 		    break;
2864 	    }
2865 	    else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2866 		++scan;
2867 	    else
2868 		break;
2869 	    ++count;
2870 	}
2871 	break;
2872 
2873       case PRINT:
2874       case PRINT + ADD_NL:
2875 	testval = TRUE;
2876 	// FALLTHROUGH
2877       case SPRINT:
2878       case SPRINT + ADD_NL:
2879 	while (count < maxcount)
2880 	{
2881 	    if (*scan == NUL)
2882 	    {
2883 		if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2884 							   || rex.reg_line_lbr)
2885 		    break;
2886 		reg_nextline();
2887 		scan = rex.input;
2888 		if (got_int)
2889 		    break;
2890 	    }
2891 	    else if (vim_isprintc(PTR2CHAR(scan)) == 1
2892 					  && (testval || !VIM_ISDIGIT(*scan)))
2893 	    {
2894 		MB_PTR_ADV(scan);
2895 	    }
2896 	    else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2897 		++scan;
2898 	    else
2899 		break;
2900 	    ++count;
2901 	}
2902 	break;
2903 
2904       case WHITE:
2905       case WHITE + ADD_NL:
2906 	testval = mask = RI_WHITE;
2907 do_class:
2908 	while (count < maxcount)
2909 	{
2910 	    int		l;
2911 
2912 	    if (*scan == NUL)
2913 	    {
2914 		if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
2915 							   || rex.reg_line_lbr)
2916 		    break;
2917 		reg_nextline();
2918 		scan = rex.input;
2919 		if (got_int)
2920 		    break;
2921 	    }
2922 	    else if (has_mbyte && (l = (*mb_ptr2len)(scan)) > 1)
2923 	    {
2924 		if (testval != 0)
2925 		    break;
2926 		scan += l;
2927 	    }
2928 	    else if ((class_tab[*scan] & mask) == testval)
2929 		++scan;
2930 	    else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
2931 		++scan;
2932 	    else
2933 		break;
2934 	    ++count;
2935 	}
2936 	break;
2937 
2938       case NWHITE:
2939       case NWHITE + ADD_NL:
2940 	mask = RI_WHITE;
2941 	goto do_class;
2942       case DIGIT:
2943       case DIGIT + ADD_NL:
2944 	testval = mask = RI_DIGIT;
2945 	goto do_class;
2946       case NDIGIT:
2947       case NDIGIT + ADD_NL:
2948 	mask = RI_DIGIT;
2949 	goto do_class;
2950       case HEX:
2951       case HEX + ADD_NL:
2952 	testval = mask = RI_HEX;
2953 	goto do_class;
2954       case NHEX:
2955       case NHEX + ADD_NL:
2956 	mask = RI_HEX;
2957 	goto do_class;
2958       case OCTAL:
2959       case OCTAL + ADD_NL:
2960 	testval = mask = RI_OCTAL;
2961 	goto do_class;
2962       case NOCTAL:
2963       case NOCTAL + ADD_NL:
2964 	mask = RI_OCTAL;
2965 	goto do_class;
2966       case WORD:
2967       case WORD + ADD_NL:
2968 	testval = mask = RI_WORD;
2969 	goto do_class;
2970       case NWORD:
2971       case NWORD + ADD_NL:
2972 	mask = RI_WORD;
2973 	goto do_class;
2974       case HEAD:
2975       case HEAD + ADD_NL:
2976 	testval = mask = RI_HEAD;
2977 	goto do_class;
2978       case NHEAD:
2979       case NHEAD + ADD_NL:
2980 	mask = RI_HEAD;
2981 	goto do_class;
2982       case ALPHA:
2983       case ALPHA + ADD_NL:
2984 	testval = mask = RI_ALPHA;
2985 	goto do_class;
2986       case NALPHA:
2987       case NALPHA + ADD_NL:
2988 	mask = RI_ALPHA;
2989 	goto do_class;
2990       case LOWER:
2991       case LOWER + ADD_NL:
2992 	testval = mask = RI_LOWER;
2993 	goto do_class;
2994       case NLOWER:
2995       case NLOWER + ADD_NL:
2996 	mask = RI_LOWER;
2997 	goto do_class;
2998       case UPPER:
2999       case UPPER + ADD_NL:
3000 	testval = mask = RI_UPPER;
3001 	goto do_class;
3002       case NUPPER:
3003       case NUPPER + ADD_NL:
3004 	mask = RI_UPPER;
3005 	goto do_class;
3006 
3007       case EXACTLY:
3008 	{
3009 	    int	    cu, cl;
3010 
3011 	    // This doesn't do a multi-byte character, because a MULTIBYTECODE
3012 	    // would have been used for it.  It does handle single-byte
3013 	    // characters, such as latin1.
3014 	    if (rex.reg_ic)
3015 	    {
3016 		cu = MB_TOUPPER(*opnd);
3017 		cl = MB_TOLOWER(*opnd);
3018 		while (count < maxcount && (*scan == cu || *scan == cl))
3019 		{
3020 		    count++;
3021 		    scan++;
3022 		}
3023 	    }
3024 	    else
3025 	    {
3026 		cu = *opnd;
3027 		while (count < maxcount && *scan == cu)
3028 		{
3029 		    count++;
3030 		    scan++;
3031 		}
3032 	    }
3033 	    break;
3034 	}
3035 
3036       case MULTIBYTECODE:
3037 	{
3038 	    int		i, len, cf = 0;
3039 
3040 	    // Safety check (just in case 'encoding' was changed since
3041 	    // compiling the program).
3042 	    if ((len = (*mb_ptr2len)(opnd)) > 1)
3043 	    {
3044 		if (rex.reg_ic && enc_utf8)
3045 		    cf = utf_fold(utf_ptr2char(opnd));
3046 		while (count < maxcount && (*mb_ptr2len)(scan) >= len)
3047 		{
3048 		    for (i = 0; i < len; ++i)
3049 			if (opnd[i] != scan[i])
3050 			    break;
3051 		    if (i < len && (!rex.reg_ic || !enc_utf8
3052 					|| utf_fold(utf_ptr2char(scan)) != cf))
3053 			break;
3054 		    scan += len;
3055 		    ++count;
3056 		}
3057 	    }
3058 	}
3059 	break;
3060 
3061       case ANYOF:
3062       case ANYOF + ADD_NL:
3063 	testval = TRUE;
3064 	// FALLTHROUGH
3065 
3066       case ANYBUT:
3067       case ANYBUT + ADD_NL:
3068 	while (count < maxcount)
3069 	{
3070 	    int len;
3071 
3072 	    if (*scan == NUL)
3073 	    {
3074 		if (!REG_MULTI || !WITH_NL(OP(p)) || rex.lnum > rex.reg_maxline
3075 							   || rex.reg_line_lbr)
3076 		    break;
3077 		reg_nextline();
3078 		scan = rex.input;
3079 		if (got_int)
3080 		    break;
3081 	    }
3082 	    else if (rex.reg_line_lbr && *scan == '\n' && WITH_NL(OP(p)))
3083 		++scan;
3084 	    else if (has_mbyte && (len = (*mb_ptr2len)(scan)) > 1)
3085 	    {
3086 		if ((cstrchr(opnd, (*mb_ptr2char)(scan)) == NULL) == testval)
3087 		    break;
3088 		scan += len;
3089 	    }
3090 	    else
3091 	    {
3092 		if ((cstrchr(opnd, *scan) == NULL) == testval)
3093 		    break;
3094 		++scan;
3095 	    }
3096 	    ++count;
3097 	}
3098 	break;
3099 
3100       case NEWL:
3101 	while (count < maxcount
3102 		&& ((*scan == NUL && rex.lnum <= rex.reg_maxline
3103 				       && !rex.reg_line_lbr && REG_MULTI)
3104 		    || (*scan == '\n' && rex.reg_line_lbr)))
3105 	{
3106 	    count++;
3107 	    if (rex.reg_line_lbr)
3108 		ADVANCE_REGINPUT();
3109 	    else
3110 		reg_nextline();
3111 	    scan = rex.input;
3112 	    if (got_int)
3113 		break;
3114 	}
3115 	break;
3116 
3117       default:			// Oh dear.  Called inappropriately.
3118 	iemsg(_(e_corrupted_regexp_program));
3119 #ifdef DEBUG
3120 	printf("Called regrepeat with op code %d\n", OP(p));
3121 #endif
3122 	break;
3123     }
3124 
3125     rex.input = scan;
3126 
3127     return (int)count;
3128 }
3129 
3130 /*
3131  * Push an item onto the regstack.
3132  * Returns pointer to new item.  Returns NULL when out of memory.
3133  */
3134     static regitem_T *
regstack_push(regstate_T state,char_u * scan)3135 regstack_push(regstate_T state, char_u *scan)
3136 {
3137     regitem_T	*rp;
3138 
3139     if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
3140     {
3141 	emsg(_(e_maxmempat));
3142 	return NULL;
3143     }
3144     if (ga_grow(&regstack, sizeof(regitem_T)) == FAIL)
3145 	return NULL;
3146 
3147     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len);
3148     rp->rs_state = state;
3149     rp->rs_scan = scan;
3150 
3151     regstack.ga_len += sizeof(regitem_T);
3152     return rp;
3153 }
3154 
3155 /*
3156  * Pop an item from the regstack.
3157  */
3158     static void
regstack_pop(char_u ** scan)3159 regstack_pop(char_u **scan)
3160 {
3161     regitem_T	*rp;
3162 
3163     rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
3164     *scan = rp->rs_scan;
3165 
3166     regstack.ga_len -= sizeof(regitem_T);
3167 }
3168 
3169 /*
3170  * Save the current subexpr to "bp", so that they can be restored
3171  * later by restore_subexpr().
3172  */
3173     static void
save_subexpr(regbehind_T * bp)3174 save_subexpr(regbehind_T *bp)
3175 {
3176     int i;
3177 
3178     // When "rex.need_clear_subexpr" is set we don't need to save the values,
3179     // only remember that this flag needs to be set again when restoring.
3180     bp->save_need_clear_subexpr = rex.need_clear_subexpr;
3181     if (!rex.need_clear_subexpr)
3182     {
3183 	for (i = 0; i < NSUBEXP; ++i)
3184 	{
3185 	    if (REG_MULTI)
3186 	    {
3187 		bp->save_start[i].se_u.pos = rex.reg_startpos[i];
3188 		bp->save_end[i].se_u.pos = rex.reg_endpos[i];
3189 	    }
3190 	    else
3191 	    {
3192 		bp->save_start[i].se_u.ptr = rex.reg_startp[i];
3193 		bp->save_end[i].se_u.ptr = rex.reg_endp[i];
3194 	    }
3195 	}
3196     }
3197 }
3198 
3199 /*
3200  * Restore the subexpr from "bp".
3201  */
3202     static void
restore_subexpr(regbehind_T * bp)3203 restore_subexpr(regbehind_T *bp)
3204 {
3205     int i;
3206 
3207     // Only need to restore saved values when they are not to be cleared.
3208     rex.need_clear_subexpr = bp->save_need_clear_subexpr;
3209     if (!rex.need_clear_subexpr)
3210     {
3211 	for (i = 0; i < NSUBEXP; ++i)
3212 	{
3213 	    if (REG_MULTI)
3214 	    {
3215 		rex.reg_startpos[i] = bp->save_start[i].se_u.pos;
3216 		rex.reg_endpos[i] = bp->save_end[i].se_u.pos;
3217 	    }
3218 	    else
3219 	    {
3220 		rex.reg_startp[i] = bp->save_start[i].se_u.ptr;
3221 		rex.reg_endp[i] = bp->save_end[i].se_u.ptr;
3222 	    }
3223 	}
3224     }
3225 }
3226 
3227 /*
3228  * regmatch - main matching routine
3229  *
3230  * Conceptually the strategy is simple: Check to see whether the current node
3231  * matches, push an item onto the regstack and loop to see whether the rest
3232  * matches, and then act accordingly.  In practice we make some effort to
3233  * avoid using the regstack, in particular by going through "ordinary" nodes
3234  * (that don't need to know whether the rest of the match failed) by a nested
3235  * loop.
3236  *
3237  * Returns TRUE when there is a match.  Leaves rex.input and rex.lnum just after
3238  * the last matched character.
3239  * Returns FALSE when there is no match.  Leaves rex.input and rex.lnum in an
3240  * undefined state!
3241  */
3242     static int
regmatch(char_u * scan,proftime_T * tm UNUSED,int * timed_out UNUSED)3243 regmatch(
3244     char_u	*scan,		    // Current node.
3245     proftime_T	*tm UNUSED,	    // timeout limit or NULL
3246     int		*timed_out UNUSED)  // flag set on timeout or NULL
3247 {
3248   char_u	*next;		// Next node.
3249   int		op;
3250   int		c;
3251   regitem_T	*rp;
3252   int		no;
3253   int		status;		// one of the RA_ values:
3254 #ifdef FEAT_RELTIME
3255   int		tm_count = 0;
3256 #endif
3257 
3258   // Make "regstack" and "backpos" empty.  They are allocated and freed in
3259   // bt_regexec_both() to reduce malloc()/free() calls.
3260   regstack.ga_len = 0;
3261   backpos.ga_len = 0;
3262 
3263   // Repeat until "regstack" is empty.
3264   for (;;)
3265   {
3266     // Some patterns may take a long time to match, e.g., "\([a-z]\+\)\+Q".
3267     // Allow interrupting them with CTRL-C.
3268     fast_breakcheck();
3269 
3270 #ifdef DEBUG
3271     if (scan != NULL && regnarrate)
3272     {
3273 	mch_errmsg((char *)regprop(scan));
3274 	mch_errmsg("(\n");
3275     }
3276 #endif
3277 
3278     // Repeat for items that can be matched sequentially, without using the
3279     // regstack.
3280     for (;;)
3281     {
3282 	if (got_int || scan == NULL)
3283 	{
3284 	    status = RA_FAIL;
3285 	    break;
3286 	}
3287 #ifdef FEAT_RELTIME
3288 	// Check for timeout once in a 100 times to avoid overhead.
3289 	if (tm != NULL && ++tm_count == 100)
3290 	{
3291 	    tm_count = 0;
3292 	    if (profile_passed_limit(tm))
3293 	    {
3294 		if (timed_out != NULL)
3295 		    *timed_out = TRUE;
3296 		status = RA_FAIL;
3297 		break;
3298 	    }
3299 	}
3300 #endif
3301 	status = RA_CONT;
3302 
3303 #ifdef DEBUG
3304 	if (regnarrate)
3305 	{
3306 	    mch_errmsg((char *)regprop(scan));
3307 	    mch_errmsg("...\n");
3308 # ifdef FEAT_SYN_HL
3309 	    if (re_extmatch_in != NULL)
3310 	    {
3311 		int i;
3312 
3313 		mch_errmsg(_("External submatches:\n"));
3314 		for (i = 0; i < NSUBEXP; i++)
3315 		{
3316 		    mch_errmsg("    \"");
3317 		    if (re_extmatch_in->matches[i] != NULL)
3318 			mch_errmsg((char *)re_extmatch_in->matches[i]);
3319 		    mch_errmsg("\"\n");
3320 		}
3321 	    }
3322 # endif
3323 	}
3324 #endif
3325 	next = regnext(scan);
3326 
3327 	op = OP(scan);
3328 	// Check for character class with NL added.
3329 	if (!rex.reg_line_lbr && WITH_NL(op) && REG_MULTI
3330 			     && *rex.input == NUL && rex.lnum <= rex.reg_maxline)
3331 	{
3332 	    reg_nextline();
3333 	}
3334 	else if (rex.reg_line_lbr && WITH_NL(op) && *rex.input == '\n')
3335 	{
3336 	    ADVANCE_REGINPUT();
3337 	}
3338 	else
3339 	{
3340 	  if (WITH_NL(op))
3341 	      op -= ADD_NL;
3342 	  if (has_mbyte)
3343 	      c = (*mb_ptr2char)(rex.input);
3344 	  else
3345 	      c = *rex.input;
3346 	  switch (op)
3347 	  {
3348 	  case BOL:
3349 	    if (rex.input != rex.line)
3350 		status = RA_NOMATCH;
3351 	    break;
3352 
3353 	  case EOL:
3354 	    if (c != NUL)
3355 		status = RA_NOMATCH;
3356 	    break;
3357 
3358 	  case RE_BOF:
3359 	    // We're not at the beginning of the file when below the first
3360 	    // line where we started, not at the start of the line or we
3361 	    // didn't start at the first line of the buffer.
3362 	    if (rex.lnum != 0 || rex.input != rex.line
3363 				       || (REG_MULTI && rex.reg_firstlnum > 1))
3364 		status = RA_NOMATCH;
3365 	    break;
3366 
3367 	  case RE_EOF:
3368 	    if (rex.lnum != rex.reg_maxline || c != NUL)
3369 		status = RA_NOMATCH;
3370 	    break;
3371 
3372 	  case CURSOR:
3373 	    // Check if the buffer is in a window and compare the
3374 	    // rex.reg_win->w_cursor position to the match position.
3375 	    if (rex.reg_win == NULL
3376 		    || (rex.lnum + rex.reg_firstlnum
3377 						 != rex.reg_win->w_cursor.lnum)
3378 		    || ((colnr_T)(rex.input - rex.line)
3379 						 != rex.reg_win->w_cursor.col))
3380 		status = RA_NOMATCH;
3381 	    break;
3382 
3383 	  case RE_MARK:
3384 	    // Compare the mark position to the match position.
3385 	    {
3386 		int	mark = OPERAND(scan)[0];
3387 		int	cmp = OPERAND(scan)[1];
3388 		pos_T	*pos;
3389 
3390 		pos = getmark_buf(rex.reg_buf, mark, FALSE);
3391 		if (pos == NULL		     // mark doesn't exist
3392 			|| pos->lnum <= 0)   // mark isn't set in reg_buf
3393 		{
3394 		    status = RA_NOMATCH;
3395 		}
3396 		else
3397 		{
3398 		    colnr_T pos_col = pos->lnum == rex.lnum + rex.reg_firstlnum
3399 							  && pos->col == MAXCOL
3400 				      ? (colnr_T)STRLEN(reg_getline(
3401 						pos->lnum - rex.reg_firstlnum))
3402 				      : pos->col;
3403 
3404 		    if ((pos->lnum == rex.lnum + rex.reg_firstlnum
3405 				? (pos_col == (colnr_T)(rex.input - rex.line)
3406 				    ? (cmp == '<' || cmp == '>')
3407 				    : (pos_col < (colnr_T)(rex.input - rex.line)
3408 					? cmp != '>'
3409 					: cmp != '<'))
3410 				: (pos->lnum < rex.lnum + rex.reg_firstlnum
3411 				    ? cmp != '>'
3412 				    : cmp != '<')))
3413 		    status = RA_NOMATCH;
3414 		}
3415 	    }
3416 	    break;
3417 
3418 	  case RE_VISUAL:
3419 	    if (!reg_match_visual())
3420 		status = RA_NOMATCH;
3421 	    break;
3422 
3423 	  case RE_LNUM:
3424 	    if (!REG_MULTI || !re_num_cmp((long_u)(rex.lnum + rex.reg_firstlnum),
3425 									scan))
3426 		status = RA_NOMATCH;
3427 	    break;
3428 
3429 	  case RE_COL:
3430 	    if (!re_num_cmp((long_u)(rex.input - rex.line) + 1, scan))
3431 		status = RA_NOMATCH;
3432 	    break;
3433 
3434 	  case RE_VCOL:
3435 	    if (!re_num_cmp((long_u)win_linetabsize(
3436 			    rex.reg_win == NULL ? curwin : rex.reg_win,
3437 			    rex.line, (colnr_T)(rex.input - rex.line)) + 1, scan))
3438 		status = RA_NOMATCH;
3439 	    break;
3440 
3441 	  case BOW:	// \<word; rex.input points to w
3442 	    if (c == NUL)	// Can't match at end of line
3443 		status = RA_NOMATCH;
3444 	    else if (has_mbyte)
3445 	    {
3446 		int this_class;
3447 
3448 		// Get class of current and previous char (if it exists).
3449 		this_class = mb_get_class_buf(rex.input, rex.reg_buf);
3450 		if (this_class <= 1)
3451 		    status = RA_NOMATCH;  // not on a word at all
3452 		else if (reg_prev_class() == this_class)
3453 		    status = RA_NOMATCH;  // previous char is in same word
3454 	    }
3455 	    else
3456 	    {
3457 		if (!vim_iswordc_buf(c, rex.reg_buf) || (rex.input > rex.line
3458 				&& vim_iswordc_buf(rex.input[-1], rex.reg_buf)))
3459 		    status = RA_NOMATCH;
3460 	    }
3461 	    break;
3462 
3463 	  case EOW:	// word\>; rex.input points after d
3464 	    if (rex.input == rex.line)    // Can't match at start of line
3465 		status = RA_NOMATCH;
3466 	    else if (has_mbyte)
3467 	    {
3468 		int this_class, prev_class;
3469 
3470 		// Get class of current and previous char (if it exists).
3471 		this_class = mb_get_class_buf(rex.input, rex.reg_buf);
3472 		prev_class = reg_prev_class();
3473 		if (this_class == prev_class
3474 			|| prev_class == 0 || prev_class == 1)
3475 		    status = RA_NOMATCH;
3476 	    }
3477 	    else
3478 	    {
3479 		if (!vim_iswordc_buf(rex.input[-1], rex.reg_buf)
3480 			|| (rex.input[0] != NUL
3481 					   && vim_iswordc_buf(c, rex.reg_buf)))
3482 		    status = RA_NOMATCH;
3483 	    }
3484 	    break; // Matched with EOW
3485 
3486 	  case ANY:
3487 	    // ANY does not match new lines.
3488 	    if (c == NUL)
3489 		status = RA_NOMATCH;
3490 	    else
3491 		ADVANCE_REGINPUT();
3492 	    break;
3493 
3494 	  case IDENT:
3495 	    if (!vim_isIDc(c))
3496 		status = RA_NOMATCH;
3497 	    else
3498 		ADVANCE_REGINPUT();
3499 	    break;
3500 
3501 	  case SIDENT:
3502 	    if (VIM_ISDIGIT(*rex.input) || !vim_isIDc(c))
3503 		status = RA_NOMATCH;
3504 	    else
3505 		ADVANCE_REGINPUT();
3506 	    break;
3507 
3508 	  case KWORD:
3509 	    if (!vim_iswordp_buf(rex.input, rex.reg_buf))
3510 		status = RA_NOMATCH;
3511 	    else
3512 		ADVANCE_REGINPUT();
3513 	    break;
3514 
3515 	  case SKWORD:
3516 	    if (VIM_ISDIGIT(*rex.input)
3517 				    || !vim_iswordp_buf(rex.input, rex.reg_buf))
3518 		status = RA_NOMATCH;
3519 	    else
3520 		ADVANCE_REGINPUT();
3521 	    break;
3522 
3523 	  case FNAME:
3524 	    if (!vim_isfilec(c))
3525 		status = RA_NOMATCH;
3526 	    else
3527 		ADVANCE_REGINPUT();
3528 	    break;
3529 
3530 	  case SFNAME:
3531 	    if (VIM_ISDIGIT(*rex.input) || !vim_isfilec(c))
3532 		status = RA_NOMATCH;
3533 	    else
3534 		ADVANCE_REGINPUT();
3535 	    break;
3536 
3537 	  case PRINT:
3538 	    if (!vim_isprintc(PTR2CHAR(rex.input)))
3539 		status = RA_NOMATCH;
3540 	    else
3541 		ADVANCE_REGINPUT();
3542 	    break;
3543 
3544 	  case SPRINT:
3545 	    if (VIM_ISDIGIT(*rex.input) || !vim_isprintc(PTR2CHAR(rex.input)))
3546 		status = RA_NOMATCH;
3547 	    else
3548 		ADVANCE_REGINPUT();
3549 	    break;
3550 
3551 	  case WHITE:
3552 	    if (!VIM_ISWHITE(c))
3553 		status = RA_NOMATCH;
3554 	    else
3555 		ADVANCE_REGINPUT();
3556 	    break;
3557 
3558 	  case NWHITE:
3559 	    if (c == NUL || VIM_ISWHITE(c))
3560 		status = RA_NOMATCH;
3561 	    else
3562 		ADVANCE_REGINPUT();
3563 	    break;
3564 
3565 	  case DIGIT:
3566 	    if (!ri_digit(c))
3567 		status = RA_NOMATCH;
3568 	    else
3569 		ADVANCE_REGINPUT();
3570 	    break;
3571 
3572 	  case NDIGIT:
3573 	    if (c == NUL || ri_digit(c))
3574 		status = RA_NOMATCH;
3575 	    else
3576 		ADVANCE_REGINPUT();
3577 	    break;
3578 
3579 	  case HEX:
3580 	    if (!ri_hex(c))
3581 		status = RA_NOMATCH;
3582 	    else
3583 		ADVANCE_REGINPUT();
3584 	    break;
3585 
3586 	  case NHEX:
3587 	    if (c == NUL || ri_hex(c))
3588 		status = RA_NOMATCH;
3589 	    else
3590 		ADVANCE_REGINPUT();
3591 	    break;
3592 
3593 	  case OCTAL:
3594 	    if (!ri_octal(c))
3595 		status = RA_NOMATCH;
3596 	    else
3597 		ADVANCE_REGINPUT();
3598 	    break;
3599 
3600 	  case NOCTAL:
3601 	    if (c == NUL || ri_octal(c))
3602 		status = RA_NOMATCH;
3603 	    else
3604 		ADVANCE_REGINPUT();
3605 	    break;
3606 
3607 	  case WORD:
3608 	    if (!ri_word(c))
3609 		status = RA_NOMATCH;
3610 	    else
3611 		ADVANCE_REGINPUT();
3612 	    break;
3613 
3614 	  case NWORD:
3615 	    if (c == NUL || ri_word(c))
3616 		status = RA_NOMATCH;
3617 	    else
3618 		ADVANCE_REGINPUT();
3619 	    break;
3620 
3621 	  case HEAD:
3622 	    if (!ri_head(c))
3623 		status = RA_NOMATCH;
3624 	    else
3625 		ADVANCE_REGINPUT();
3626 	    break;
3627 
3628 	  case NHEAD:
3629 	    if (c == NUL || ri_head(c))
3630 		status = RA_NOMATCH;
3631 	    else
3632 		ADVANCE_REGINPUT();
3633 	    break;
3634 
3635 	  case ALPHA:
3636 	    if (!ri_alpha(c))
3637 		status = RA_NOMATCH;
3638 	    else
3639 		ADVANCE_REGINPUT();
3640 	    break;
3641 
3642 	  case NALPHA:
3643 	    if (c == NUL || ri_alpha(c))
3644 		status = RA_NOMATCH;
3645 	    else
3646 		ADVANCE_REGINPUT();
3647 	    break;
3648 
3649 	  case LOWER:
3650 	    if (!ri_lower(c))
3651 		status = RA_NOMATCH;
3652 	    else
3653 		ADVANCE_REGINPUT();
3654 	    break;
3655 
3656 	  case NLOWER:
3657 	    if (c == NUL || ri_lower(c))
3658 		status = RA_NOMATCH;
3659 	    else
3660 		ADVANCE_REGINPUT();
3661 	    break;
3662 
3663 	  case UPPER:
3664 	    if (!ri_upper(c))
3665 		status = RA_NOMATCH;
3666 	    else
3667 		ADVANCE_REGINPUT();
3668 	    break;
3669 
3670 	  case NUPPER:
3671 	    if (c == NUL || ri_upper(c))
3672 		status = RA_NOMATCH;
3673 	    else
3674 		ADVANCE_REGINPUT();
3675 	    break;
3676 
3677 	  case EXACTLY:
3678 	    {
3679 		int	len;
3680 		char_u	*opnd;
3681 
3682 		opnd = OPERAND(scan);
3683 		// Inline the first byte, for speed.
3684 		if (*opnd != *rex.input
3685 			&& (!rex.reg_ic
3686 			    || (!enc_utf8
3687 			      && MB_TOLOWER(*opnd) != MB_TOLOWER(*rex.input))))
3688 		    status = RA_NOMATCH;
3689 		else if (*opnd == NUL)
3690 		{
3691 		    // match empty string always works; happens when "~" is
3692 		    // empty.
3693 		}
3694 		else
3695 		{
3696 		    if (opnd[1] == NUL && !(enc_utf8 && rex.reg_ic))
3697 		    {
3698 			len = 1;	// matched a single byte above
3699 		    }
3700 		    else
3701 		    {
3702 			// Need to match first byte again for multi-byte.
3703 			len = (int)STRLEN(opnd);
3704 			if (cstrncmp(opnd, rex.input, &len) != 0)
3705 			    status = RA_NOMATCH;
3706 		    }
3707 		    // Check for following composing character, unless %C
3708 		    // follows (skips over all composing chars).
3709 		    if (status != RA_NOMATCH
3710 			    && enc_utf8
3711 			    && UTF_COMPOSINGLIKE(rex.input, rex.input + len)
3712 			    && !rex.reg_icombine
3713 			    && OP(next) != RE_COMPOSING)
3714 		    {
3715 			// raaron: This code makes a composing character get
3716 			// ignored, which is the correct behavior (sometimes)
3717 			// for voweled Hebrew texts.
3718 			status = RA_NOMATCH;
3719 		    }
3720 		    if (status != RA_NOMATCH)
3721 			rex.input += len;
3722 		}
3723 	    }
3724 	    break;
3725 
3726 	  case ANYOF:
3727 	  case ANYBUT:
3728 	    if (c == NUL)
3729 		status = RA_NOMATCH;
3730 	    else if ((cstrchr(OPERAND(scan), c) == NULL) == (op == ANYOF))
3731 		status = RA_NOMATCH;
3732 	    else
3733 		ADVANCE_REGINPUT();
3734 	    break;
3735 
3736 	  case MULTIBYTECODE:
3737 	    if (has_mbyte)
3738 	    {
3739 		int	i, len;
3740 		char_u	*opnd;
3741 		int	opndc = 0, inpc;
3742 
3743 		opnd = OPERAND(scan);
3744 		// Safety check (just in case 'encoding' was changed since
3745 		// compiling the program).
3746 		if ((len = (*mb_ptr2len)(opnd)) < 2)
3747 		{
3748 		    status = RA_NOMATCH;
3749 		    break;
3750 		}
3751 		if (enc_utf8)
3752 		    opndc = utf_ptr2char(opnd);
3753 		if (enc_utf8 && utf_iscomposing(opndc))
3754 		{
3755 		    // When only a composing char is given match at any
3756 		    // position where that composing char appears.
3757 		    status = RA_NOMATCH;
3758 		    for (i = 0; rex.input[i] != NUL;
3759 						i += utf_ptr2len(rex.input + i))
3760 		    {
3761 			inpc = utf_ptr2char(rex.input + i);
3762 			if (!utf_iscomposing(inpc))
3763 			{
3764 			    if (i > 0)
3765 				break;
3766 			}
3767 			else if (opndc == inpc)
3768 			{
3769 			    // Include all following composing chars.
3770 			    len = i + utfc_ptr2len(rex.input + i);
3771 			    status = RA_MATCH;
3772 			    break;
3773 			}
3774 		    }
3775 		}
3776 		else
3777 		    for (i = 0; i < len; ++i)
3778 			if (opnd[i] != rex.input[i])
3779 			{
3780 			    status = RA_NOMATCH;
3781 			    break;
3782 			}
3783 		rex.input += len;
3784 	    }
3785 	    else
3786 		status = RA_NOMATCH;
3787 	    break;
3788 	  case RE_COMPOSING:
3789 	    if (enc_utf8)
3790 	    {
3791 		// Skip composing characters.
3792 		while (utf_iscomposing(utf_ptr2char(rex.input)))
3793 		    MB_CPTR_ADV(rex.input);
3794 	    }
3795 	    break;
3796 
3797 	  case NOTHING:
3798 	    break;
3799 
3800 	  case BACK:
3801 	    {
3802 		int		i;
3803 		backpos_T	*bp;
3804 
3805 		// When we run into BACK we need to check if we don't keep
3806 		// looping without matching any input.  The second and later
3807 		// times a BACK is encountered it fails if the input is still
3808 		// at the same position as the previous time.
3809 		// The positions are stored in "backpos" and found by the
3810 		// current value of "scan", the position in the RE program.
3811 		bp = (backpos_T *)backpos.ga_data;
3812 		for (i = 0; i < backpos.ga_len; ++i)
3813 		    if (bp[i].bp_scan == scan)
3814 			break;
3815 		if (i == backpos.ga_len)
3816 		{
3817 		    // First time at this BACK, make room to store the pos.
3818 		    if (ga_grow(&backpos, 1) == FAIL)
3819 			status = RA_FAIL;
3820 		    else
3821 		    {
3822 			// get "ga_data" again, it may have changed
3823 			bp = (backpos_T *)backpos.ga_data;
3824 			bp[i].bp_scan = scan;
3825 			++backpos.ga_len;
3826 		    }
3827 		}
3828 		else if (reg_save_equal(&bp[i].bp_pos))
3829 		    // Still at same position as last time, fail.
3830 		    status = RA_NOMATCH;
3831 
3832 		if (status != RA_FAIL && status != RA_NOMATCH)
3833 		    reg_save(&bp[i].bp_pos, &backpos);
3834 	    }
3835 	    break;
3836 
3837 	  case MOPEN + 0:   // Match start: \zs
3838 	  case MOPEN + 1:   // \(
3839 	  case MOPEN + 2:
3840 	  case MOPEN + 3:
3841 	  case MOPEN + 4:
3842 	  case MOPEN + 5:
3843 	  case MOPEN + 6:
3844 	  case MOPEN + 7:
3845 	  case MOPEN + 8:
3846 	  case MOPEN + 9:
3847 	    {
3848 		no = op - MOPEN;
3849 		cleanup_subexpr();
3850 		rp = regstack_push(RS_MOPEN, scan);
3851 		if (rp == NULL)
3852 		    status = RA_FAIL;
3853 		else
3854 		{
3855 		    rp->rs_no = no;
3856 		    save_se(&rp->rs_un.sesave, &rex.reg_startpos[no],
3857 							  &rex.reg_startp[no]);
3858 		    // We simply continue and handle the result when done.
3859 		}
3860 	    }
3861 	    break;
3862 
3863 	  case NOPEN:	    // \%(
3864 	  case NCLOSE:	    // \) after \%(
3865 		if (regstack_push(RS_NOPEN, scan) == NULL)
3866 		    status = RA_FAIL;
3867 		// We simply continue and handle the result when done.
3868 		break;
3869 
3870 #ifdef FEAT_SYN_HL
3871 	  case ZOPEN + 1:
3872 	  case ZOPEN + 2:
3873 	  case ZOPEN + 3:
3874 	  case ZOPEN + 4:
3875 	  case ZOPEN + 5:
3876 	  case ZOPEN + 6:
3877 	  case ZOPEN + 7:
3878 	  case ZOPEN + 8:
3879 	  case ZOPEN + 9:
3880 	    {
3881 		no = op - ZOPEN;
3882 		cleanup_zsubexpr();
3883 		rp = regstack_push(RS_ZOPEN, scan);
3884 		if (rp == NULL)
3885 		    status = RA_FAIL;
3886 		else
3887 		{
3888 		    rp->rs_no = no;
3889 		    save_se(&rp->rs_un.sesave, &reg_startzpos[no],
3890 							     &reg_startzp[no]);
3891 		    // We simply continue and handle the result when done.
3892 		}
3893 	    }
3894 	    break;
3895 #endif
3896 
3897 	  case MCLOSE + 0:  // Match end: \ze
3898 	  case MCLOSE + 1:  // \)
3899 	  case MCLOSE + 2:
3900 	  case MCLOSE + 3:
3901 	  case MCLOSE + 4:
3902 	  case MCLOSE + 5:
3903 	  case MCLOSE + 6:
3904 	  case MCLOSE + 7:
3905 	  case MCLOSE + 8:
3906 	  case MCLOSE + 9:
3907 	    {
3908 		no = op - MCLOSE;
3909 		cleanup_subexpr();
3910 		rp = regstack_push(RS_MCLOSE, scan);
3911 		if (rp == NULL)
3912 		    status = RA_FAIL;
3913 		else
3914 		{
3915 		    rp->rs_no = no;
3916 		    save_se(&rp->rs_un.sesave, &rex.reg_endpos[no],
3917 							    &rex.reg_endp[no]);
3918 		    // We simply continue and handle the result when done.
3919 		}
3920 	    }
3921 	    break;
3922 
3923 #ifdef FEAT_SYN_HL
3924 	  case ZCLOSE + 1:  // \) after \z(
3925 	  case ZCLOSE + 2:
3926 	  case ZCLOSE + 3:
3927 	  case ZCLOSE + 4:
3928 	  case ZCLOSE + 5:
3929 	  case ZCLOSE + 6:
3930 	  case ZCLOSE + 7:
3931 	  case ZCLOSE + 8:
3932 	  case ZCLOSE + 9:
3933 	    {
3934 		no = op - ZCLOSE;
3935 		cleanup_zsubexpr();
3936 		rp = regstack_push(RS_ZCLOSE, scan);
3937 		if (rp == NULL)
3938 		    status = RA_FAIL;
3939 		else
3940 		{
3941 		    rp->rs_no = no;
3942 		    save_se(&rp->rs_un.sesave, &reg_endzpos[no],
3943 							      &reg_endzp[no]);
3944 		    // We simply continue and handle the result when done.
3945 		}
3946 	    }
3947 	    break;
3948 #endif
3949 
3950 	  case BACKREF + 1:
3951 	  case BACKREF + 2:
3952 	  case BACKREF + 3:
3953 	  case BACKREF + 4:
3954 	  case BACKREF + 5:
3955 	  case BACKREF + 6:
3956 	  case BACKREF + 7:
3957 	  case BACKREF + 8:
3958 	  case BACKREF + 9:
3959 	    {
3960 		int		len;
3961 
3962 		no = op - BACKREF;
3963 		cleanup_subexpr();
3964 		if (!REG_MULTI)		// Single-line regexp
3965 		{
3966 		    if (rex.reg_startp[no] == NULL || rex.reg_endp[no] == NULL)
3967 		    {
3968 			// Backref was not set: Match an empty string.
3969 			len = 0;
3970 		    }
3971 		    else
3972 		    {
3973 			// Compare current input with back-ref in the same
3974 			// line.
3975 			len = (int)(rex.reg_endp[no] - rex.reg_startp[no]);
3976 			if (cstrncmp(rex.reg_startp[no], rex.input, &len) != 0)
3977 			    status = RA_NOMATCH;
3978 		    }
3979 		}
3980 		else				// Multi-line regexp
3981 		{
3982 		    if (rex.reg_startpos[no].lnum < 0
3983 						|| rex.reg_endpos[no].lnum < 0)
3984 		    {
3985 			// Backref was not set: Match an empty string.
3986 			len = 0;
3987 		    }
3988 		    else
3989 		    {
3990 			if (rex.reg_startpos[no].lnum == rex.lnum
3991 				&& rex.reg_endpos[no].lnum == rex.lnum)
3992 			{
3993 			    // Compare back-ref within the current line.
3994 			    len = rex.reg_endpos[no].col
3995 						    - rex.reg_startpos[no].col;
3996 			    if (cstrncmp(rex.line + rex.reg_startpos[no].col,
3997 							  rex.input, &len) != 0)
3998 				status = RA_NOMATCH;
3999 			}
4000 			else
4001 			{
4002 			    // Messy situation: Need to compare between two
4003 			    // lines.
4004 			    int r = match_with_backref(
4005 					    rex.reg_startpos[no].lnum,
4006 					    rex.reg_startpos[no].col,
4007 					    rex.reg_endpos[no].lnum,
4008 					    rex.reg_endpos[no].col,
4009 					    &len);
4010 
4011 			    if (r != RA_MATCH)
4012 				status = r;
4013 			}
4014 		    }
4015 		}
4016 
4017 		// Matched the backref, skip over it.
4018 		rex.input += len;
4019 	    }
4020 	    break;
4021 
4022 #ifdef FEAT_SYN_HL
4023 	  case ZREF + 1:
4024 	  case ZREF + 2:
4025 	  case ZREF + 3:
4026 	  case ZREF + 4:
4027 	  case ZREF + 5:
4028 	  case ZREF + 6:
4029 	  case ZREF + 7:
4030 	  case ZREF + 8:
4031 	  case ZREF + 9:
4032 	    {
4033 		int	len;
4034 
4035 		cleanup_zsubexpr();
4036 		no = op - ZREF;
4037 		if (re_extmatch_in != NULL
4038 			&& re_extmatch_in->matches[no] != NULL)
4039 		{
4040 		    len = (int)STRLEN(re_extmatch_in->matches[no]);
4041 		    if (cstrncmp(re_extmatch_in->matches[no],
4042 							  rex.input, &len) != 0)
4043 			status = RA_NOMATCH;
4044 		    else
4045 			rex.input += len;
4046 		}
4047 		else
4048 		{
4049 		    // Backref was not set: Match an empty string.
4050 		}
4051 	    }
4052 	    break;
4053 #endif
4054 
4055 	  case BRANCH:
4056 	    {
4057 		if (OP(next) != BRANCH) // No choice.
4058 		    next = OPERAND(scan);	// Avoid recursion.
4059 		else
4060 		{
4061 		    rp = regstack_push(RS_BRANCH, scan);
4062 		    if (rp == NULL)
4063 			status = RA_FAIL;
4064 		    else
4065 			status = RA_BREAK;	// rest is below
4066 		}
4067 	    }
4068 	    break;
4069 
4070 	  case BRACE_LIMITS:
4071 	    {
4072 		if (OP(next) == BRACE_SIMPLE)
4073 		{
4074 		    bl_minval = OPERAND_MIN(scan);
4075 		    bl_maxval = OPERAND_MAX(scan);
4076 		}
4077 		else if (OP(next) >= BRACE_COMPLEX
4078 			&& OP(next) < BRACE_COMPLEX + 10)
4079 		{
4080 		    no = OP(next) - BRACE_COMPLEX;
4081 		    brace_min[no] = OPERAND_MIN(scan);
4082 		    brace_max[no] = OPERAND_MAX(scan);
4083 		    brace_count[no] = 0;
4084 		}
4085 		else
4086 		{
4087 		    internal_error("BRACE_LIMITS");
4088 		    status = RA_FAIL;
4089 		}
4090 	    }
4091 	    break;
4092 
4093 	  case BRACE_COMPLEX + 0:
4094 	  case BRACE_COMPLEX + 1:
4095 	  case BRACE_COMPLEX + 2:
4096 	  case BRACE_COMPLEX + 3:
4097 	  case BRACE_COMPLEX + 4:
4098 	  case BRACE_COMPLEX + 5:
4099 	  case BRACE_COMPLEX + 6:
4100 	  case BRACE_COMPLEX + 7:
4101 	  case BRACE_COMPLEX + 8:
4102 	  case BRACE_COMPLEX + 9:
4103 	    {
4104 		no = op - BRACE_COMPLEX;
4105 		++brace_count[no];
4106 
4107 		// If not matched enough times yet, try one more
4108 		if (brace_count[no] <= (brace_min[no] <= brace_max[no]
4109 					     ? brace_min[no] : brace_max[no]))
4110 		{
4111 		    rp = regstack_push(RS_BRCPLX_MORE, scan);
4112 		    if (rp == NULL)
4113 			status = RA_FAIL;
4114 		    else
4115 		    {
4116 			rp->rs_no = no;
4117 			reg_save(&rp->rs_un.regsave, &backpos);
4118 			next = OPERAND(scan);
4119 			// We continue and handle the result when done.
4120 		    }
4121 		    break;
4122 		}
4123 
4124 		// If matched enough times, may try matching some more
4125 		if (brace_min[no] <= brace_max[no])
4126 		{
4127 		    // Range is the normal way around, use longest match
4128 		    if (brace_count[no] <= brace_max[no])
4129 		    {
4130 			rp = regstack_push(RS_BRCPLX_LONG, scan);
4131 			if (rp == NULL)
4132 			    status = RA_FAIL;
4133 			else
4134 			{
4135 			    rp->rs_no = no;
4136 			    reg_save(&rp->rs_un.regsave, &backpos);
4137 			    next = OPERAND(scan);
4138 			    // We continue and handle the result when done.
4139 			}
4140 		    }
4141 		}
4142 		else
4143 		{
4144 		    // Range is backwards, use shortest match first
4145 		    if (brace_count[no] <= brace_min[no])
4146 		    {
4147 			rp = regstack_push(RS_BRCPLX_SHORT, scan);
4148 			if (rp == NULL)
4149 			    status = RA_FAIL;
4150 			else
4151 			{
4152 			    reg_save(&rp->rs_un.regsave, &backpos);
4153 			    // We continue and handle the result when done.
4154 			}
4155 		    }
4156 		}
4157 	    }
4158 	    break;
4159 
4160 	  case BRACE_SIMPLE:
4161 	  case STAR:
4162 	  case PLUS:
4163 	    {
4164 		regstar_T	rst;
4165 
4166 		// Lookahead to avoid useless match attempts when we know
4167 		// what character comes next.
4168 		if (OP(next) == EXACTLY)
4169 		{
4170 		    rst.nextb = *OPERAND(next);
4171 		    if (rex.reg_ic)
4172 		    {
4173 			if (MB_ISUPPER(rst.nextb))
4174 			    rst.nextb_ic = MB_TOLOWER(rst.nextb);
4175 			else
4176 			    rst.nextb_ic = MB_TOUPPER(rst.nextb);
4177 		    }
4178 		    else
4179 			rst.nextb_ic = rst.nextb;
4180 		}
4181 		else
4182 		{
4183 		    rst.nextb = NUL;
4184 		    rst.nextb_ic = NUL;
4185 		}
4186 		if (op != BRACE_SIMPLE)
4187 		{
4188 		    rst.minval = (op == STAR) ? 0 : 1;
4189 		    rst.maxval = MAX_LIMIT;
4190 		}
4191 		else
4192 		{
4193 		    rst.minval = bl_minval;
4194 		    rst.maxval = bl_maxval;
4195 		}
4196 
4197 		// When maxval > minval, try matching as much as possible, up
4198 		// to maxval.  When maxval < minval, try matching at least the
4199 		// minimal number (since the range is backwards, that's also
4200 		// maxval!).
4201 		rst.count = regrepeat(OPERAND(scan), rst.maxval);
4202 		if (got_int)
4203 		{
4204 		    status = RA_FAIL;
4205 		    break;
4206 		}
4207 		if (rst.minval <= rst.maxval
4208 			  ? rst.count >= rst.minval : rst.count >= rst.maxval)
4209 		{
4210 		    // It could match.  Prepare for trying to match what
4211 		    // follows.  The code is below.  Parameters are stored in
4212 		    // a regstar_T on the regstack.
4213 		    if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4214 		    {
4215 			emsg(_(e_maxmempat));
4216 			status = RA_FAIL;
4217 		    }
4218 		    else if (ga_grow(&regstack, sizeof(regstar_T)) == FAIL)
4219 			status = RA_FAIL;
4220 		    else
4221 		    {
4222 			regstack.ga_len += sizeof(regstar_T);
4223 			rp = regstack_push(rst.minval <= rst.maxval
4224 					? RS_STAR_LONG : RS_STAR_SHORT, scan);
4225 			if (rp == NULL)
4226 			    status = RA_FAIL;
4227 			else
4228 			{
4229 			    *(((regstar_T *)rp) - 1) = rst;
4230 			    status = RA_BREAK;	    // skip the restore bits
4231 			}
4232 		    }
4233 		}
4234 		else
4235 		    status = RA_NOMATCH;
4236 
4237 	    }
4238 	    break;
4239 
4240 	  case NOMATCH:
4241 	  case MATCH:
4242 	  case SUBPAT:
4243 	    rp = regstack_push(RS_NOMATCH, scan);
4244 	    if (rp == NULL)
4245 		status = RA_FAIL;
4246 	    else
4247 	    {
4248 		rp->rs_no = op;
4249 		reg_save(&rp->rs_un.regsave, &backpos);
4250 		next = OPERAND(scan);
4251 		// We continue and handle the result when done.
4252 	    }
4253 	    break;
4254 
4255 	  case BEHIND:
4256 	  case NOBEHIND:
4257 	    // Need a bit of room to store extra positions.
4258 	    if ((long)((unsigned)regstack.ga_len >> 10) >= p_mmp)
4259 	    {
4260 		emsg(_(e_maxmempat));
4261 		status = RA_FAIL;
4262 	    }
4263 	    else if (ga_grow(&regstack, sizeof(regbehind_T)) == FAIL)
4264 		status = RA_FAIL;
4265 	    else
4266 	    {
4267 		regstack.ga_len += sizeof(regbehind_T);
4268 		rp = regstack_push(RS_BEHIND1, scan);
4269 		if (rp == NULL)
4270 		    status = RA_FAIL;
4271 		else
4272 		{
4273 		    // Need to save the subexpr to be able to restore them
4274 		    // when there is a match but we don't use it.
4275 		    save_subexpr(((regbehind_T *)rp) - 1);
4276 
4277 		    rp->rs_no = op;
4278 		    reg_save(&rp->rs_un.regsave, &backpos);
4279 		    // First try if what follows matches.  If it does then we
4280 		    // check the behind match by looping.
4281 		}
4282 	    }
4283 	    break;
4284 
4285 	  case BHPOS:
4286 	    if (REG_MULTI)
4287 	    {
4288 		if (behind_pos.rs_u.pos.col != (colnr_T)(rex.input - rex.line)
4289 			|| behind_pos.rs_u.pos.lnum != rex.lnum)
4290 		    status = RA_NOMATCH;
4291 	    }
4292 	    else if (behind_pos.rs_u.ptr != rex.input)
4293 		status = RA_NOMATCH;
4294 	    break;
4295 
4296 	  case NEWL:
4297 	    if ((c != NUL || !REG_MULTI || rex.lnum > rex.reg_maxline
4298 			     || rex.reg_line_lbr)
4299 					   && (c != '\n' || !rex.reg_line_lbr))
4300 		status = RA_NOMATCH;
4301 	    else if (rex.reg_line_lbr)
4302 		ADVANCE_REGINPUT();
4303 	    else
4304 		reg_nextline();
4305 	    break;
4306 
4307 	  case END:
4308 	    status = RA_MATCH;	// Success!
4309 	    break;
4310 
4311 	  default:
4312 	    iemsg(_(e_corrupted_regexp_program));
4313 #ifdef DEBUG
4314 	    printf("Illegal op code %d\n", op);
4315 #endif
4316 	    status = RA_FAIL;
4317 	    break;
4318 	  }
4319 	}
4320 
4321 	// If we can't continue sequentially, break the inner loop.
4322 	if (status != RA_CONT)
4323 	    break;
4324 
4325 	// Continue in inner loop, advance to next item.
4326 	scan = next;
4327 
4328     } // end of inner loop
4329 
4330     // If there is something on the regstack execute the code for the state.
4331     // If the state is popped then loop and use the older state.
4332     while (regstack.ga_len > 0 && status != RA_FAIL)
4333     {
4334 	rp = (regitem_T *)((char *)regstack.ga_data + regstack.ga_len) - 1;
4335 	switch (rp->rs_state)
4336 	{
4337 	  case RS_NOPEN:
4338 	    // Result is passed on as-is, simply pop the state.
4339 	    regstack_pop(&scan);
4340 	    break;
4341 
4342 	  case RS_MOPEN:
4343 	    // Pop the state.  Restore pointers when there is no match.
4344 	    if (status == RA_NOMATCH)
4345 		restore_se(&rp->rs_un.sesave, &rex.reg_startpos[rp->rs_no],
4346 						  &rex.reg_startp[rp->rs_no]);
4347 	    regstack_pop(&scan);
4348 	    break;
4349 
4350 #ifdef FEAT_SYN_HL
4351 	  case RS_ZOPEN:
4352 	    // Pop the state.  Restore pointers when there is no match.
4353 	    if (status == RA_NOMATCH)
4354 		restore_se(&rp->rs_un.sesave, &reg_startzpos[rp->rs_no],
4355 						 &reg_startzp[rp->rs_no]);
4356 	    regstack_pop(&scan);
4357 	    break;
4358 #endif
4359 
4360 	  case RS_MCLOSE:
4361 	    // Pop the state.  Restore pointers when there is no match.
4362 	    if (status == RA_NOMATCH)
4363 		restore_se(&rp->rs_un.sesave, &rex.reg_endpos[rp->rs_no],
4364 						    &rex.reg_endp[rp->rs_no]);
4365 	    regstack_pop(&scan);
4366 	    break;
4367 
4368 #ifdef FEAT_SYN_HL
4369 	  case RS_ZCLOSE:
4370 	    // Pop the state.  Restore pointers when there is no match.
4371 	    if (status == RA_NOMATCH)
4372 		restore_se(&rp->rs_un.sesave, &reg_endzpos[rp->rs_no],
4373 						   &reg_endzp[rp->rs_no]);
4374 	    regstack_pop(&scan);
4375 	    break;
4376 #endif
4377 
4378 	  case RS_BRANCH:
4379 	    if (status == RA_MATCH)
4380 		// this branch matched, use it
4381 		regstack_pop(&scan);
4382 	    else
4383 	    {
4384 		if (status != RA_BREAK)
4385 		{
4386 		    // After a non-matching branch: try next one.
4387 		    reg_restore(&rp->rs_un.regsave, &backpos);
4388 		    scan = rp->rs_scan;
4389 		}
4390 		if (scan == NULL || OP(scan) != BRANCH)
4391 		{
4392 		    // no more branches, didn't find a match
4393 		    status = RA_NOMATCH;
4394 		    regstack_pop(&scan);
4395 		}
4396 		else
4397 		{
4398 		    // Prepare to try a branch.
4399 		    rp->rs_scan = regnext(scan);
4400 		    reg_save(&rp->rs_un.regsave, &backpos);
4401 		    scan = OPERAND(scan);
4402 		}
4403 	    }
4404 	    break;
4405 
4406 	  case RS_BRCPLX_MORE:
4407 	    // Pop the state.  Restore pointers when there is no match.
4408 	    if (status == RA_NOMATCH)
4409 	    {
4410 		reg_restore(&rp->rs_un.regsave, &backpos);
4411 		--brace_count[rp->rs_no];	// decrement match count
4412 	    }
4413 	    regstack_pop(&scan);
4414 	    break;
4415 
4416 	  case RS_BRCPLX_LONG:
4417 	    // Pop the state.  Restore pointers when there is no match.
4418 	    if (status == RA_NOMATCH)
4419 	    {
4420 		// There was no match, but we did find enough matches.
4421 		reg_restore(&rp->rs_un.regsave, &backpos);
4422 		--brace_count[rp->rs_no];
4423 		// continue with the items after "\{}"
4424 		status = RA_CONT;
4425 	    }
4426 	    regstack_pop(&scan);
4427 	    if (status == RA_CONT)
4428 		scan = regnext(scan);
4429 	    break;
4430 
4431 	  case RS_BRCPLX_SHORT:
4432 	    // Pop the state.  Restore pointers when there is no match.
4433 	    if (status == RA_NOMATCH)
4434 		// There was no match, try to match one more item.
4435 		reg_restore(&rp->rs_un.regsave, &backpos);
4436 	    regstack_pop(&scan);
4437 	    if (status == RA_NOMATCH)
4438 	    {
4439 		scan = OPERAND(scan);
4440 		status = RA_CONT;
4441 	    }
4442 	    break;
4443 
4444 	  case RS_NOMATCH:
4445 	    // Pop the state.  If the operand matches for NOMATCH or
4446 	    // doesn't match for MATCH/SUBPAT, we fail.  Otherwise backup,
4447 	    // except for SUBPAT, and continue with the next item.
4448 	    if (status == (rp->rs_no == NOMATCH ? RA_MATCH : RA_NOMATCH))
4449 		status = RA_NOMATCH;
4450 	    else
4451 	    {
4452 		status = RA_CONT;
4453 		if (rp->rs_no != SUBPAT)	// zero-width
4454 		    reg_restore(&rp->rs_un.regsave, &backpos);
4455 	    }
4456 	    regstack_pop(&scan);
4457 	    if (status == RA_CONT)
4458 		scan = regnext(scan);
4459 	    break;
4460 
4461 	  case RS_BEHIND1:
4462 	    if (status == RA_NOMATCH)
4463 	    {
4464 		regstack_pop(&scan);
4465 		regstack.ga_len -= sizeof(regbehind_T);
4466 	    }
4467 	    else
4468 	    {
4469 		// The stuff after BEHIND/NOBEHIND matches.  Now try if
4470 		// the behind part does (not) match before the current
4471 		// position in the input.  This must be done at every
4472 		// position in the input and checking if the match ends at
4473 		// the current position.
4474 
4475 		// save the position after the found match for next
4476 		reg_save(&(((regbehind_T *)rp) - 1)->save_after, &backpos);
4477 
4478 		// Start looking for a match with operand at the current
4479 		// position.  Go back one character until we find the
4480 		// result, hitting the start of the line or the previous
4481 		// line (for multi-line matching).
4482 		// Set behind_pos to where the match should end, BHPOS
4483 		// will match it.  Save the current value.
4484 		(((regbehind_T *)rp) - 1)->save_behind = behind_pos;
4485 		behind_pos = rp->rs_un.regsave;
4486 
4487 		rp->rs_state = RS_BEHIND2;
4488 
4489 		reg_restore(&rp->rs_un.regsave, &backpos);
4490 		scan = OPERAND(rp->rs_scan) + 4;
4491 	    }
4492 	    break;
4493 
4494 	  case RS_BEHIND2:
4495 	    // Looping for BEHIND / NOBEHIND match.
4496 	    if (status == RA_MATCH && reg_save_equal(&behind_pos))
4497 	    {
4498 		// found a match that ends where "next" started
4499 		behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4500 		if (rp->rs_no == BEHIND)
4501 		    reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4502 								    &backpos);
4503 		else
4504 		{
4505 		    // But we didn't want a match.  Need to restore the
4506 		    // subexpr, because what follows matched, so they have
4507 		    // been set.
4508 		    status = RA_NOMATCH;
4509 		    restore_subexpr(((regbehind_T *)rp) - 1);
4510 		}
4511 		regstack_pop(&scan);
4512 		regstack.ga_len -= sizeof(regbehind_T);
4513 	    }
4514 	    else
4515 	    {
4516 		long limit;
4517 
4518 		// No match or a match that doesn't end where we want it: Go
4519 		// back one character.  May go to previous line once.
4520 		no = OK;
4521 		limit = OPERAND_MIN(rp->rs_scan);
4522 		if (REG_MULTI)
4523 		{
4524 		    if (limit > 0
4525 			    && ((rp->rs_un.regsave.rs_u.pos.lnum
4526 						    < behind_pos.rs_u.pos.lnum
4527 				    ? (colnr_T)STRLEN(rex.line)
4528 				    : behind_pos.rs_u.pos.col)
4529 				- rp->rs_un.regsave.rs_u.pos.col >= limit))
4530 			no = FAIL;
4531 		    else if (rp->rs_un.regsave.rs_u.pos.col == 0)
4532 		    {
4533 			if (rp->rs_un.regsave.rs_u.pos.lnum
4534 					< behind_pos.rs_u.pos.lnum
4535 				|| reg_getline(
4536 					--rp->rs_un.regsave.rs_u.pos.lnum)
4537 								  == NULL)
4538 			    no = FAIL;
4539 			else
4540 			{
4541 			    reg_restore(&rp->rs_un.regsave, &backpos);
4542 			    rp->rs_un.regsave.rs_u.pos.col =
4543 						 (colnr_T)STRLEN(rex.line);
4544 			}
4545 		    }
4546 		    else
4547 		    {
4548 			if (has_mbyte)
4549 			{
4550 			    char_u *line =
4551 				  reg_getline(rp->rs_un.regsave.rs_u.pos.lnum);
4552 
4553 			    rp->rs_un.regsave.rs_u.pos.col -=
4554 				(*mb_head_off)(line, line
4555 				    + rp->rs_un.regsave.rs_u.pos.col - 1) + 1;
4556 			}
4557 			else
4558 			    --rp->rs_un.regsave.rs_u.pos.col;
4559 		    }
4560 		}
4561 		else
4562 		{
4563 		    if (rp->rs_un.regsave.rs_u.ptr == rex.line)
4564 			no = FAIL;
4565 		    else
4566 		    {
4567 			MB_PTR_BACK(rex.line, rp->rs_un.regsave.rs_u.ptr);
4568 			if (limit > 0 && (long)(behind_pos.rs_u.ptr
4569 				     - rp->rs_un.regsave.rs_u.ptr) > limit)
4570 			    no = FAIL;
4571 		    }
4572 		}
4573 		if (no == OK)
4574 		{
4575 		    // Advanced, prepare for finding match again.
4576 		    reg_restore(&rp->rs_un.regsave, &backpos);
4577 		    scan = OPERAND(rp->rs_scan) + 4;
4578 		    if (status == RA_MATCH)
4579 		    {
4580 			// We did match, so subexpr may have been changed,
4581 			// need to restore them for the next try.
4582 			status = RA_NOMATCH;
4583 			restore_subexpr(((regbehind_T *)rp) - 1);
4584 		    }
4585 		}
4586 		else
4587 		{
4588 		    // Can't advance.  For NOBEHIND that's a match.
4589 		    behind_pos = (((regbehind_T *)rp) - 1)->save_behind;
4590 		    if (rp->rs_no == NOBEHIND)
4591 		    {
4592 			reg_restore(&(((regbehind_T *)rp) - 1)->save_after,
4593 								    &backpos);
4594 			status = RA_MATCH;
4595 		    }
4596 		    else
4597 		    {
4598 			// We do want a proper match.  Need to restore the
4599 			// subexpr if we had a match, because they may have
4600 			// been set.
4601 			if (status == RA_MATCH)
4602 			{
4603 			    status = RA_NOMATCH;
4604 			    restore_subexpr(((regbehind_T *)rp) - 1);
4605 			}
4606 		    }
4607 		    regstack_pop(&scan);
4608 		    regstack.ga_len -= sizeof(regbehind_T);
4609 		}
4610 	    }
4611 	    break;
4612 
4613 	  case RS_STAR_LONG:
4614 	  case RS_STAR_SHORT:
4615 	    {
4616 		regstar_T	    *rst = ((regstar_T *)rp) - 1;
4617 
4618 		if (status == RA_MATCH)
4619 		{
4620 		    regstack_pop(&scan);
4621 		    regstack.ga_len -= sizeof(regstar_T);
4622 		    break;
4623 		}
4624 
4625 		// Tried once already, restore input pointers.
4626 		if (status != RA_BREAK)
4627 		    reg_restore(&rp->rs_un.regsave, &backpos);
4628 
4629 		// Repeat until we found a position where it could match.
4630 		for (;;)
4631 		{
4632 		    if (status != RA_BREAK)
4633 		    {
4634 			// Tried first position already, advance.
4635 			if (rp->rs_state == RS_STAR_LONG)
4636 			{
4637 			    // Trying for longest match, but couldn't or
4638 			    // didn't match -- back up one char.
4639 			    if (--rst->count < rst->minval)
4640 				break;
4641 			    if (rex.input == rex.line)
4642 			    {
4643 				// backup to last char of previous line
4644 				--rex.lnum;
4645 				rex.line = reg_getline(rex.lnum);
4646 				// Just in case regrepeat() didn't count
4647 				// right.
4648 				if (rex.line == NULL)
4649 				    break;
4650 				rex.input = rex.line + STRLEN(rex.line);
4651 				fast_breakcheck();
4652 			    }
4653 			    else
4654 				MB_PTR_BACK(rex.line, rex.input);
4655 			}
4656 			else
4657 			{
4658 			    // Range is backwards, use shortest match first.
4659 			    // Careful: maxval and minval are exchanged!
4660 			    // Couldn't or didn't match: try advancing one
4661 			    // char.
4662 			    if (rst->count == rst->minval
4663 				  || regrepeat(OPERAND(rp->rs_scan), 1L) == 0)
4664 				break;
4665 			    ++rst->count;
4666 			}
4667 			if (got_int)
4668 			    break;
4669 		    }
4670 		    else
4671 			status = RA_NOMATCH;
4672 
4673 		    // If it could match, try it.
4674 		    if (rst->nextb == NUL || *rex.input == rst->nextb
4675 					     || *rex.input == rst->nextb_ic)
4676 		    {
4677 			reg_save(&rp->rs_un.regsave, &backpos);
4678 			scan = regnext(rp->rs_scan);
4679 			status = RA_CONT;
4680 			break;
4681 		    }
4682 		}
4683 		if (status != RA_CONT)
4684 		{
4685 		    // Failed.
4686 		    regstack_pop(&scan);
4687 		    regstack.ga_len -= sizeof(regstar_T);
4688 		    status = RA_NOMATCH;
4689 		}
4690 	    }
4691 	    break;
4692 	}
4693 
4694 	// If we want to continue the inner loop or didn't pop a state
4695 	// continue matching loop
4696 	if (status == RA_CONT || rp == (regitem_T *)
4697 			     ((char *)regstack.ga_data + regstack.ga_len) - 1)
4698 	    break;
4699     }
4700 
4701     // May need to continue with the inner loop, starting at "scan".
4702     if (status == RA_CONT)
4703 	continue;
4704 
4705     // If the regstack is empty or something failed we are done.
4706     if (regstack.ga_len == 0 || status == RA_FAIL)
4707     {
4708 	if (scan == NULL)
4709 	{
4710 	    // We get here only if there's trouble -- normally "case END" is
4711 	    // the terminating point.
4712 	    iemsg(_(e_corrupted_regexp_program));
4713 #ifdef DEBUG
4714 	    printf("Premature EOL\n");
4715 #endif
4716 	}
4717 	return (status == RA_MATCH);
4718     }
4719 
4720   } // End of loop until the regstack is empty.
4721 
4722   // NOTREACHED
4723 }
4724 
4725 /*
4726  * regtry - try match of "prog" with at rex.line["col"].
4727  * Returns 0 for failure, number of lines contained in the match otherwise.
4728  */
4729     static long
regtry(bt_regprog_T * prog,colnr_T col,proftime_T * tm,int * timed_out)4730 regtry(
4731     bt_regprog_T	*prog,
4732     colnr_T		col,
4733     proftime_T		*tm,		// timeout limit or NULL
4734     int			*timed_out)	// flag set on timeout or NULL
4735 {
4736     rex.input = rex.line + col;
4737     rex.need_clear_subexpr = TRUE;
4738 #ifdef FEAT_SYN_HL
4739     // Clear the external match subpointers if necessary.
4740     rex.need_clear_zsubexpr = (prog->reghasz == REX_SET);
4741 #endif
4742 
4743     if (regmatch(prog->program + 1, tm, timed_out) == 0)
4744 	return 0;
4745 
4746     cleanup_subexpr();
4747     if (REG_MULTI)
4748     {
4749 	if (rex.reg_startpos[0].lnum < 0)
4750 	{
4751 	    rex.reg_startpos[0].lnum = 0;
4752 	    rex.reg_startpos[0].col = col;
4753 	}
4754 	if (rex.reg_endpos[0].lnum < 0)
4755 	{
4756 	    rex.reg_endpos[0].lnum = rex.lnum;
4757 	    rex.reg_endpos[0].col = (int)(rex.input - rex.line);
4758 	}
4759 	else
4760 	    // Use line number of "\ze".
4761 	    rex.lnum = rex.reg_endpos[0].lnum;
4762     }
4763     else
4764     {
4765 	if (rex.reg_startp[0] == NULL)
4766 	    rex.reg_startp[0] = rex.line + col;
4767 	if (rex.reg_endp[0] == NULL)
4768 	    rex.reg_endp[0] = rex.input;
4769     }
4770 #ifdef FEAT_SYN_HL
4771     // Package any found \z(...\) matches for export. Default is none.
4772     unref_extmatch(re_extmatch_out);
4773     re_extmatch_out = NULL;
4774 
4775     if (prog->reghasz == REX_SET)
4776     {
4777 	int		i;
4778 
4779 	cleanup_zsubexpr();
4780 	re_extmatch_out = make_extmatch();
4781 	if (re_extmatch_out == NULL)
4782 	    return 0;
4783 	for (i = 0; i < NSUBEXP; i++)
4784 	{
4785 	    if (REG_MULTI)
4786 	    {
4787 		// Only accept single line matches.
4788 		if (reg_startzpos[i].lnum >= 0
4789 			&& reg_endzpos[i].lnum == reg_startzpos[i].lnum
4790 			&& reg_endzpos[i].col >= reg_startzpos[i].col)
4791 		    re_extmatch_out->matches[i] =
4792 			vim_strnsave(reg_getline(reg_startzpos[i].lnum)
4793 						       + reg_startzpos[i].col,
4794 				   reg_endzpos[i].col - reg_startzpos[i].col);
4795 	    }
4796 	    else
4797 	    {
4798 		if (reg_startzp[i] != NULL && reg_endzp[i] != NULL)
4799 		    re_extmatch_out->matches[i] =
4800 			    vim_strnsave(reg_startzp[i],
4801 						reg_endzp[i] - reg_startzp[i]);
4802 	    }
4803 	}
4804     }
4805 #endif
4806     return 1 + rex.lnum;
4807 }
4808 
4809 /*
4810  * Match a regexp against a string ("line" points to the string) or multiple
4811  * lines (if "line" is NULL, use reg_getline()).
4812  * Returns 0 for failure, number of lines contained in the match otherwise.
4813  */
4814     static long
bt_regexec_both(char_u * line,colnr_T col,proftime_T * tm,int * timed_out)4815 bt_regexec_both(
4816     char_u	*line,
4817     colnr_T	col,		// column to start looking for match
4818     proftime_T	*tm,		// timeout limit or NULL
4819     int		*timed_out)	// flag set on timeout or NULL
4820 {
4821     bt_regprog_T    *prog;
4822     char_u	    *s;
4823     long	    retval = 0L;
4824 
4825     // Create "regstack" and "backpos" if they are not allocated yet.
4826     // We allocate *_INITIAL amount of bytes first and then set the grow size
4827     // to much bigger value to avoid many malloc calls in case of deep regular
4828     // expressions.
4829     if (regstack.ga_data == NULL)
4830     {
4831 	// Use an item size of 1 byte, since we push different things
4832 	// onto the regstack.
4833 	ga_init2(&regstack, 1, REGSTACK_INITIAL);
4834 	(void)ga_grow(&regstack, REGSTACK_INITIAL);
4835 	regstack.ga_growsize = REGSTACK_INITIAL * 8;
4836     }
4837 
4838     if (backpos.ga_data == NULL)
4839     {
4840 	ga_init2(&backpos, sizeof(backpos_T), BACKPOS_INITIAL);
4841 	(void)ga_grow(&backpos, BACKPOS_INITIAL);
4842 	backpos.ga_growsize = BACKPOS_INITIAL * 8;
4843     }
4844 
4845     if (REG_MULTI)
4846     {
4847 	prog = (bt_regprog_T *)rex.reg_mmatch->regprog;
4848 	line = reg_getline((linenr_T)0);
4849 	rex.reg_startpos = rex.reg_mmatch->startpos;
4850 	rex.reg_endpos = rex.reg_mmatch->endpos;
4851     }
4852     else
4853     {
4854 	prog = (bt_regprog_T *)rex.reg_match->regprog;
4855 	rex.reg_startp = rex.reg_match->startp;
4856 	rex.reg_endp = rex.reg_match->endp;
4857     }
4858 
4859     // Be paranoid...
4860     if (prog == NULL || line == NULL)
4861     {
4862 	iemsg(_(e_null_argument));
4863 	goto theend;
4864     }
4865 
4866     // Check validity of program.
4867     if (prog_magic_wrong())
4868 	goto theend;
4869 
4870     // If the start column is past the maximum column: no need to try.
4871     if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
4872 	goto theend;
4873 
4874     // If pattern contains "\c" or "\C": overrule value of rex.reg_ic
4875     if (prog->regflags & RF_ICASE)
4876 	rex.reg_ic = TRUE;
4877     else if (prog->regflags & RF_NOICASE)
4878 	rex.reg_ic = FALSE;
4879 
4880     // If pattern contains "\Z" overrule value of rex.reg_icombine
4881     if (prog->regflags & RF_ICOMBINE)
4882 	rex.reg_icombine = TRUE;
4883 
4884     // If there is a "must appear" string, look for it.
4885     if (prog->regmust != NULL)
4886     {
4887 	int c;
4888 
4889 	if (has_mbyte)
4890 	    c = (*mb_ptr2char)(prog->regmust);
4891 	else
4892 	    c = *prog->regmust;
4893 	s = line + col;
4894 
4895 	// This is used very often, esp. for ":global".  Use three versions of
4896 	// the loop to avoid overhead of conditions.
4897 	if (!rex.reg_ic && !has_mbyte)
4898 	    while ((s = vim_strbyte(s, c)) != NULL)
4899 	    {
4900 		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4901 		    break;		// Found it.
4902 		++s;
4903 	    }
4904 	else if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
4905 	    while ((s = vim_strchr(s, c)) != NULL)
4906 	    {
4907 		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4908 		    break;		// Found it.
4909 		MB_PTR_ADV(s);
4910 	    }
4911 	else
4912 	    while ((s = cstrchr(s, c)) != NULL)
4913 	    {
4914 		if (cstrncmp(s, prog->regmust, &prog->regmlen) == 0)
4915 		    break;		// Found it.
4916 		MB_PTR_ADV(s);
4917 	    }
4918 	if (s == NULL)		// Not present.
4919 	    goto theend;
4920     }
4921 
4922     rex.line = line;
4923     rex.lnum = 0;
4924     reg_toolong = FALSE;
4925 
4926     // Simplest case: Anchored match need be tried only once.
4927     if (prog->reganch)
4928     {
4929 	int	c;
4930 
4931 	if (has_mbyte)
4932 	    c = (*mb_ptr2char)(rex.line + col);
4933 	else
4934 	    c = rex.line[col];
4935 	if (prog->regstart == NUL
4936 		|| prog->regstart == c
4937 		|| (rex.reg_ic
4938 		    && (((enc_utf8 && utf_fold(prog->regstart) == utf_fold(c)))
4939 			|| (c < 255 && prog->regstart < 255 &&
4940 			    MB_TOLOWER(prog->regstart) == MB_TOLOWER(c)))))
4941 	    retval = regtry(prog, col, tm, timed_out);
4942 	else
4943 	    retval = 0;
4944     }
4945     else
4946     {
4947 #ifdef FEAT_RELTIME
4948 	int tm_count = 0;
4949 #endif
4950 	// Messy cases:  unanchored match.
4951 	while (!got_int)
4952 	{
4953 	    if (prog->regstart != NUL)
4954 	    {
4955 		// Skip until the char we know it must start with.
4956 		// Used often, do some work to avoid call overhead.
4957 		if (!rex.reg_ic && !has_mbyte)
4958 		    s = vim_strbyte(rex.line + col, prog->regstart);
4959 		else
4960 		    s = cstrchr(rex.line + col, prog->regstart);
4961 		if (s == NULL)
4962 		{
4963 		    retval = 0;
4964 		    break;
4965 		}
4966 		col = (int)(s - rex.line);
4967 	    }
4968 
4969 	    // Check for maximum column to try.
4970 	    if (rex.reg_maxcol > 0 && col >= rex.reg_maxcol)
4971 	    {
4972 		retval = 0;
4973 		break;
4974 	    }
4975 
4976 	    retval = regtry(prog, col, tm, timed_out);
4977 	    if (retval > 0)
4978 		break;
4979 
4980 	    // if not currently on the first line, get it again
4981 	    if (rex.lnum != 0)
4982 	    {
4983 		rex.lnum = 0;
4984 		rex.line = reg_getline((linenr_T)0);
4985 	    }
4986 	    if (rex.line[col] == NUL)
4987 		break;
4988 	    if (has_mbyte)
4989 		col += (*mb_ptr2len)(rex.line + col);
4990 	    else
4991 		++col;
4992 #ifdef FEAT_RELTIME
4993 	    // Check for timeout once in a twenty times to avoid overhead.
4994 	    if (tm != NULL && ++tm_count == 20)
4995 	    {
4996 		tm_count = 0;
4997 		if (profile_passed_limit(tm))
4998 		{
4999 		    if (timed_out != NULL)
5000 			*timed_out = TRUE;
5001 		    break;
5002 		}
5003 	    }
5004 #endif
5005 	}
5006     }
5007 
5008 theend:
5009     // Free "reg_tofree" when it's a bit big.
5010     // Free regstack and backpos if they are bigger than their initial size.
5011     if (reg_tofreelen > 400)
5012 	VIM_CLEAR(reg_tofree);
5013     if (regstack.ga_maxlen > REGSTACK_INITIAL)
5014 	ga_clear(&regstack);
5015     if (backpos.ga_maxlen > BACKPOS_INITIAL)
5016 	ga_clear(&backpos);
5017 
5018     if (retval > 0)
5019     {
5020 	// Make sure the end is never before the start.  Can happen when \zs
5021 	// and \ze are used.
5022 	if (REG_MULTI)
5023 	{
5024 	    lpos_T *start = &rex.reg_mmatch->startpos[0];
5025 	    lpos_T *end = &rex.reg_mmatch->endpos[0];
5026 
5027 	    if (end->lnum < start->lnum
5028 			|| (end->lnum == start->lnum && end->col < start->col))
5029 		rex.reg_mmatch->endpos[0] = rex.reg_mmatch->startpos[0];
5030 	}
5031 	else
5032 	{
5033 	    if (rex.reg_match->endp[0] < rex.reg_match->startp[0])
5034 		rex.reg_match->endp[0] = rex.reg_match->startp[0];
5035 	}
5036     }
5037 
5038     return retval;
5039 }
5040 
5041 /*
5042  * Match a regexp against a string.
5043  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
5044  * Uses curbuf for line count and 'iskeyword'.
5045  * if "line_lbr" is TRUE  consider a "\n" in "line" to be a line break.
5046  *
5047  * Returns 0 for failure, number of lines contained in the match otherwise.
5048  */
5049     static int
bt_regexec_nl(regmatch_T * rmp,char_u * line,colnr_T col,int line_lbr)5050 bt_regexec_nl(
5051     regmatch_T	*rmp,
5052     char_u	*line,	// string to match against
5053     colnr_T	col,	// column to start looking for match
5054     int		line_lbr)
5055 {
5056     rex.reg_match = rmp;
5057     rex.reg_mmatch = NULL;
5058     rex.reg_maxline = 0;
5059     rex.reg_line_lbr = line_lbr;
5060     rex.reg_buf = curbuf;
5061     rex.reg_win = NULL;
5062     rex.reg_ic = rmp->rm_ic;
5063     rex.reg_icombine = FALSE;
5064     rex.reg_maxcol = 0;
5065 
5066     return bt_regexec_both(line, col, NULL, NULL);
5067 }
5068 
5069 /*
5070  * Match a regexp against multiple lines.
5071  * "rmp->regprog" is a compiled regexp as returned by vim_regcomp().
5072  * Uses curbuf for line count and 'iskeyword'.
5073  *
5074  * Return zero if there is no match.  Return number of lines contained in the
5075  * match otherwise.
5076  */
5077     static long
bt_regexec_multi(regmmatch_T * rmp,win_T * win,buf_T * buf,linenr_T lnum,colnr_T col,proftime_T * tm,int * timed_out)5078 bt_regexec_multi(
5079     regmmatch_T	*rmp,
5080     win_T	*win,		// window in which to search or NULL
5081     buf_T	*buf,		// buffer in which to search
5082     linenr_T	lnum,		// nr of line to start looking for match
5083     colnr_T	col,		// column to start looking for match
5084     proftime_T	*tm,		// timeout limit or NULL
5085     int		*timed_out)	// flag set on timeout or NULL
5086 {
5087     init_regexec_multi(rmp, win, buf, lnum);
5088     return bt_regexec_both(NULL, col, tm, timed_out);
5089 }
5090 
5091 /*
5092  * Compare a number with the operand of RE_LNUM, RE_COL or RE_VCOL.
5093  */
5094     static int
re_num_cmp(long_u val,char_u * scan)5095 re_num_cmp(long_u val, char_u *scan)
5096 {
5097     long_u  n = OPERAND_MIN(scan);
5098 
5099     if (OPERAND_CMP(scan) == '>')
5100 	return val > n;
5101     if (OPERAND_CMP(scan) == '<')
5102 	return val < n;
5103     return val == n;
5104 }
5105 
5106 #ifdef BT_REGEXP_DUMP
5107 
5108 /*
5109  * regdump - dump a regexp onto stdout in vaguely comprehensible form
5110  */
5111     static void
regdump(char_u * pattern,bt_regprog_T * r)5112 regdump(char_u *pattern, bt_regprog_T *r)
5113 {
5114     char_u  *s;
5115     int	    op = EXACTLY;	// Arbitrary non-END op.
5116     char_u  *next;
5117     char_u  *end = NULL;
5118     FILE    *f;
5119 
5120 #ifdef BT_REGEXP_LOG
5121     f = fopen("bt_regexp_log.log", "a");
5122 #else
5123     f = stdout;
5124 #endif
5125     if (f == NULL)
5126 	return;
5127     fprintf(f, "-------------------------------------\n\r\nregcomp(%s):\r\n", pattern);
5128 
5129     s = r->program + 1;
5130     // Loop until we find the END that isn't before a referred next (an END
5131     // can also appear in a NOMATCH operand).
5132     while (op != END || s <= end)
5133     {
5134 	op = OP(s);
5135 	fprintf(f, "%2d%s", (int)(s - r->program), regprop(s)); // Where, what.
5136 	next = regnext(s);
5137 	if (next == NULL)	// Next ptr.
5138 	    fprintf(f, "(0)");
5139 	else
5140 	    fprintf(f, "(%d)", (int)((s - r->program) + (next - s)));
5141 	if (end < next)
5142 	    end = next;
5143 	if (op == BRACE_LIMITS)
5144 	{
5145 	    // Two ints
5146 	    fprintf(f, " minval %ld, maxval %ld", OPERAND_MIN(s), OPERAND_MAX(s));
5147 	    s += 8;
5148 	}
5149 	else if (op == BEHIND || op == NOBEHIND)
5150 	{
5151 	    // one int
5152 	    fprintf(f, " count %ld", OPERAND_MIN(s));
5153 	    s += 4;
5154 	}
5155 	else if (op == RE_LNUM || op == RE_COL || op == RE_VCOL)
5156 	{
5157 	    // one int plus comparator
5158 	    fprintf(f, " count %ld", OPERAND_MIN(s));
5159 	    s += 5;
5160 	}
5161 	s += 3;
5162 	if (op == ANYOF || op == ANYOF + ADD_NL
5163 		|| op == ANYBUT || op == ANYBUT + ADD_NL
5164 		|| op == EXACTLY)
5165 	{
5166 	    // Literal string, where present.
5167 	    fprintf(f, "\nxxxxxxxxx\n");
5168 	    while (*s != NUL)
5169 		fprintf(f, "%c", *s++);
5170 	    fprintf(f, "\nxxxxxxxxx\n");
5171 	    s++;
5172 	}
5173 	fprintf(f, "\r\n");
5174     }
5175 
5176     // Header fields of interest.
5177     if (r->regstart != NUL)
5178 	fprintf(f, "start `%s' 0x%x; ", r->regstart < 256
5179 		? (char *)transchar(r->regstart)
5180 		: "multibyte", r->regstart);
5181     if (r->reganch)
5182 	fprintf(f, "anchored; ");
5183     if (r->regmust != NULL)
5184 	fprintf(f, "must have \"%s\"", r->regmust);
5185     fprintf(f, "\r\n");
5186 
5187 #ifdef BT_REGEXP_LOG
5188     fclose(f);
5189 #endif
5190 }
5191 #endif	    // BT_REGEXP_DUMP
5192 
5193 #ifdef DEBUG
5194 /*
5195  * regprop - printable representation of opcode
5196  */
5197     static char_u *
regprop(char_u * op)5198 regprop(char_u *op)
5199 {
5200     char	    *p;
5201     static char	    buf[50];
5202 
5203     STRCPY(buf, ":");
5204 
5205     switch ((int) OP(op))
5206     {
5207       case BOL:
5208 	p = "BOL";
5209 	break;
5210       case EOL:
5211 	p = "EOL";
5212 	break;
5213       case RE_BOF:
5214 	p = "BOF";
5215 	break;
5216       case RE_EOF:
5217 	p = "EOF";
5218 	break;
5219       case CURSOR:
5220 	p = "CURSOR";
5221 	break;
5222       case RE_VISUAL:
5223 	p = "RE_VISUAL";
5224 	break;
5225       case RE_LNUM:
5226 	p = "RE_LNUM";
5227 	break;
5228       case RE_MARK:
5229 	p = "RE_MARK";
5230 	break;
5231       case RE_COL:
5232 	p = "RE_COL";
5233 	break;
5234       case RE_VCOL:
5235 	p = "RE_VCOL";
5236 	break;
5237       case BOW:
5238 	p = "BOW";
5239 	break;
5240       case EOW:
5241 	p = "EOW";
5242 	break;
5243       case ANY:
5244 	p = "ANY";
5245 	break;
5246       case ANY + ADD_NL:
5247 	p = "ANY+NL";
5248 	break;
5249       case ANYOF:
5250 	p = "ANYOF";
5251 	break;
5252       case ANYOF + ADD_NL:
5253 	p = "ANYOF+NL";
5254 	break;
5255       case ANYBUT:
5256 	p = "ANYBUT";
5257 	break;
5258       case ANYBUT + ADD_NL:
5259 	p = "ANYBUT+NL";
5260 	break;
5261       case IDENT:
5262 	p = "IDENT";
5263 	break;
5264       case IDENT + ADD_NL:
5265 	p = "IDENT+NL";
5266 	break;
5267       case SIDENT:
5268 	p = "SIDENT";
5269 	break;
5270       case SIDENT + ADD_NL:
5271 	p = "SIDENT+NL";
5272 	break;
5273       case KWORD:
5274 	p = "KWORD";
5275 	break;
5276       case KWORD + ADD_NL:
5277 	p = "KWORD+NL";
5278 	break;
5279       case SKWORD:
5280 	p = "SKWORD";
5281 	break;
5282       case SKWORD + ADD_NL:
5283 	p = "SKWORD+NL";
5284 	break;
5285       case FNAME:
5286 	p = "FNAME";
5287 	break;
5288       case FNAME + ADD_NL:
5289 	p = "FNAME+NL";
5290 	break;
5291       case SFNAME:
5292 	p = "SFNAME";
5293 	break;
5294       case SFNAME + ADD_NL:
5295 	p = "SFNAME+NL";
5296 	break;
5297       case PRINT:
5298 	p = "PRINT";
5299 	break;
5300       case PRINT + ADD_NL:
5301 	p = "PRINT+NL";
5302 	break;
5303       case SPRINT:
5304 	p = "SPRINT";
5305 	break;
5306       case SPRINT + ADD_NL:
5307 	p = "SPRINT+NL";
5308 	break;
5309       case WHITE:
5310 	p = "WHITE";
5311 	break;
5312       case WHITE + ADD_NL:
5313 	p = "WHITE+NL";
5314 	break;
5315       case NWHITE:
5316 	p = "NWHITE";
5317 	break;
5318       case NWHITE + ADD_NL:
5319 	p = "NWHITE+NL";
5320 	break;
5321       case DIGIT:
5322 	p = "DIGIT";
5323 	break;
5324       case DIGIT + ADD_NL:
5325 	p = "DIGIT+NL";
5326 	break;
5327       case NDIGIT:
5328 	p = "NDIGIT";
5329 	break;
5330       case NDIGIT + ADD_NL:
5331 	p = "NDIGIT+NL";
5332 	break;
5333       case HEX:
5334 	p = "HEX";
5335 	break;
5336       case HEX + ADD_NL:
5337 	p = "HEX+NL";
5338 	break;
5339       case NHEX:
5340 	p = "NHEX";
5341 	break;
5342       case NHEX + ADD_NL:
5343 	p = "NHEX+NL";
5344 	break;
5345       case OCTAL:
5346 	p = "OCTAL";
5347 	break;
5348       case OCTAL + ADD_NL:
5349 	p = "OCTAL+NL";
5350 	break;
5351       case NOCTAL:
5352 	p = "NOCTAL";
5353 	break;
5354       case NOCTAL + ADD_NL:
5355 	p = "NOCTAL+NL";
5356 	break;
5357       case WORD:
5358 	p = "WORD";
5359 	break;
5360       case WORD + ADD_NL:
5361 	p = "WORD+NL";
5362 	break;
5363       case NWORD:
5364 	p = "NWORD";
5365 	break;
5366       case NWORD + ADD_NL:
5367 	p = "NWORD+NL";
5368 	break;
5369       case HEAD:
5370 	p = "HEAD";
5371 	break;
5372       case HEAD + ADD_NL:
5373 	p = "HEAD+NL";
5374 	break;
5375       case NHEAD:
5376 	p = "NHEAD";
5377 	break;
5378       case NHEAD + ADD_NL:
5379 	p = "NHEAD+NL";
5380 	break;
5381       case ALPHA:
5382 	p = "ALPHA";
5383 	break;
5384       case ALPHA + ADD_NL:
5385 	p = "ALPHA+NL";
5386 	break;
5387       case NALPHA:
5388 	p = "NALPHA";
5389 	break;
5390       case NALPHA + ADD_NL:
5391 	p = "NALPHA+NL";
5392 	break;
5393       case LOWER:
5394 	p = "LOWER";
5395 	break;
5396       case LOWER + ADD_NL:
5397 	p = "LOWER+NL";
5398 	break;
5399       case NLOWER:
5400 	p = "NLOWER";
5401 	break;
5402       case NLOWER + ADD_NL:
5403 	p = "NLOWER+NL";
5404 	break;
5405       case UPPER:
5406 	p = "UPPER";
5407 	break;
5408       case UPPER + ADD_NL:
5409 	p = "UPPER+NL";
5410 	break;
5411       case NUPPER:
5412 	p = "NUPPER";
5413 	break;
5414       case NUPPER + ADD_NL:
5415 	p = "NUPPER+NL";
5416 	break;
5417       case BRANCH:
5418 	p = "BRANCH";
5419 	break;
5420       case EXACTLY:
5421 	p = "EXACTLY";
5422 	break;
5423       case NOTHING:
5424 	p = "NOTHING";
5425 	break;
5426       case BACK:
5427 	p = "BACK";
5428 	break;
5429       case END:
5430 	p = "END";
5431 	break;
5432       case MOPEN + 0:
5433 	p = "MATCH START";
5434 	break;
5435       case MOPEN + 1:
5436       case MOPEN + 2:
5437       case MOPEN + 3:
5438       case MOPEN + 4:
5439       case MOPEN + 5:
5440       case MOPEN + 6:
5441       case MOPEN + 7:
5442       case MOPEN + 8:
5443       case MOPEN + 9:
5444 	sprintf(buf + STRLEN(buf), "MOPEN%d", OP(op) - MOPEN);
5445 	p = NULL;
5446 	break;
5447       case MCLOSE + 0:
5448 	p = "MATCH END";
5449 	break;
5450       case MCLOSE + 1:
5451       case MCLOSE + 2:
5452       case MCLOSE + 3:
5453       case MCLOSE + 4:
5454       case MCLOSE + 5:
5455       case MCLOSE + 6:
5456       case MCLOSE + 7:
5457       case MCLOSE + 8:
5458       case MCLOSE + 9:
5459 	sprintf(buf + STRLEN(buf), "MCLOSE%d", OP(op) - MCLOSE);
5460 	p = NULL;
5461 	break;
5462       case BACKREF + 1:
5463       case BACKREF + 2:
5464       case BACKREF + 3:
5465       case BACKREF + 4:
5466       case BACKREF + 5:
5467       case BACKREF + 6:
5468       case BACKREF + 7:
5469       case BACKREF + 8:
5470       case BACKREF + 9:
5471 	sprintf(buf + STRLEN(buf), "BACKREF%d", OP(op) - BACKREF);
5472 	p = NULL;
5473 	break;
5474       case NOPEN:
5475 	p = "NOPEN";
5476 	break;
5477       case NCLOSE:
5478 	p = "NCLOSE";
5479 	break;
5480 #ifdef FEAT_SYN_HL
5481       case ZOPEN + 1:
5482       case ZOPEN + 2:
5483       case ZOPEN + 3:
5484       case ZOPEN + 4:
5485       case ZOPEN + 5:
5486       case ZOPEN + 6:
5487       case ZOPEN + 7:
5488       case ZOPEN + 8:
5489       case ZOPEN + 9:
5490 	sprintf(buf + STRLEN(buf), "ZOPEN%d", OP(op) - ZOPEN);
5491 	p = NULL;
5492 	break;
5493       case ZCLOSE + 1:
5494       case ZCLOSE + 2:
5495       case ZCLOSE + 3:
5496       case ZCLOSE + 4:
5497       case ZCLOSE + 5:
5498       case ZCLOSE + 6:
5499       case ZCLOSE + 7:
5500       case ZCLOSE + 8:
5501       case ZCLOSE + 9:
5502 	sprintf(buf + STRLEN(buf), "ZCLOSE%d", OP(op) - ZCLOSE);
5503 	p = NULL;
5504 	break;
5505       case ZREF + 1:
5506       case ZREF + 2:
5507       case ZREF + 3:
5508       case ZREF + 4:
5509       case ZREF + 5:
5510       case ZREF + 6:
5511       case ZREF + 7:
5512       case ZREF + 8:
5513       case ZREF + 9:
5514 	sprintf(buf + STRLEN(buf), "ZREF%d", OP(op) - ZREF);
5515 	p = NULL;
5516 	break;
5517 #endif
5518       case STAR:
5519 	p = "STAR";
5520 	break;
5521       case PLUS:
5522 	p = "PLUS";
5523 	break;
5524       case NOMATCH:
5525 	p = "NOMATCH";
5526 	break;
5527       case MATCH:
5528 	p = "MATCH";
5529 	break;
5530       case BEHIND:
5531 	p = "BEHIND";
5532 	break;
5533       case NOBEHIND:
5534 	p = "NOBEHIND";
5535 	break;
5536       case SUBPAT:
5537 	p = "SUBPAT";
5538 	break;
5539       case BRACE_LIMITS:
5540 	p = "BRACE_LIMITS";
5541 	break;
5542       case BRACE_SIMPLE:
5543 	p = "BRACE_SIMPLE";
5544 	break;
5545       case BRACE_COMPLEX + 0:
5546       case BRACE_COMPLEX + 1:
5547       case BRACE_COMPLEX + 2:
5548       case BRACE_COMPLEX + 3:
5549       case BRACE_COMPLEX + 4:
5550       case BRACE_COMPLEX + 5:
5551       case BRACE_COMPLEX + 6:
5552       case BRACE_COMPLEX + 7:
5553       case BRACE_COMPLEX + 8:
5554       case BRACE_COMPLEX + 9:
5555 	sprintf(buf + STRLEN(buf), "BRACE_COMPLEX%d", OP(op) - BRACE_COMPLEX);
5556 	p = NULL;
5557 	break;
5558       case MULTIBYTECODE:
5559 	p = "MULTIBYTECODE";
5560 	break;
5561       case NEWL:
5562 	p = "NEWL";
5563 	break;
5564       default:
5565 	sprintf(buf + STRLEN(buf), "corrupt %d", OP(op));
5566 	p = NULL;
5567 	break;
5568     }
5569     if (p != NULL)
5570 	STRCAT(buf, p);
5571     return (char_u *)buf;
5572 }
5573 #endif	    // DEBUG
5574