/* xref: /vim-8.2.3635/src/regexp.h (revision 2bf24176) */
/* vi:set ts=8 sts=4 sw=4:
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 *
 * This is NOT the original regular expression code as written by Henry
 * Spencer.  This code has been modified specifically for use with Vim, and
 * should not be used apart from compiling Vim.  If you want a good regular
 * expression library, get the original code.
 *
 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE
 */

13 #ifndef _REGEXP_H
14 #define _REGEXP_H
15 
/*
 * The number of sub-matches is limited to 10.
 * The first one (index 0) is the whole match, referenced with "\0".
 * The second one (index 1) is the first sub-match, referenced with "\1".
 * This goes up to the tenth (index 9), referenced with "\9".
 *
 * NSUBEXP is the length of the per-match arrays declared further down in
 * this header (startp[]/endp[], startpos[]/endpos[] and matches[]).
 */
#define NSUBEXP  10

/*
 * In the NFA engine: how many braces are allowed.
 * TODO(RE): Use dynamic memory allocation instead of static, like here
 */
#define NFA_MAX_BRACES 20

/*
 * In the NFA engine: how many states are allowed.
 */
#define NFA_MAX_STATES 100000

/*
 * NOTE(review): the name suggests a sentinel meaning "too expensive for the
 * NFA engine"; confirm against its users in the NFA matcher.
 * The value is parenthesized so the negative constant always expands as a
 * single operand, whatever expression the macro is used in.
 */
#define NFA_TOO_EXPENSIVE (-1)

/*
 * Which regexp engine to use? Needed for vim_regcomp().
 * Must match with 'regexpengine'.
 */
#define	    AUTOMATIC_ENGINE	0
#define	    BACKTRACKING_ENGINE	1
#define	    NFA_ENGINE		2

/*
 * Forward declaration: "struct regengine" is defined at the end of this
 * header because its function pointers take the program and match structures
 * declared below, while regprog_T itself holds a pointer to it.
 */
typedef struct regengine regengine_T;

/*
 * Structure returned by vim_regcomp() to pass on to vim_regexec().
 * This is the general structure. For the actual matcher, two specific
 * structures are used (bt_regprog_T and nfa_regprog_T below); both start
 * with these same four members.
 */
typedef struct regprog
{
    regengine_T		*engine;
    unsigned		regflags;
    unsigned		re_engine;   /* automatic, backtracking or nfa engine */
    unsigned		re_flags;    /* second argument for vim_regcomp() */
} regprog_T;

57 /*
58  * Structure used by the back track matcher.
59  * These fields are only to be used in regexp.c!
60  * See regexp.c for an explanation.
61  */
62 typedef struct
63 {
64     /* These four members implement regprog_T */
65     regengine_T		*engine;
66     unsigned		regflags;
67     unsigned		re_engine;
68     unsigned		re_flags;    /* second argument for vim_regcomp() */
69 
70     int			regstart;
71     char_u		reganch;
72     char_u		*regmust;
73     int			regmlen;
74 #ifdef FEAT_SYN_HL
75     char_u		reghasz;
76 #endif
77     char_u		program[1];	/* actually longer.. */
78 } bt_regprog_T;
79 
/*
 * Structure representing an NFA state.
 * An NFA state may have no outgoing edge, when it is a NFA_MATCH state.
 * "out" and "out1" are the (up to two) links to other states.
 */
typedef struct nfa_state nfa_state_T;
struct nfa_state
{
    int			c;
    nfa_state_T		*out;
    nfa_state_T		*out1;
    int			id;
    int			lastlist[2]; /* 0: normal, 1: recursive */
    int			val;
};

95 /*
96  * Structure used by the NFA matcher.
97  */
98 typedef struct
99 {
100     /* These three members implement regprog_T */
101     regengine_T		*engine;
102     unsigned		regflags;
103     unsigned		re_engine;
104     unsigned		re_flags;    /* second argument for vim_regcomp() */
105 
106     nfa_state_T		*start;		/* points into state[] */
107 
108     int			reganch;	/* pattern starts with ^ */
109     int			regstart;	/* char at start of pattern */
110     char_u		*match_text;	/* plain text to match with */
111 
112     int			has_zend;	/* pattern contains \ze */
113     int			has_backref;	/* pattern contains \1 .. \9 */
114 #ifdef FEAT_SYN_HL
115     int			reghasz;
116 #endif
117     char_u		*pattern;
118     int			nsubexp;	/* number of () */
119     int			nstate;
120     nfa_state_T		state[1];	/* actually longer.. */
121 } nfa_regprog_T;
122 
123 /*
124  * Structure to be used for single-line matching.
125  * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]".
126  * When there is no match, the pointer is NULL.
127  */
128 typedef struct
129 {
130     regprog_T		*regprog;
131     char_u		*startp[NSUBEXP];
132     char_u		*endp[NSUBEXP];
133     int			rm_ic;
134 } regmatch_T;
135 
136 /*
137  * Structure to be used for multi-line matching.
138  * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col"
139  * and ends in line "endpos[no].lnum" just before column "endpos[no].col".
140  * The line numbers are relative to the first line, thus startpos[0].lnum is
141  * always 0.
142  * When there is no match, the line number is -1.
143  */
144 typedef struct
145 {
146     regprog_T		*regprog;
147     lpos_T		startpos[NSUBEXP];
148     lpos_T		endpos[NSUBEXP];
149     int			rmm_ic;
150     colnr_T		rmm_maxcol;	/* when not zero: maximum column */
151 } regmmatch_T;
152 
153 /*
154  * Structure used to store external references: "\z\(\)" to "\z\1".
155  * Use a reference count to avoid the need to copy this around.  When it goes
156  * from 1 to zero the matches need to be freed.
157  */
158 typedef struct
159 {
160     short		refcnt;
161     char_u		*matches[NSUBEXP];
162 } reg_extmatch_T;
163 
164 struct regengine
165 {
166     regprog_T	*(*regcomp)(char_u*, int);
167     void	(*regfree)(regprog_T *);
168     int		(*regexec_nl)(regmatch_T*, char_u*, colnr_T, int);
169     long	(*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*);
170     char_u	*expr;
171 };
172 
173 #endif	/* _REGEXP_H */
174