1 /* vi:set ts=8 sts=4 sw=4: 2 * 3 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE 4 * 5 * This is NOT the original regular expression code as written by Henry 6 * Spencer. This code has been modified specifically for use with Vim, and 7 * should not be used apart from compiling Vim. If you want a good regular 8 * expression library, get the original code. 9 * 10 * NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE 11 */ 12 13 #ifndef _REGEXP_H 14 #define _REGEXP_H 15 16 /* 17 * The number of sub-matches is limited to 10. 18 * The first one (index 0) is the whole match, referenced with "\0". 19 * The second one (index 1) is the first sub-match, referenced with "\1". 20 * This goes up to the tenth (index 9), referenced with "\9". 21 */ 22 #define NSUBEXP 10 23 24 /* 25 * In the NFA engine: how many braces are allowed. 26 * TODO(RE): Use dynamic memory allocation instead of static, like here 27 */ 28 #define NFA_MAX_BRACES 20 29 30 /* 31 * In the NFA engine: how many states are allowed 32 */ 33 #define NFA_MAX_STATES 100000 34 #define NFA_TOO_EXPENSIVE -1 35 36 /* Which regexp engine to use? Needed for vim_regcomp(). 37 * Must match with 'regexpengine'. */ 38 #define AUTOMATIC_ENGINE 0 39 #define BACKTRACKING_ENGINE 1 40 #define NFA_ENGINE 2 41 42 typedef struct regengine regengine_T; 43 44 /* 45 * Structure returned by vim_regcomp() to pass on to vim_regexec(). 46 * This is the general structure. For the actual matcher, two specific 47 * structures are used. See code below. 48 */ 49 typedef struct regprog 50 { 51 regengine_T *engine; 52 unsigned regflags; 53 unsigned re_engine; /* automatic, backtracking or nfa engine */ 54 unsigned re_flags; /* second argument for vim_regcomp() */ 55 } regprog_T; 56 57 /* 58 * Structure used by the back track matcher. 59 * These fields are only to be used in regexp.c! 60 * See regexp.c for an explanation. 61 */ 62 typedef struct 63 { 64 /* These four members implement regprog_T */ 65 regengine_T *engine; 66 unsigned regflags; 67 unsigned re_engine; 68 unsigned re_flags; /* second argument for vim_regcomp() */ 69 70 int regstart; 71 char_u reganch; 72 char_u *regmust; 73 int regmlen; 74 #ifdef FEAT_SYN_HL 75 char_u reghasz; 76 #endif 77 char_u program[1]; /* actually longer.. */ 78 } bt_regprog_T; 79 80 /* 81 * Structure representing a NFA state. 82 * A NFA state may have no outgoing edge, when it is a NFA_MATCH state. 83 */ 84 typedef struct nfa_state nfa_state_T; 85 struct nfa_state 86 { 87 int c; 88 nfa_state_T *out; 89 nfa_state_T *out1; 90 int id; 91 int lastlist[2]; /* 0: normal, 1: recursive */ 92 int val; 93 }; 94 95 /* 96 * Structure used by the NFA matcher. 97 */ 98 typedef struct 99 { 100 /* These three members implement regprog_T */ 101 regengine_T *engine; 102 unsigned regflags; 103 unsigned re_engine; 104 unsigned re_flags; /* second argument for vim_regcomp() */ 105 106 nfa_state_T *start; /* points into state[] */ 107 108 int reganch; /* pattern starts with ^ */ 109 int regstart; /* char at start of pattern */ 110 char_u *match_text; /* plain text to match with */ 111 112 int has_zend; /* pattern contains \ze */ 113 int has_backref; /* pattern contains \1 .. \9 */ 114 #ifdef FEAT_SYN_HL 115 int reghasz; 116 #endif 117 char_u *pattern; 118 int nsubexp; /* number of () */ 119 int nstate; 120 nfa_state_T state[1]; /* actually longer.. */ 121 } nfa_regprog_T; 122 123 /* 124 * Structure to be used for single-line matching. 125 * Sub-match "no" starts at "startp[no]" and ends just before "endp[no]". 126 * When there is no match, the pointer is NULL. 127 */ 128 typedef struct 129 { 130 regprog_T *regprog; 131 char_u *startp[NSUBEXP]; 132 char_u *endp[NSUBEXP]; 133 int rm_ic; 134 } regmatch_T; 135 136 /* 137 * Structure to be used for multi-line matching. 138 * Sub-match "no" starts in line "startpos[no].lnum" column "startpos[no].col" 139 * and ends in line "endpos[no].lnum" just before column "endpos[no].col". 140 * The line numbers are relative to the first line, thus startpos[0].lnum is 141 * always 0. 142 * When there is no match, the line number is -1. 143 */ 144 typedef struct 145 { 146 regprog_T *regprog; 147 lpos_T startpos[NSUBEXP]; 148 lpos_T endpos[NSUBEXP]; 149 int rmm_ic; 150 colnr_T rmm_maxcol; /* when not zero: maximum column */ 151 } regmmatch_T; 152 153 /* 154 * Structure used to store external references: "\z\(\)" to "\z\1". 155 * Use a reference count to avoid the need to copy this around. When it goes 156 * from 1 to zero the matches need to be freed. 157 */ 158 typedef struct 159 { 160 short refcnt; 161 char_u *matches[NSUBEXP]; 162 } reg_extmatch_T; 163 164 struct regengine 165 { 166 regprog_T *(*regcomp)(char_u*, int); 167 void (*regfree)(regprog_T *); 168 int (*regexec_nl)(regmatch_T*, char_u*, colnr_T, int); 169 long (*regexec_multi)(regmmatch_T*, win_T*, buf_T*, linenr_T, colnr_T, proftime_T*); 170 char_u *expr; 171 }; 172 173 #endif /* _REGEXP_H */ 174