1 /*
2 * Copyright (C) 1984-2021 Mark Nudelman
3 *
4 * You may distribute under the terms of either the GNU General Public
5 * License or the Less License, as specified in the README file.
6 *
7 * For more information, see the README file.
8 */
9
10 /*
11 * Routines to do pattern matching.
12 */
13
14 #include "less.h"
15
16 extern int caseless;
17 extern int utf_mode;
18
19 /*
20 * Compile a search pattern, for future use by match_pattern.
21 */
22 static int
compile_pattern2(pattern,search_type,comp_pattern,show_error)23 compile_pattern2(pattern, search_type, comp_pattern, show_error)
24 char *pattern;
25 int search_type;
26 PATTERN_TYPE *comp_pattern;
27 int show_error;
28 {
29 if (search_type & SRCH_NO_REGEX)
30 return (0);
31 {
32 #if HAVE_GNU_REGEX
33 struct re_pattern_buffer *comp = (struct re_pattern_buffer *)
34 ecalloc(1, sizeof(struct re_pattern_buffer));
35 re_set_syntax(RE_SYNTAX_POSIX_EXTENDED);
36 if (re_compile_pattern(pattern, strlen(pattern), comp))
37 {
38 free(comp);
39 if (show_error)
40 error("Invalid pattern", NULL_PARG);
41 return (-1);
42 }
43 if (*comp_pattern != NULL)
44 {
45 regfree(*comp_pattern);
46 free(*comp_pattern);
47 }
48 *comp_pattern = comp;
49 #endif
50 #if HAVE_POSIX_REGCOMP
51 regex_t *comp = (regex_t *) ecalloc(1, sizeof(regex_t));
52 if (regcomp(comp, pattern, REGCOMP_FLAG))
53 {
54 free(comp);
55 if (show_error)
56 error("Invalid pattern", NULL_PARG);
57 return (-1);
58 }
59 if (*comp_pattern != NULL)
60 {
61 regfree(*comp_pattern);
62 free(*comp_pattern);
63 }
64 *comp_pattern = comp;
65 #endif
66 #if HAVE_PCRE
67 constant char *errstring;
68 int erroffset;
69 PARG parg;
70 pcre *comp = pcre_compile(pattern,
71 (utf_mode) ? PCRE_UTF8 | PCRE_NO_UTF8_CHECK : 0,
72 &errstring, &erroffset, NULL);
73 if (comp == NULL)
74 {
75 parg.p_string = (char *) errstring;
76 if (show_error)
77 error("%s", &parg);
78 return (-1);
79 }
80 *comp_pattern = comp;
81 #endif
82 #if HAVE_PCRE2
83 int errcode;
84 PCRE2_SIZE erroffset;
85 PARG parg;
86 pcre2_code *comp = pcre2_compile((PCRE2_SPTR)pattern, strlen(pattern),
87 0, &errcode, &erroffset, NULL);
88 if (comp == NULL)
89 {
90 if (show_error)
91 {
92 char msg[160];
93 pcre2_get_error_message(errcode, (PCRE2_UCHAR*)msg, sizeof(msg));
94 parg.p_string = msg;
95 error("%s", &parg);
96 }
97 return (-1);
98 }
99 *comp_pattern = comp;
100 #endif
101 #if HAVE_RE_COMP
102 PARG parg;
103 if ((parg.p_string = re_comp(pattern)) != NULL)
104 {
105 if (show_error)
106 error("%s", &parg);
107 return (-1);
108 }
109 *comp_pattern = 1;
110 #endif
111 #if HAVE_REGCMP
112 char *comp;
113 if ((comp = regcmp(pattern, 0)) == NULL)
114 {
115 if (show_error)
116 error("Invalid pattern", NULL_PARG);
117 return (-1);
118 }
119 if (comp_pattern != NULL)
120 free(*comp_pattern);
121 *comp_pattern = comp;
122 #endif
123 #if HAVE_V8_REGCOMP
124 struct regexp *comp;
125 reg_show_error = show_error;
126 comp = regcomp(pattern);
127 reg_show_error = 1;
128 if (comp == NULL)
129 {
130 /*
131 * regcomp has already printed an error message
132 * via regerror().
133 */
134 return (-1);
135 }
136 if (*comp_pattern != NULL)
137 free(*comp_pattern);
138 *comp_pattern = comp;
139 #endif
140 }
141 return (0);
142 }
143
144 /*
145 * Like compile_pattern2, but convert the pattern to lowercase if necessary.
146 */
147 public int
compile_pattern(pattern,search_type,show_error,comp_pattern)148 compile_pattern(pattern, search_type, show_error, comp_pattern)
149 char *pattern;
150 int search_type;
151 int show_error;
152 PATTERN_TYPE *comp_pattern;
153 {
154 char *cvt_pattern;
155 int result;
156
157 if (caseless != OPT_ONPLUS)
158 cvt_pattern = pattern;
159 else
160 {
161 cvt_pattern = (char*) ecalloc(1, cvt_length(strlen(pattern), CVT_TO_LC));
162 cvt_text(cvt_pattern, pattern, (int *)NULL, (int *)NULL, CVT_TO_LC);
163 }
164 result = compile_pattern2(cvt_pattern, search_type, comp_pattern, show_error);
165 if (cvt_pattern != pattern)
166 free(cvt_pattern);
167 return (result);
168 }
169
170 /*
171 * Forget that we have a compiled pattern.
172 */
173 public void
uncompile_pattern(pattern)174 uncompile_pattern(pattern)
175 PATTERN_TYPE *pattern;
176 {
177 #if HAVE_GNU_REGEX
178 if (*pattern != NULL)
179 {
180 regfree(*pattern);
181 free(*pattern);
182 }
183 *pattern = NULL;
184 #endif
185 #if HAVE_POSIX_REGCOMP
186 if (*pattern != NULL)
187 {
188 regfree(*pattern);
189 free(*pattern);
190 }
191 *pattern = NULL;
192 #endif
193 #if HAVE_PCRE
194 if (*pattern != NULL)
195 pcre_free(*pattern);
196 *pattern = NULL;
197 #endif
198 #if HAVE_PCRE2
199 if (*pattern != NULL)
200 pcre2_code_free(*pattern);
201 *pattern = NULL;
202 #endif
203 #if HAVE_RE_COMP
204 *pattern = 0;
205 #endif
206 #if HAVE_REGCMP
207 if (*pattern != NULL)
208 free(*pattern);
209 *pattern = NULL;
210 #endif
211 #if HAVE_V8_REGCOMP
212 if (*pattern != NULL)
213 free(*pattern);
214 *pattern = NULL;
215 #endif
216 }
217
#if 0
/*
 * Test whether a pattern compiles.
 * Returns 1 if it does, 0 if it does not.
 * No error message is shown and the compiled result is discarded.
 */
public int
valid_pattern(pattern)
	char *pattern;
{
	PATTERN_TYPE scratch;

	SET_NULL_PATTERN(scratch);
	if (compile_pattern2(pattern, 0, &scratch, 0) != 0)
		return (0);
	uncompile_pattern(&scratch);
	return (1);
}
#endif
237
238 /*
239 * Is a compiled pattern null?
240 */
241 public int
is_null_pattern(pattern)242 is_null_pattern(pattern)
243 PATTERN_TYPE pattern;
244 {
245 #if HAVE_GNU_REGEX
246 return (pattern == NULL);
247 #endif
248 #if HAVE_POSIX_REGCOMP
249 return (pattern == NULL);
250 #endif
251 #if HAVE_PCRE
252 return (pattern == NULL);
253 #endif
254 #if HAVE_PCRE2
255 return (pattern == NULL);
256 #endif
257 #if HAVE_RE_COMP
258 return (pattern == 0);
259 #endif
260 #if HAVE_REGCMP
261 return (pattern == NULL);
262 #endif
263 #if HAVE_V8_REGCOMP
264 return (pattern == NULL);
265 #endif
266 #if NO_REGEX
267 return (pattern == NULL);
268 #endif
269 }
270
271 /*
272 * Simple pattern matching function.
273 * It supports no metacharacters like *, etc.
274 */
275 static int
match(pattern,pattern_len,buf,buf_len,pfound,pend)276 match(pattern, pattern_len, buf, buf_len, pfound, pend)
277 char *pattern;
278 int pattern_len;
279 char *buf;
280 int buf_len;
281 char **pfound, **pend;
282 {
283 char *pp, *lp;
284 char *pattern_end = pattern + pattern_len;
285 char *buf_end = buf + buf_len;
286
287 for ( ; buf < buf_end; buf++)
288 {
289 for (pp = pattern, lp = buf; ; pp++, lp++)
290 {
291 char cp = *pp;
292 char cl = *lp;
293 if (caseless == OPT_ONPLUS && ASCII_IS_UPPER(cp))
294 cp = ASCII_TO_LOWER(cp);
295 if (cp != cl)
296 break;
297 if (pp == pattern_end || lp == buf_end)
298 break;
299 }
300 if (pp == pattern_end)
301 {
302 if (pfound != NULL)
303 *pfound = buf;
304 if (pend != NULL)
305 *pend = lp;
306 return (1);
307 }
308 }
309 return (0);
310 }
311
312 /*
313 * Perform a pattern match with the previously compiled pattern.
314 * Set sp and ep to the start and end of the matched string.
315 */
316 public int
match_pattern(pattern,tpattern,line,line_len,sp,ep,notbol,search_type)317 match_pattern(pattern, tpattern, line, line_len, sp, ep, notbol, search_type)
318 PATTERN_TYPE pattern;
319 char *tpattern;
320 char *line;
321 int line_len;
322 char **sp;
323 char **ep;
324 int notbol;
325 int search_type;
326 {
327 int matched;
328
329 *sp = *ep = NULL;
330 #if NO_REGEX
331 search_type |= SRCH_NO_REGEX;
332 #endif
333 if (search_type & SRCH_NO_REGEX)
334 matched = match(tpattern, strlen(tpattern), line, line_len, sp, ep);
335 else
336 {
337 #if HAVE_GNU_REGEX
338 {
339 struct re_registers search_regs;
340 pattern->not_bol = notbol;
341 pattern->regs_allocated = REGS_UNALLOCATED;
342 matched = re_search(pattern, line, line_len, 0, line_len, &search_regs) >= 0;
343 if (matched)
344 {
345 *sp = line + search_regs.start[0];
346 *ep = line + search_regs.end[0];
347 }
348 }
349 #endif
350 #if HAVE_POSIX_REGCOMP
351 {
352 regmatch_t rm;
353 int flags = (notbol) ? REG_NOTBOL : 0;
354 #ifdef REG_STARTEND
355 flags |= REG_STARTEND;
356 rm.rm_so = 0;
357 rm.rm_eo = line_len;
358 #endif
359 matched = !regexec(pattern, line, 1, &rm, flags);
360 if (matched)
361 {
362 #ifndef __WATCOMC__
363 *sp = line + rm.rm_so;
364 *ep = line + rm.rm_eo;
365 #else
366 *sp = rm.rm_sp;
367 *ep = rm.rm_ep;
368 #endif
369 }
370 }
371 #endif
372 #if HAVE_PCRE
373 {
374 int flags = (notbol) ? PCRE_NOTBOL : 0;
375 int ovector[3];
376 matched = pcre_exec(pattern, NULL, line, line_len,
377 0, flags, ovector, 3) >= 0;
378 if (matched)
379 {
380 *sp = line + ovector[0];
381 *ep = line + ovector[1];
382 }
383 }
384 #endif
385 #if HAVE_PCRE2
386 {
387 int flags = (notbol) ? PCRE2_NOTBOL : 0;
388 pcre2_match_data *md = pcre2_match_data_create(3, NULL);
389 matched = pcre2_match(pattern, (PCRE2_SPTR)line, line_len,
390 0, flags, md, NULL) >= 0;
391 if (matched)
392 {
393 PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(md);
394 *sp = line + ovector[0];
395 *ep = line + ovector[1];
396 }
397 pcre2_match_data_free(md);
398 }
399 #endif
400 #if HAVE_RE_COMP
401 matched = (re_exec(line) == 1);
402 /*
403 * re_exec doesn't seem to provide a way to get the matched string.
404 */
405 *sp = *ep = NULL;
406 #endif
407 #if HAVE_REGCMP
408 *ep = regex(pattern, line);
409 matched = (*ep != NULL);
410 if (matched)
411 *sp = __loc1;
412 #endif
413 #if HAVE_V8_REGCOMP
414 #if HAVE_REGEXEC2
415 matched = regexec2(pattern, line, notbol);
416 #else
417 matched = regexec(pattern, line);
418 #endif
419 if (matched)
420 {
421 *sp = pattern->startp[0];
422 *ep = pattern->endp[0];
423 }
424 #endif
425 }
426 matched = (!(search_type & SRCH_NO_MATCH) && matched) ||
427 ((search_type & SRCH_NO_MATCH) && !matched);
428 return (matched);
429 }
430
431 /*
432 * Return the name of the pattern matching library.
433 */
434 public char *
pattern_lib_name(VOID_PARAM)435 pattern_lib_name(VOID_PARAM)
436 {
437 #if HAVE_GNU_REGEX
438 return ("GNU");
439 #else
440 #if HAVE_POSIX_REGCOMP
441 return ("POSIX");
442 #else
443 #if HAVE_PCRE2
444 return ("PCRE2");
445 #else
446 #if HAVE_PCRE
447 return ("PCRE");
448 #else
449 #if HAVE_RE_COMP
450 return ("BSD");
451 #else
452 #if HAVE_REGCMP
453 return ("V8");
454 #else
455 #if HAVE_V8_REGCOMP
456 return ("Spencer V8");
457 #else
458 return ("no");
459 #endif
460 #endif
461 #endif
462 #endif
463 #endif
464 #endif
465 #endif
466 }
467