1 /*
2 * keyvalue - PCRE matching and substitution for mod_redirect and mod_rewrite
3 *
4 * Fully-rewritten from original
5 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved
6 * License: BSD 3-clause (same as lighttpd)
7 */
8 #include "first.h"
9
10 #include "keyvalue.h"
11 #include "plugin_config.h" /* struct cond_match_t */
12 #include "burl.h"
13 #include "log.h"
14
15 #include <stdlib.h>
16 #include <string.h>
17
18 #ifdef HAVE_PCRE2_H
19 #define PCRE2_CODE_UNIT_WIDTH 8
20 #include <pcre2.h>
21 #elif defined(HAVE_PCRE_H)
22 #include <pcre.h>
23 #ifndef PCRE_STUDY_JIT_COMPILE
24 #define PCRE_STUDY_JIT_COMPILE 0
25 #define pcre_free_study(x) pcre_free(x)
26 #endif
27 #endif
28
#ifdef HAVE_PCRE2_H
/* Single PCRE2 match-data block shared by all keyvalue regexes in this file.
 * Created on first use by pcre_keyvalue_buffer_append() and released (and
 * reset to NULL) by pcre_keyvalue_buffer_free(). */
static struct pcre2_real_match_data_8 *keyvalue_match_data = NULL;
#endif
32
33 typedef struct pcre_keyvalue {
34 #ifdef HAVE_PCRE2_H
35 pcre2_code *code;
36 struct pcre2_real_match_data_8 *match_data;
37 #elif defined(HAVE_PCRE_H)
38 pcre *key;
39 pcre_extra *key_extra;
40 #endif
41 buffer value;
42 } pcre_keyvalue;
43
pcre_keyvalue_buffer_init(void)44 pcre_keyvalue_buffer *pcre_keyvalue_buffer_init(void) {
45 return ck_calloc(1, sizeof(pcre_keyvalue_buffer));
46 }
47
/* Compile regex `key` and append it to kvb, paired with replacement `value`.
 *
 * The buffer struct *value is copied bytewise into the new entry (it is not
 * duplicated), which is why pcre_keyvalue_buffer_free() does not free it.
 * A non-zero pcre_jit requests JIT compilation (PCRE2) or a JIT study (PCRE).
 *
 * Returns 1 on success, 0 on failure after logging to errh.  The entry has
 * already been counted in kvb->used when a failure is reported.
 * When built without PCRE support, a message is logged once for the first
 * non-blank key and 1 is returned.
 */
int pcre_keyvalue_buffer_append(log_error_st *errh, pcre_keyvalue_buffer *kvb, const buffer *key, const buffer *value, const int pcre_jit) {

    /*(allocate in groups of 4)*/
    if (0 == (kvb->used & (4-1)))
        ck_realloc_u32((void **)&kvb->kv,kvb->used,4,sizeof(*kvb->kv));

    pcre_keyvalue * const kv = &kvb->kv[kvb->used++];

    /* copy persistent config data, and elide free() in free_data below */
    memcpy(&kv->value, value, sizeof(kv->value));

  #ifdef HAVE_PCRE

  #ifdef HAVE_PCRE2_H

    int rc;
    PCRE2_SIZE erroff;
    PCRE2_UCHAR errmsg[1024];

    kv->code = pcre2_compile((PCRE2_SPTR)BUF_PTR_LEN(key),
                             PCRE2_UTF, &rc, &erroff, NULL);
    if (NULL == kv->code) {
        pcre2_get_error_message(rc, errmsg, sizeof(errmsg));
        log_error(errh, __FILE__, __LINE__,
          "pcre2_compile: %s at offset %zu, regex: %s",
          (char *)errmsg, erroff, key->ptr);
        return 0;
    }

    if (pcre_jit) {
        rc = pcre2_jit_compile(kv->code, PCRE2_JIT_COMPLETE);
        /* JIT failure is logged but not fatal; the pattern is still usable */
        if (0 != rc && PCRE2_ERROR_JIT_BADOPTION != rc) {
            pcre2_get_error_message(rc, errmsg, sizeof(errmsg));
            log_error(errh, __FILE__, __LINE__,
              "pcre2_jit_compile: %s, regex: %s", (char *)errmsg, key->ptr);
            /*return 0;*/
        }
    }

    uint32_t captures;
    rc = pcre2_pattern_info(kv->code, PCRE2_INFO_CAPTURECOUNT, &captures);
    if (0 != rc) {
        pcre2_get_error_message(rc, errmsg, sizeof(errmsg));
        log_error(errh, __FILE__, __LINE__,
          "pcre2_pattern_info: %s, regex: %s", (char *)errmsg, key->ptr);
        return 0;
    }
    /* the shared match data below is created with room for 20 pairs */
    if (captures > 19) {
        log_error(errh, __FILE__, __LINE__,
          "Too many captures in regex, "
          "use (?:...) instead of (...): %s", key->ptr);
        return 0;
    }

   #if 1 /*(share single keyvalue_match_data among all keyvalue regexes)*/
    if (NULL == keyvalue_match_data) {
        keyvalue_match_data = pcre2_match_data_create(20, NULL);
        force_assert(keyvalue_match_data);
    }
    kv->match_data = keyvalue_match_data;
   #else
    kv->match_data = pcre2_match_data_create_from_pattern(kv->code, NULL);
    force_assert(kv->match_data);
   #endif

  #elif defined(HAVE_PCRE_H)

    const char *errptr;
    int erroff;

    kv->key_extra = NULL;
    kv->key = pcre_compile(key->ptr, 0, &errptr, &erroff, NULL);
    if (NULL == kv->key) {
        log_error(errh, __FILE__, __LINE__,
          "rexexp compilation error at %s", errptr);
        return 0;
    }

    /* pcre_study() may return NULL without error; errptr tells them apart */
    kv->key_extra =
      pcre_study(kv->key, pcre_jit ? PCRE_STUDY_JIT_COMPILE : 0, &errptr);
    if (NULL == kv->key_extra && NULL != errptr) {
        log_error(errh, __FILE__, __LINE__,
          "studying regex failed: %s -> %s\n",
          key->ptr, errptr);
        return 0;
    }

  #endif

  #else  /* !HAVE_PCRE */

    static int logged_message = 0;
    if (!buffer_is_blank(key) && !logged_message) {
        logged_message = 1; /* log only once */
        log_error(errh, __FILE__, __LINE__,
          "pcre support is missing, please install libpcre and the headers");
    }
    UNUSED(pcre_jit);

  #endif /* !HAVE_PCRE */

    return 1;
}
156
/* Release every compiled regex held by kvb, then the entry array and kvb
 * itself.  The replacement values are not freed (their buffer structs were
 * copied from the caller in pcre_keyvalue_buffer_append()).
 * With PCRE2, the file-wide shared match data is also released and reset to
 * NULL while processing the first entry. */
void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb) {
  #ifdef HAVE_PCRE
    pcre_keyvalue * const rules = kvb->kv;
    const int used = (int)kvb->used;
    for (int i = 0; i < used; ++i) {
        pcre_keyvalue * const kv = rules + i;
      #ifdef HAVE_PCRE2_H
        if (kv->code) pcre2_code_free(kv->code);
       #if 1
        if (keyvalue_match_data) {
            pcre2_match_data_free(keyvalue_match_data);
            keyvalue_match_data = NULL;
        }
       #else
        if (kv->match_data) pcre2_match_data_free(kv->match_data);
       #endif
      #elif defined(HAVE_PCRE_H)
        if (kv->key) pcre_free(kv->key);
        if (kv->key_extra) pcre_free_study(kv->key_extra);
        /*free (kv->value.ptr);*//*(see pcre_keyvalue_buffer_append)*/
      #endif
    }
  #endif

    free(kvb->kv); /* free(NULL) is a no-op */
    free(kvb);
}
182
183 #ifdef HAVE_PCRE
184
/* Append capture group `num` of the current regex match (ctx->ovec offsets
 * into ctx->subject) to b via burl_append() with `flags`.
 * Does nothing if num is not below the match count ctx->n. */
static void pcre_keyvalue_buffer_append_match(buffer *b, const pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
    if (num >= (unsigned int)ctx->n) return; /* n is always > 0 */

  #ifdef HAVE_PCRE2_H
    const PCRE2_SIZE * const ovec = (PCRE2_SIZE *)ctx->ovec;
  #elif defined(HAVE_PCRE_H)
    const int * const ovec = (int *)ctx->ovec;
  #endif
    /* ovec holds (start,end) offset pairs: group num is at [2*num],[2*num+1] */
    const unsigned int idx = num << 1;
    const size_t beg = (size_t)ovec[idx];
    const size_t end = (size_t)ovec[idx+1];
    burl_append(b, ctx->subject + beg, end - beg, flags);
}
197
/* Append capture group `num` of the enclosing condition match (ctx->cache)
 * to b via burl_append() with `flags`.  Offsets come from cache->matches and
 * index into cache->comp_value.
 * Does nothing if there is no cache or num is not below cache->captures. */
static void pcre_keyvalue_buffer_append_ctxmatch(buffer *b, const pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
    const struct cond_match_t * const cache = ctx->cache;
    if (NULL == cache) return; /* no enclosing match context */
    if (num >= (unsigned int)cache->captures) return;

  #ifdef HAVE_PCRE2_H
    const PCRE2_SIZE * const ovec = (PCRE2_SIZE *)cache->matches;
  #elif defined(HAVE_PCRE_H)
    const int * const ovec = (int *)cache->matches;
  #endif
    /* ovec holds (start,end) offset pairs: group num is at [2*num],[2*num+1] */
    const unsigned int idx = num << 1;
    const size_t beg = (size_t)ovec[idx];
    const size_t end = (size_t)ovec[idx+1];
    burl_append(b, cache->comp_value->ptr + beg, end - beg, flags);
}
212
213 #endif /* HAVE_PCRE */
214
/* Expand one extended substitution "${...}" or "%{...}" into b.
 *
 * pattern points at the leading '$' or '%', which is followed by '{'.
 * Zero or more modifiers may precede the final token; each ORs a flag into
 * the flags passed to burl_append():
 *   esc:  escape:      BURL_ENCODE_ALL
 *   escnde:            BURL_ENCODE_NDE
 *   escpsnde:          BURL_ENCODE_PSNDE
 *   noesc:  noescape:  BURL_ENCODE_NONE
 *   tolower:           BURL_TOLOWER
 *   toupper:           BURL_TOUPPER
 *   encb64u:           BURL_ENCODE_B64U
 *   decb64u:           BURL_DECODE_B64U
 * Unrecognized esc.../no.../to... modifiers are skipped through the next ':'.
 * The final token is one of:
 *   url.scheme  url.authority  url.port  url.path  url.query  (from ctx->burl)
 *   qsa     append ctx->burl->query, preceded by '?' or, if b already
 *           contains '?', by '&' (the '&' is omitted for a blank query)
 *   N | NN  one or two digit capture number; "${N}" takes it from the current
 *           match, "%{N}" from the enclosing condition match (ctx->cache).
 *           If no modifier set a flag, BURL_ENCODE_PSNDE is used.
 *
 * Returns (offset of the closing '}' relative to pattern) - 1, which the
 * caller adds to its index before stepping once more onto the '}'.
 * Returns -1 if the expression is malformed (missing ':' or '}').
 */
static int pcre_keyvalue_buffer_subst_ext(buffer *b, const char *pattern, const pcre_keyvalue_ctx *ctx) {
    const unsigned char *p = (unsigned char *)pattern+2;/* +2 past ${} or %{} */
    int flags = 0;
    while (!light_isdigit(*p) && *p != '}' && *p != '\0') {
        if (0) {
        }
        else if (p[0] == 'e' && p[1] == 's' && p[2] == 'c') {
            p+=3;
            if (p[0] == ':') {
                flags |= BURL_ENCODE_ALL;
                p+=1;
            }
            else if (0 == strncmp((const char *)p, "ape:", 4)) {
                flags |= BURL_ENCODE_ALL;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "nde:", 4)) {
                flags |= BURL_ENCODE_NDE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "psnde:", 6)) {
                flags |= BURL_ENCODE_PSNDE;
                p+=6;
            }
            else { /* skip unrecognized esc... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'n' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "esc:", 4)) {
                flags |= BURL_ENCODE_NONE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "escape:", 7)) {
                flags |= BURL_ENCODE_NONE;
                p+=7;
            }
            else { /* skip unrecognized no... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 't' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "lower:", 6)) {
                flags |= BURL_TOLOWER;
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "upper:", 6)) {
                /* toupper: must request upper-casing (was BURL_TOLOWER) */
                flags |= BURL_TOUPPER;
                p+=6;
            }
            else { /* skip unrecognized to... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'u' && p[1] == 'r' && p[2] == 'l' && p[3] == '.') {
            const struct burl_parts_t * const burl = ctx->burl;
            p+=4;
            /* each branch below leaves p on the closing '}' */
            if (0 == strncmp((const char *)p, "scheme}", 7)) {
                if (burl->scheme)
                    burl_append(b, BUF_PTR_LEN(burl->scheme), flags);
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "authority}", 10)) {
                if (burl->authority)
                    burl_append(b, BUF_PTR_LEN(burl->authority), flags);
                p+=9;
            }
            else if (0 == strncmp((const char *)p, "port}", 5)) {
                buffer_append_int(b, (int)burl->port);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "path}", 5)) {
                /* append burl->path up to (not including) any '?' */
                const buffer * const target = burl->path;
                const uint32_t len = buffer_clen(target);
                const char * const ptr = target->ptr;
                const char * const qmark = memchr(ptr, '?', len);
                burl_append(b, ptr, qmark ? (uint32_t)(qmark-ptr) : len, flags);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "query}", 6)) {
                if (burl->query)
                    burl_append(b, BUF_PTR_LEN(burl->query), flags);
                p+=5;
            }
            else { /* skip unrecognized url.* */
                p = (const unsigned char *)strchr((const char *)p, '}');
                if (NULL == p) return -1;
            }
            break;
        }
        else if (p[0] == 'q' && p[1] == 's' && p[2] == 'a' && p[3] == '}') {
            const buffer *qs = ctx->burl->query;
            if (qs && !buffer_is_unset(qs)) {
                if (NULL != strchr(b->ptr, '?')) {
                    if (!buffer_is_blank(qs))
                        buffer_append_char(b, '&');
                }
                else {
                    buffer_append_char(b, '?');
                }
                burl_append(b, BUF_PTR_LEN(qs), flags);
            }
            p+=3; /* leave p on the closing '}' */
            break;
        }
        else if (p[0] == 'e' && p[1] == 'n' && p[2] == 'c'
                 && 0 == strncmp((const char *)p+3, "b64u:", 5)) {
            flags |= BURL_ENCODE_B64U;
            p+=8;
        }
        else if (p[0] == 'd' && p[1] == 'e' && p[2] == 'c'
                 && 0 == strncmp((const char *)p+3, "b64u:", 5)) {
            flags |= BURL_DECODE_B64U;
            p+=8;
        }
        else ++p;  /* skip unrecognized char */
    }
    if (*p == '\0') return -1;
    if (*p != '}') { /* light_isdigit(*p) */
        unsigned int num = *p - '0';
        ++p;
        if (light_isdigit(*p)) num = num * 10 + (*p++ - '0');
        if (*p != '}') {
            /* ignore anything between the number and the closing '}' */
            p = (const unsigned char *)strchr((const char *)p, '}');
            if (NULL == p) return -1;
        }
        if (0 == flags) flags = BURL_ENCODE_PSNDE; /* default */
      #ifdef HAVE_PCRE
        pattern[0] == '$' /*(else '%')*/
          ? pcre_keyvalue_buffer_append_match(b, ctx, num, flags)
          : pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, flags);
      #endif
    }
    return (int)(p + 1 - (unsigned char *)pattern - 2);
}
358
/* Build the substitution result in b from replacement pattern patternb.
 *
 * b is cleared first.  Literal text is copied through unchanged, while
 *   "${...}" / "%{...}"  are expanded by pcre_keyvalue_buffer_subst_ext()
 *   "$N"                 appends capture N of the current match
 *   "%N"                 appends capture N of the enclosing condition match
 *   "$$" => "$", "%%" => "%", and e.g. "%a" stays "%a"
 * If an extended expression is malformed, processing stops and b keeps
 * whatever had been produced up to that point.
 */
static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const pcre_keyvalue_ctx *ctx) {
    const char * const pattern = patternb->ptr;
    const size_t plen = buffer_clen(patternb);
    size_t literal = 0; /* offset of literal text not yet copied into b */

    buffer_clear(b);

    /* search for $... or %... pattern substitutions */
    for (size_t k = 0; k + 1 < plen; ++k) {
        const char c = pattern[k];
        if (c != '$' && c != '%') continue;

        /* flush literal text preceding the marker */
        buffer_append_string_len(b, pattern + literal, k - literal);

        const unsigned char next = ((const unsigned char *)pattern)[k + 1];
        if (next == '{') {
            const int skip = pcre_keyvalue_buffer_subst_ext(b, pattern+k, ctx);
            if (skip < 0) return; /* error; truncate result */
            k += (size_t)skip;
        }
        else if (light_isdigit(next)) {
          #ifdef HAVE_PCRE
            const unsigned int num = (unsigned int)pattern[k + 1] - '0';
            if (c == '$')
                pcre_keyvalue_buffer_append_match(b, ctx, num, 0);
            else /* '%' */
                pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, 0);
          #endif
        }
        else {
            /* enable escape: "%%" => "%", "%a" => "%a", "$$" => "$" */
            buffer_append_string_len(b, pattern+k, (c == pattern[k+1]) ? 1 : 2);
        }

        ++k;               /* step over the char following the marker */
        literal = k + 1;
    }

    /* copy trailing literal text */
    buffer_append_string_len(b, pattern + literal, plen - literal);
}
396
pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer * kvb,pcre_keyvalue_ctx * ctx,const buffer * input,buffer * result)397 handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, const buffer *input, buffer *result) {
398 const pcre_keyvalue *kv = kvb->kv;
399 for (int i = 0, used = (int)kvb->used; i < used; ++i, ++kv) {
400 #ifdef HAVE_PCRE
401 #ifdef HAVE_PCRE2_H
402 int n = pcre2_match(kv->code, (PCRE2_SPTR)BUF_PTR_LEN(input),
403 0, 0, kv->match_data, NULL);
404 #else
405 #define N 20
406 int ovec[N * 3];
407 #undef N
408 int n = pcre_exec(kv->key, kv->key_extra, BUF_PTR_LEN(input),
409 0, 0, ovec, sizeof(ovec)/sizeof(int));
410 #endif
411 #else
412 int n = 1;
413 #endif
414 if (n < 0) {
415 #ifdef HAVE_PCRE
416 #ifdef HAVE_PCRE2_H
417 if (n != PCRE2_ERROR_NOMATCH)
418 #else
419 if (n != PCRE_ERROR_NOMATCH)
420 #endif
421 #endif
422 return HANDLER_ERROR;
423 }
424 else if (buffer_is_blank(&kv->value)) {
425 /* short-circuit if blank replacement pattern
426 * (do not attempt to match against remaining kvb rules) */
427 ctx->m = i;
428 return HANDLER_GO_ON;
429 }
430 else { /* it matched */
431 ctx->m = i;
432 ctx->n = n;
433 ctx->subject = input->ptr;
434 #ifdef HAVE_PCRE
435 #ifdef HAVE_PCRE2_H
436 ctx->ovec = pcre2_get_ovector_pointer(kv->match_data);
437 #else
438 ctx->ovec = ovec;
439 #endif
440 #endif
441 pcre_keyvalue_buffer_subst(result, &kv->value, ctx);
442 return HANDLER_FINISHED;
443 }
444 }
445
446 return HANDLER_GO_ON;
447 }
448
449
450 /* modified from burl_normalize_basic() to handle %% extra encoding layer */
451
/* Convert hex digit char c to its value, stored in n.
 * Evaluates to non-zero if c is one of 0-9, A-F, a-f (n is then 0..15),
 * and to 0 otherwise (n is then left holding an out-of-range value).
 * c (char) and n (nibble) MUST be unsigned integer types */
#define li_cton(c,n) \
  ( ((n) = (c) - '0') <= 9 \
      ? 1 \
      : ( ((n) = ((c)&0xdf) - 'A') <= 5 ? ((void)((n) += 10), 1) : 0 ) )
455
/* Uppercase, in place, the hex digits of every "%XX" sequence in b
 * (X being a hex digit).  Sequences that are not "%" followed by two hex
 * digits are left untouched. */
static void pcre_keyvalue_burl_percent_toupper (buffer *b)
{
    unsigned char * const s = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int hi, lo;
    int i = 0;
    while (i < len) {
        if (s[i] == '%' && li_cton(s[i+1],hi) && li_cton(s[i+2],lo)) {
            /* clearing bit 0x20 maps a-f to A-F */
            if (s[i+1] >= 'a') s[i+1] &= 0xdf;
            if (s[i+2] >= 'a') s[i+2] &= 0xdf;
            i += 3; /* step past "%XX" */
        }
        else
            ++i;
    }
}
469
/* Uppercase, in place, the hex digits of every "%%XX" sequence in b
 * (X being a hex digit).  Anything else is left untouched. */
static void pcre_keyvalue_burl_percent_percent_toupper (buffer *b)
{
    unsigned char * const s = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int hi, lo;
    int i = 0;
    while (i < len) {
        if (s[i] == '%' && s[i+1] == '%'
            && li_cton(s[i+2],hi) && li_cton(s[i+3],lo)) {
            /* clearing bit 0x20 maps a-f to A-F */
            if (s[i+2] >= 'a') s[i+2] &= 0xdf;
            if (s[i+3] >= 'a') s[i+3] &= 0xdf;
            i += 4; /* step past "%%XX" */
        }
        else
            ++i;
    }
}
484
/* uppercase hex digits, indexed by nibble value 0..15 */
static const char hex_chars_uc[] = "0123456789ABCDEF";
486
/* Replace every byte above 0x7F in b with "%XX" (uppercase hex).
 * t is used as scratch space for building the result, which is then copied
 * back into b.  b is left untouched if it contains no such byte. */
static void pcre_keyvalue_burl_percent_high_UTF8 (buffer *b, buffer *t)
{
    const unsigned char * const s = (unsigned char *)b->ptr;
    const int used = (int)buffer_clen(b);

    /* first pass: count bytes needing encoding to size the scratch buffer */
    unsigned int nhigh = 0;
    for (int i = 0; i < used; ++i)
        nhigh += (s[i] > 0x7F);
    if (0 == nhigh) return;

    /* each encoded byte grows from 1 to 3 chars */
    unsigned char * const out =
      (unsigned char *)buffer_string_prepare_copy(t, used+(nhigh*2));
    unsigned int j = 0;
    for (int i = 0; i < used; ++i) {
        const unsigned char c = s[i];
        if (c > 0x7F) {
            out[j++] = '%';
            out[j++] = hex_chars_uc[(c >> 4) & 0xF];
            out[j++] = hex_chars_uc[c & 0xF];
        }
        else
            out[j++] = c;
    }
    buffer_copy_string_len(b, (char *)out, (size_t)j);
}
510
/* Replace every byte above 0x7F in b with "%%XX" (uppercase hex).
 * t is used as scratch space for building the result, which is then copied
 * back into b.  b is left untouched if it contains no such byte. */
static void pcre_keyvalue_burl_percent_percent_high_UTF8 (buffer *b, buffer *t)
{
    const unsigned char * const s = (unsigned char *)b->ptr;
    const int used = (int)buffer_clen(b);

    /* first pass: count bytes needing encoding to size the scratch buffer */
    unsigned int nhigh = 0;
    for (int i = 0; i < used; ++i)
        nhigh += (s[i] > 0x7F);
    if (0 == nhigh) return;

    /* each encoded byte grows from 1 to 4 chars */
    unsigned char * const out =
      (unsigned char *)buffer_string_prepare_copy(t, used+(nhigh*3));
    unsigned int j = 0;
    for (int i = 0; i < used; ++i) {
        const unsigned char c = s[i];
        if (c > 0x7F) {
            out[j++] = '%';
            out[j++] = '%';
            out[j++] = hex_chars_uc[(c >> 4) & 0xF];
            out[j++] = hex_chars_uc[c & 0xF];
        }
        else
            out[j++] = c;
    }
    buffer_copy_string_len(b, (char *)out, (size_t)j);
}
535
/* Basic normalization of regex and regex replacement to mirror some of
 * the normalizations performed on request URI (for better compatibility).
 * Note: not currently attempting to replace unnecessary percent-encoding
 *       (would need to know if regex was intended to match url-path or
 *        query-string or both, and then would have to regex-escape if those
 *        chars were special regex chars such as . * + ? ( ) [ ] | and more)
 * Not attempting to percent-encode chars which should be encoded, again
 * since regex might target url-path, query-string, or both, and we would
 * have to avoid percent-encoding special regex chars.
 * Also not attempting to detect unnecessary regex-escape in, e.g. %\x\x
 * Preserve improper %-encoded sequences which are not %XX (using hex chars)
 * Intentionally not performing path simplification (e.g. ./ ../)
 * If regex-specific normalizations begin to be made to k here,
 * must revisit callers, e.g. one configfile.c use on non-regex string.
 * "%%" (percent_percent) is used in regex replacement strings since
 * otherwise "%n" is used to indicate regex backreference where n is number.
 */
553
/* Normalize regex key k in place: uppercase the hex digits of "%XX"
 * sequences, then percent-encode bytes above 0x7F as "%XX".
 * t is scratch space used by the encoding step. */
void pcre_keyvalue_burl_normalize_key (buffer *k, buffer *t)
{
    /* order matters only in that both steps emit uppercase hex */
    pcre_keyvalue_burl_percent_toupper(k);
    pcre_keyvalue_burl_percent_high_UTF8(k, t);
}
559
/* Normalize regex replacement v in place: uppercase the hex digits of
 * "%%XX" sequences, then percent-encode bytes above 0x7F as "%%XX".
 * t is scratch space used by the encoding step. */
void pcre_keyvalue_burl_normalize_value (buffer *v, buffer *t)
{
    /* "%%" is used because a single '%' introduces a backreference */
    pcre_keyvalue_burl_percent_percent_toupper(v);
    pcre_keyvalue_burl_percent_percent_high_UTF8(v, t);
}
565