1c18f442aSGlenn Strauss /*
2c18f442aSGlenn Strauss * keyvalue - PCRE matching and substitution for mod_redirect and mod_rewrite
3c18f442aSGlenn Strauss *
4c18f442aSGlenn Strauss * Fully-rewritten from original
5c18f442aSGlenn Strauss * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com All rights reserved
6c18f442aSGlenn Strauss * License: BSD 3-clause (same as lighttpd)
7c18f442aSGlenn Strauss */
88abd06a7SGlenn Strauss #include "first.h"
98abd06a7SGlenn Strauss
1022e8b456SStefan Bühler #include "keyvalue.h"
11c193da34SGlenn Strauss #include "plugin_config.h" /* struct cond_match_t */
12255269d7SGlenn Strauss #include "burl.h"
1338f2d1ddSStefan Bühler #include "log.h"
1422e8b456SStefan Bühler
15bcdc6a3bSJan Kneschke #include <stdlib.h>
16bcdc6a3bSJan Kneschke #include <string.h>
17bcdc6a3bSJan Kneschke
187512d82cSGlenn Strauss #ifdef HAVE_PCRE2_H
197512d82cSGlenn Strauss #define PCRE2_CODE_UNIT_WIDTH 8
207512d82cSGlenn Strauss #include <pcre2.h>
217512d82cSGlenn Strauss #elif defined(HAVE_PCRE_H)
2204d76e7aSGlenn Strauss #include <pcre.h>
237b9c5addSGlenn Strauss #ifndef PCRE_STUDY_JIT_COMPILE
247b9c5addSGlenn Strauss #define PCRE_STUDY_JIT_COMPILE 0
257b9c5addSGlenn Strauss #define pcre_free_study(x) pcre_free(x)
267b9c5addSGlenn Strauss #endif
2704d76e7aSGlenn Strauss #endif
2804d76e7aSGlenn Strauss
#ifdef HAVE_PCRE2_H
/* PCRE2 match-data block shared by all keyvalue regexes in this file:
 * created on first use in pcre_keyvalue_buffer_append() and released
 * (and reset to NULL) in pcre_keyvalue_buffer_free() */
static struct pcre2_real_match_data_8 *keyvalue_match_data = NULL;
#endif
327512d82cSGlenn Strauss
33cf9474aaSGlenn Strauss typedef struct pcre_keyvalue {
347512d82cSGlenn Strauss #ifdef HAVE_PCRE2_H
357512d82cSGlenn Strauss pcre2_code *code;
367512d82cSGlenn Strauss struct pcre2_real_match_data_8 *match_data;
377512d82cSGlenn Strauss #elif defined(HAVE_PCRE_H)
38cf9474aaSGlenn Strauss pcre *key;
39cf9474aaSGlenn Strauss pcre_extra *key_extra;
40cf9474aaSGlenn Strauss #endif
4163a6e52cSGlenn Strauss buffer value;
42cf9474aaSGlenn Strauss } pcre_keyvalue;
43cf9474aaSGlenn Strauss
pcre_keyvalue_buffer_init(void)44bcdc6a3bSJan Kneschke pcre_keyvalue_buffer *pcre_keyvalue_buffer_init(void) {
45*5e14db43SGlenn Strauss return ck_calloc(1, sizeof(pcre_keyvalue_buffer));
46bcdc6a3bSJan Kneschke }
47bcdc6a3bSJan Kneschke
/* Append one rule (regex key + replacement value) to kvb.
 *
 * errh      error log handle used to report regex problems
 * kvb       rule list to extend
 * key       regex source text
 * value     replacement template; copied shallowly (struct copy), so the
 *           memory it points to must outlive kvb
 * pcre_jit  non-zero to request JIT compilation of the regex
 *
 * Returns 1 on success, 0 if the regex could not be compiled/studied or has
 * too many capture groups.  The slot in kvb is consumed even on failure.
 * When built without PCRE support, a message is logged once for the first
 * non-blank key and 1 is returned.
 */
int pcre_keyvalue_buffer_append(log_error_st *errh, pcre_keyvalue_buffer *kvb, const buffer *key, const buffer *value, const int pcre_jit) {

    /* storage grows four entries at a time */
    if (0 == (kvb->used & 3))
        ck_realloc_u32((void **)&kvb->kv, kvb->used, 4, sizeof(*kvb->kv));

    pcre_keyvalue * const entry = &kvb->kv[kvb->used];
    ++kvb->used;

    /* shallow copy of persistent config data;
     * this is why pcre_keyvalue_buffer_free() does not free value.ptr */
    entry->value = *value;

  #ifdef HAVE_PCRE

   #ifdef HAVE_PCRE2_H

    int rc;
    PCRE2_SIZE erroff;
    PCRE2_UCHAR errbuf[1024];

    entry->code = pcre2_compile((PCRE2_SPTR)BUF_PTR_LEN(key),
                                PCRE2_UTF, &rc, &erroff, NULL);
    if (NULL == entry->code) {
        pcre2_get_error_message(rc, errbuf, sizeof(errbuf));
        log_error(errh, __FILE__, __LINE__,
          "pcre2_compile: %s at offset %zu, regex: %s",
          (char *)errbuf, erroff, key->ptr);
        return 0;
    }

    if (pcre_jit) {
        /* JIT failure is reported but is not fatal */
        rc = pcre2_jit_compile(entry->code, PCRE2_JIT_COMPLETE);
        if (!(0 == rc || rc == PCRE2_ERROR_JIT_BADOPTION)) {
            pcre2_get_error_message(rc, errbuf, sizeof(errbuf));
            log_error(errh, __FILE__, __LINE__,
              "pcre2_jit_compile: %s, regex: %s", (char *)errbuf, key->ptr);
            /*return 0;*/
        }
    }

    uint32_t ncaptures;
    rc = pcre2_pattern_info(entry->code, PCRE2_INFO_CAPTURECOUNT, &ncaptures);
    if (0 != rc) {
        pcre2_get_error_message(rc, errbuf, sizeof(errbuf));
        log_error(errh, __FILE__, __LINE__,
          "pcre2_pattern_info: %s, regex: %s", (char *)errbuf, key->ptr);
        return 0;
    }
    /* shared match data below has room for 20 pairs (whole match + 19) */
    if (ncaptures >= 20) {
        log_error(errh, __FILE__, __LINE__,
          "Too many captures in regex, "
          "use (?:...) instead of (...): %s", key->ptr);
        return 0;
    }

    #if 1 /*(share single keyvalue_match_data among all keyvalue regexes)*/
    if (NULL == keyvalue_match_data) {
        keyvalue_match_data = pcre2_match_data_create(20, NULL);
        force_assert(keyvalue_match_data);
    }
    entry->match_data = keyvalue_match_data;
    #else
    entry->match_data = pcre2_match_data_create_from_pattern(entry->code, NULL);
    force_assert(entry->match_data);
    #endif

   #elif defined(HAVE_PCRE_H)

    const char *errptr;
    int erroff;

    entry->key_extra = NULL;
    entry->key = pcre_compile(key->ptr, 0, &errptr, &erroff, NULL);
    if (NULL == entry->key) {
        log_error(errh, __FILE__, __LINE__,
          "rexexp compilation error at %s", errptr);
        return 0;
    }

    /* pcre_study() may legitimately return NULL without an error;
     * it failed only if it also set errptr */
    entry->key_extra =
      pcre_study(entry->key, pcre_jit ? PCRE_STUDY_JIT_COMPILE : 0, &errptr);
    if (NULL == entry->key_extra && NULL != errptr) {
        log_error(errh, __FILE__, __LINE__,
          "studying regex failed: %s -> %s\n",
          key->ptr, errptr);
        return 0;
    }

   #endif

  #else /* !HAVE_PCRE */

    if (!buffer_is_blank(key)) {
        /* complain only once about missing regex support */
        static int already_warned = 0;
        if (!already_warned) {
            already_warned = 1;
            log_error(errh, __FILE__, __LINE__,
              "pcre support is missing, please install libpcre and the headers");
            UNUSED(pcre_jit);
        }
    }

  #endif /* !HAVE_PCRE */

    return 1;
}
156bcdc6a3bSJan Kneschke
/* Release a rule list: the compiled regex of every rule, the shared PCRE2
 * match data (if any rule exists), the rule array and kvb itself.
 * Replacement values are not freed; they are shallow copies
 * (see pcre_keyvalue_buffer_append). */
void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb) {
  #ifdef HAVE_PCRE
    pcre_keyvalue *rule = kvb->kv;
    for (int remaining = (int)kvb->used; remaining > 0; --remaining, ++rule) {
      #ifdef HAVE_PCRE2_H
        if (NULL != rule->code)
            pcre2_code_free(rule->code);
       #if 1
        /* match data is shared by all rules: free once, then reset */
        if (NULL != keyvalue_match_data) {
            pcre2_match_data_free(keyvalue_match_data);
            keyvalue_match_data = NULL;
        }
       #else
        if (NULL != rule->match_data)
            pcre2_match_data_free(rule->match_data);
       #endif
      #elif defined(HAVE_PCRE_H)
        if (NULL != rule->key)
            pcre_free(rule->key);
        if (NULL != rule->key_extra)
            pcre_free_study(rule->key_extra);
        /*free (rule->value.ptr);*//*(see pcre_keyvalue_buffer_append)*/
      #endif
    }
  #endif

    free(kvb->kv); /* free(NULL) is a no-op */
    free(kvb);
}
182cb371557SGlenn Strauss
1837512d82cSGlenn Strauss #ifdef HAVE_PCRE
1847512d82cSGlenn Strauss
/* Append capture group `num` of the current rule match (ctx->ovec offsets
 * into ctx->subject) to b, recoded by burl_append() according to `flags`.
 * Nothing is appended when num is not below ctx->n. */
static void pcre_keyvalue_buffer_append_match(buffer *b, const pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
    if (num >= (unsigned int)ctx->n) return; /* n is always > 0 */

    /* each group occupies a (start, end) pair of offsets */
  #ifdef HAVE_PCRE2_H
    const PCRE2_SIZE * const pair = (PCRE2_SIZE *)ctx->ovec + 2u*num;
  #elif defined(HAVE_PCRE_H)
    const int * const pair = (int *)ctx->ovec + 2u*num;
  #endif
    const size_t begin = (size_t)pair[0];
    const size_t end   = (size_t)pair[1];
    burl_append(b, ctx->subject + begin, end - begin, flags);
}
197255269d7SGlenn Strauss
/* Append capture group `num` of the enclosing condition match (ctx->cache)
 * to b, recoded by burl_append() according to `flags`.
 * Nothing is appended when there is no enclosing match context or when num
 * is not below the number of captures recorded in it. */
static void pcre_keyvalue_buffer_append_ctxmatch(buffer *b, const pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
    const struct cond_match_t * const cond = ctx->cache;
    if (NULL == cond) return; /* no enclosing match context */
    if (num >= (unsigned int)cond->captures) return;

    /* each group occupies a (start, end) pair of offsets */
  #ifdef HAVE_PCRE2_H
    const PCRE2_SIZE * const pair = (PCRE2_SIZE *)cond->matches + 2u*num;
  #elif defined(HAVE_PCRE_H)
    const int * const pair = (int *)cond->matches + 2u*num;
  #endif
    const size_t begin = (size_t)pair[0];
    const size_t end   = (size_t)pair[1];
    burl_append(b, cond->comp_value->ptr + begin, end - begin, flags);
}
212255269d7SGlenn Strauss
21363beba3aSGlenn Strauss #endif /* HAVE_PCRE */
21463beba3aSGlenn Strauss
/* Expand one extended substitution token "${...}" or "%{...}" and append
 * the result to b.
 *
 * b        output buffer being built
 * pattern  points at the leading '$' or '%' of the token
 * ctx      match context ($N captures, %N enclosing-condition captures,
 *          and the parsed request URL parts in ctx->burl)
 *
 * The token body is a sequence of optional recoding modifiers followed by
 * what to insert:
 *   modifiers:  esc: escape: escnde: escpsnde: noesc: noescape:
 *               tolower: toupper: encb64u: decb64u:
 *   content:    url.scheme url.authority url.port url.path url.query qsa
 *               or a one- or two-digit capture number
 * Unrecognized modifiers are skipped.  A capture number with no modifier
 * is recoded with BURL_ENCODE_PSNDE.
 *
 * Returns -1 if the token is malformed (no terminating '}' or expected ':'
 * not found); the caller then truncates the result.  Otherwise returns r
 * such that pattern[r+1] is the closing '}' of the token.
 */
static int pcre_keyvalue_buffer_subst_ext(buffer *b, const char *pattern, const pcre_keyvalue_ctx *ctx) {
    const unsigned char *p = (unsigned char *)pattern+2;/* +2 past ${} or %{} */
    int flags = 0;
    while (!light_isdigit(*p) && *p != '}' && *p != '\0') {
        if (0) {
        }
        else if (p[0] == 'e' && p[1] == 's' && p[2] == 'c') {
            p+=3;
            if (p[0] == ':') {
                flags |= BURL_ENCODE_ALL;
                p+=1;
            }
            else if (0 == strncmp((const char *)p, "ape:", 4)) {
                flags |= BURL_ENCODE_ALL;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "nde:", 4)) {
                flags |= BURL_ENCODE_NDE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "psnde:", 6)) {
                flags |= BURL_ENCODE_PSNDE;
                p+=6;
            }
            else { /* skip unrecognized esc... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'n' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "esc:", 4)) {
                flags |= BURL_ENCODE_NONE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "escape:", 7)) {
                flags |= BURL_ENCODE_NONE;
                p+=7;
            }
            else { /* skip unrecognized no... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 't' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "lower:", 6)) {
                flags |= BURL_TOLOWER;
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "upper:", 6)) {
                /* fix: "toupper:" must request upper-casing
                 * (previously set BURL_TOLOWER by mistake) */
                flags |= BURL_TOUPPER;
                p+=6;
            }
            else { /* skip unrecognized to... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'u' && p[1] == 'r' && p[2] == 'l' && p[3] == '.') {
            /* url.* ends the token; each branch leaves p on the '}' */
            const struct burl_parts_t * const burl = ctx->burl;
            p+=4;
            if (0 == strncmp((const char *)p, "scheme}", 7)) {
                if (burl->scheme)
                    burl_append(b, BUF_PTR_LEN(burl->scheme), flags);
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "authority}", 10)) {
                if (burl->authority)
                    burl_append(b, BUF_PTR_LEN(burl->authority), flags);
                p+=9;
            }
            else if (0 == strncmp((const char *)p, "port}", 5)) {
                buffer_append_int(b, (int)burl->port);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "path}", 5)) {
                /* burl->path may carry a query-string; stop at '?' */
                const buffer * const target = burl->path;
                const uint32_t len = buffer_clen(target);
                const char * const ptr = target->ptr;
                const char * const qmark = memchr(ptr, '?', len);
                burl_append(b, ptr, qmark ? (uint32_t)(qmark-ptr) : len, flags);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "query}", 6)) {
                if (burl->query)
                    burl_append(b, BUF_PTR_LEN(burl->query), flags);
                p+=5;
            }
            else { /* skip unrecognized url.* */
                p = (const unsigned char *)strchr((const char *)p, '}');
                if (NULL == p) return -1;
            }
            break;
        }
        else if (p[0] == 'q' && p[1] == 's' && p[2] == 'a' && p[3] == '}') {
            /* query-string append: join with '&' if b already contains '?',
             * otherwise start the query-string with '?' */
            const buffer *qs = ctx->burl->query;
            if (qs && !buffer_is_unset(qs)) {
                if (NULL != strchr(b->ptr, '?')) {
                    if (!buffer_is_blank(qs))
                        buffer_append_char(b, '&');
                }
                else {
                    buffer_append_char(b, '?');
                }
                burl_append(b, BUF_PTR_LEN(qs), flags);
            }
            p+=3;
            break;
        }
        else if (p[0] == 'e' && p[1] == 'n' && p[2] == 'c'
                 && 0 == strncmp((const char *)p+3, "b64u:", 5)) {
            flags |= BURL_ENCODE_B64U;
            p+=8;
        }
        else if (p[0] == 'd' && p[1] == 'e' && p[2] == 'c'
                 && 0 == strncmp((const char *)p+3, "b64u:", 5)) {
            flags |= BURL_DECODE_B64U;
            p+=8;
        }
        else ++p;  /* skip unrecognized char */
    }
    if (*p == '\0') return -1;
    if (*p != '}') { /* light_isdigit(*p) */
        /* capture number: at most two digits are interpreted;
         * anything else before '}' is skipped */
        unsigned int num = *p - '0';
        ++p;
        if (light_isdigit(*p)) num = num * 10 + (*p++ - '0');
        if (*p != '}') {
            p = (const unsigned char *)strchr((const char *)p, '}');
            if (NULL == p) return -1;
        }
        if (0 == flags) flags = BURL_ENCODE_PSNDE; /* default */
      #ifdef HAVE_PCRE
        pattern[0] == '$' /*(else '%')*/
          ? pcre_keyvalue_buffer_append_match(b, ctx, num, flags)
          : pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, flags);
      #endif
    }
    /* p is on the closing '}' here */
    return (int)(p + 1 - (unsigned char *)pattern - 2);
}
358255269d7SGlenn Strauss
/* Build the replacement string for a matched rule.
 *
 * b         output buffer; cleared first, then filled with the expansion
 * patternb  replacement template
 * ctx       match context used to resolve substitutions
 *
 * Recognized in the template:
 *   $N / %N       single-digit capture of the rule regex / enclosing condition
 *   ${..} / %{..} extended form, see pcre_keyvalue_buffer_subst_ext()
 *   $$ / %%       literal '$' / '%'
 * Any other character following '$' or '%' is copied through together with
 * the sigil.  If an extended token is malformed, b is left truncated at that
 * point.
 */
static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const pcre_keyvalue_ctx *ctx) {
    const char * const tmpl = patternb->ptr;
    const size_t tlen = buffer_clen(patternb);
    size_t lit = 0; /* start of literal text not yet copied to b */
    size_t pos = 0;

    buffer_clear(b);

    /* a sigil in the very last position has nothing after it to
     * interpret, hence pos + 1 < tlen */
    while (pos + 1 < tlen) {
        const char sigil = tmpl[pos];
        if (sigil != '$' && sigil != '%') {
            ++pos;
            continue;
        }

        /* flush literal text preceding the sigil */
        buffer_append_string_len(b, tmpl + lit, pos - lit);

        const unsigned char next = ((const unsigned char *)tmpl)[pos + 1];
        if (next == '{') {
            const int adv = pcre_keyvalue_buffer_subst_ext(b, tmpl + pos, ctx);
            if (adv < 0) return; /* error; truncate result */
            pos += (size_t)adv;
        }
        else if (light_isdigit(next)) {
          #ifdef HAVE_PCRE
            const unsigned int num = (unsigned int)next - '0';
            if (sigil == '$')
                pcre_keyvalue_buffer_append_match(b, ctx, num, 0);
            else /* '%' */
                pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, 0);
          #endif
        }
        else {
            /* enable escape: "%%" => "%", "%a" => "%a", "$$" => "$" */
            buffer_append_string_len(b, tmpl + pos,
                                     sigil == tmpl[pos + 1] ? 1 : 2);
        }

        /* step over the final two characters of the token just handled */
        pos += 2;
        lit = pos;
    }

    /* trailing literal text */
    buffer_append_string_len(b, tmpl + lit, tlen - lit);
}
396cf9474aaSGlenn Strauss
pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer * kvb,pcre_keyvalue_ctx * ctx,const buffer * input,buffer * result)39763a6e52cSGlenn Strauss handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, const buffer *input, buffer *result) {
3987db817c5SGlenn Strauss const pcre_keyvalue *kv = kvb->kv;
3997db817c5SGlenn Strauss for (int i = 0, used = (int)kvb->used; i < used; ++i, ++kv) {
40063beba3aSGlenn Strauss #ifdef HAVE_PCRE
4017512d82cSGlenn Strauss #ifdef HAVE_PCRE2_H
4027512d82cSGlenn Strauss int n = pcre2_match(kv->code, (PCRE2_SPTR)BUF_PTR_LEN(input),
4037512d82cSGlenn Strauss 0, 0, kv->match_data, NULL);
4047512d82cSGlenn Strauss #else
405ba5026aaSGlenn Strauss #define N 20
406cf9474aaSGlenn Strauss int ovec[N * 3];
407cf9474aaSGlenn Strauss #undef N
408af3df29aSGlenn Strauss int n = pcre_exec(kv->key, kv->key_extra, BUF_PTR_LEN(input),
409cf9474aaSGlenn Strauss 0, 0, ovec, sizeof(ovec)/sizeof(int));
4107512d82cSGlenn Strauss #endif
41163beba3aSGlenn Strauss #else
41263beba3aSGlenn Strauss int n = 1;
41363beba3aSGlenn Strauss #endif
414cf9474aaSGlenn Strauss if (n < 0) {
41563beba3aSGlenn Strauss #ifdef HAVE_PCRE
4167512d82cSGlenn Strauss #ifdef HAVE_PCRE2_H
4177512d82cSGlenn Strauss if (n != PCRE2_ERROR_NOMATCH)
4187512d82cSGlenn Strauss #else
4197512d82cSGlenn Strauss if (n != PCRE_ERROR_NOMATCH)
4207512d82cSGlenn Strauss #endif
42163beba3aSGlenn Strauss #endif
422cf9474aaSGlenn Strauss return HANDLER_ERROR;
423cf9474aaSGlenn Strauss }
424af3df29aSGlenn Strauss else if (buffer_is_blank(&kv->value)) {
425cf9474aaSGlenn Strauss /* short-circuit if blank replacement pattern
426cf9474aaSGlenn Strauss * (do not attempt to match against remaining kvb rules) */
427cf9474aaSGlenn Strauss ctx->m = i;
428cf9474aaSGlenn Strauss return HANDLER_GO_ON;
429cf9474aaSGlenn Strauss }
430cf9474aaSGlenn Strauss else { /* it matched */
431cf9474aaSGlenn Strauss ctx->m = i;
4327db817c5SGlenn Strauss ctx->n = n;
4337db817c5SGlenn Strauss ctx->subject = input->ptr;
43463beba3aSGlenn Strauss #ifdef HAVE_PCRE
4357512d82cSGlenn Strauss #ifdef HAVE_PCRE2_H
4367512d82cSGlenn Strauss ctx->ovec = pcre2_get_ovector_pointer(kv->match_data);
4377512d82cSGlenn Strauss #else
4387db817c5SGlenn Strauss ctx->ovec = ovec;
4397512d82cSGlenn Strauss #endif
44063beba3aSGlenn Strauss #endif
4417db817c5SGlenn Strauss pcre_keyvalue_buffer_subst(result, &kv->value, ctx);
442cf9474aaSGlenn Strauss return HANDLER_FINISHED;
443cf9474aaSGlenn Strauss }
444cf9474aaSGlenn Strauss }
445cf9474aaSGlenn Strauss
446cf9474aaSGlenn Strauss return HANDLER_GO_ON;
447cf9474aaSGlenn Strauss }
4487512d82cSGlenn Strauss
4493eb7902eSGlenn Strauss
4503eb7902eSGlenn Strauss /* modified from burl_normalize_basic() to handle %% extra encoding layer */
4513eb7902eSGlenn Strauss
/* Convert hex digit character c to its nibble value, stored in n.
 * Evaluates to non-zero if c is a hex digit (0-9, A-F, a-f), else to 0
 * (n is then left holding an out-of-range scratch value).
 * c (char) and n (nibble) MUST be unsigned integer types */
#define li_cton(c,n) \
  (((n) = (c) - '0') <= 9 \
   ? 1 \
   : (((n) = ((c)&0xdf) - 'A') <= 5 ? (((n) += 10), 1) : 0))
4553eb7902eSGlenn Strauss
/* Upper-case, in place, the hex digits of every "%XX" sequence in b.
 * Sequences where XX is not two hex digits are left untouched. */
static void pcre_keyvalue_burl_percent_toupper (buffer *b)
{
    unsigned char * const s = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int hi, lo;
    int i = 0;
    while (i < len) {
        /* reading up to s[i+2] relies on short-circuit evaluation stopping
         * at the string terminator */
        if (s[i] == '%' && li_cton(s[i+1],hi) && li_cton(s[i+2],lo)) {
            if (s[i+1] >= 'a') s[i+1] &= 0xdf; /* uppercase hex */
            if (s[i+2] >= 'a') s[i+2] &= 0xdf; /* uppercase hex */
            i += 3;
        }
        else
            ++i;
    }
}
4693eb7902eSGlenn Strauss
/* Upper-case, in place, the hex digits of every "%%XX" sequence in b
 * ("%%" being the escaped percent used in replacement strings).
 * Sequences where XX is not two hex digits are left untouched. */
static void pcre_keyvalue_burl_percent_percent_toupper (buffer *b)
{
    unsigned char * const s = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int hi, lo;
    int i = 0;
    while (i < len) {
        /* reading up to s[i+3] relies on short-circuit evaluation stopping
         * at the string terminator */
        if (s[i] == '%' && s[i+1] == '%'
            && li_cton(s[i+2],hi) && li_cton(s[i+3],lo)) {
            if (s[i+2] >= 'a') s[i+2] &= 0xdf; /* uppercase hex */
            if (s[i+3] >= 'a') s[i+3] &= 0xdf; /* uppercase hex */
            i += 4;
        }
        else
            ++i;
    }
}
4843eb7902eSGlenn Strauss
/* upper-case hex digit for each nibble value 0..15 */
static const char hex_chars_uc[] = "0123456789" "ABCDEF";
4863eb7902eSGlenn Strauss
/* Percent-encode every byte above 0x7F in b as "%XX" (upper-case hex).
 * t is scratch space used to build the result, which is then copied back
 * into b.  b is left untouched when it contains no such byte. */
static void pcre_keyvalue_burl_percent_high_UTF8 (buffer *b, buffer *t)
{
    const unsigned char * const src = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);

    unsigned int nhigh = 0;
    for (int i = 0; i < len; ++i)
        nhigh += (src[i] > 0x7F);
    if (0 == nhigh) return;

    /* each high byte grows from 1 to 3 output bytes */
    unsigned char * const dst =
      (unsigned char *)buffer_string_prepare_copy(t, len+(nhigh*2));
    unsigned int out = 0;
    for (int i = 0; i < len; ++i) {
        const unsigned char c = src[i];
        if (c > 0x7F) {
            dst[out++] = '%';
            dst[out++] = hex_chars_uc[c >> 4];
            dst[out++] = hex_chars_uc[c & 0xF];
        }
        else
            dst[out++] = c;
    }
    buffer_copy_string_len(b, (char *)dst, (size_t)out);
}
5103eb7902eSGlenn Strauss
/* Encode every byte above 0x7F in b as "%%XX" (escaped percent followed by
 * upper-case hex).  t is scratch space used to build the result, which is
 * then copied back into b.  b is left untouched when it contains no such
 * byte. */
static void pcre_keyvalue_burl_percent_percent_high_UTF8 (buffer *b, buffer *t)
{
    const unsigned char * const src = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);

    unsigned int nhigh = 0;
    for (int i = 0; i < len; ++i)
        nhigh += (src[i] > 0x7F);
    if (0 == nhigh) return;

    /* each high byte grows from 1 to 4 output bytes */
    unsigned char * const dst =
      (unsigned char *)buffer_string_prepare_copy(t, len+(nhigh*3));
    unsigned int out = 0;
    for (int i = 0; i < len; ++i) {
        const unsigned char c = src[i];
        if (c > 0x7F) {
            dst[out++] = '%';
            dst[out++] = '%';
            dst[out++] = hex_chars_uc[c >> 4];
            dst[out++] = hex_chars_uc[c & 0xF];
        }
        else
            dst[out++] = c;
    }
    buffer_copy_string_len(b, (char *)dst, (size_t)out);
}
5353eb7902eSGlenn Strauss
/* Basic normalization of regex and regex replacement to mirror some of
 * the normalizations performed on request URI (for better compatibility).
 * Note: not currently attempting to replace unnecessary percent-encoding
 *       (would need to know if regex was intended to match url-path or
 *        query-string or both, and then would have to regex-escape if those
 *        chars were special regex chars such as . * + ? ( ) [ ] | and more)
 * Not attempting to percent-encode chars which should be encoded, again
 * since regex might target url-path, query-string, or both, and we would
 * have to avoid percent-encoding special regex chars.
 * Also not attempting to detect unnecessary regex-escapes in, e.g. %\x\x
 * Preserve improper %-encoded sequences which are not %XX (using hex chars)
 * Intentionally not performing path simplification (e.g. ./ ../)
 * If regex-specific normalizations begin to be made to k here,
 * must revisit callers, e.g. one configfile.c use on non-regex string.
 * "%%" (percent_percent) is used in regex replacement strings since
 * otherwise "%n" is used to indicate regex backreference where n is number.
 */
5533eb7902eSGlenn Strauss
/* Normalize a regex key k in place: upper-case the hex digits of "%XX"
 * sequences, then percent-encode bytes above 0x7F.
 * t is a scratch buffer used while re-encoding. */
void pcre_keyvalue_burl_normalize_key (buffer *k, buffer *t)
{
    /* order matters only in that newly produced "%XX" is already upper-case */
    pcre_keyvalue_burl_percent_toupper(k);
    pcre_keyvalue_burl_percent_high_UTF8(k, t);
}
5593eb7902eSGlenn Strauss
/* Normalize a replacement value v in place: upper-case the hex digits of
 * "%%XX" sequences, then encode bytes above 0x7F as "%%XX".
 * t is a scratch buffer used while re-encoding. */
void pcre_keyvalue_burl_normalize_value (buffer *v, buffer *t)
{
    /* order matters only in that newly produced "%%XX" is already upper-case */
    pcre_keyvalue_burl_percent_percent_toupper(v);
    pcre_keyvalue_burl_percent_percent_high_UTF8(v, t);
}
565