xref: /lighttpd1.4/src/keyvalue.c (revision 5e14db43)
/*
 * keyvalue - PCRE matching and substitution for mod_redirect and mod_rewrite
 *
 * Fully-rewritten from original
 * Copyright(c) 2018 Glenn Strauss gstrauss()gluelogic.com  All rights reserved
 * License: BSD 3-clause (same as lighttpd)
 */
88abd06a7SGlenn Strauss #include "first.h"
98abd06a7SGlenn Strauss 
1022e8b456SStefan Bühler #include "keyvalue.h"
11c193da34SGlenn Strauss #include "plugin_config.h" /* struct cond_match_t */
12255269d7SGlenn Strauss #include "burl.h"
1338f2d1ddSStefan Bühler #include "log.h"
1422e8b456SStefan Bühler 
15bcdc6a3bSJan Kneschke #include <stdlib.h>
16bcdc6a3bSJan Kneschke #include <string.h>
17bcdc6a3bSJan Kneschke 
187512d82cSGlenn Strauss #ifdef HAVE_PCRE2_H
197512d82cSGlenn Strauss #define PCRE2_CODE_UNIT_WIDTH 8
207512d82cSGlenn Strauss #include <pcre2.h>
217512d82cSGlenn Strauss #elif defined(HAVE_PCRE_H)
2204d76e7aSGlenn Strauss #include <pcre.h>
237b9c5addSGlenn Strauss #ifndef PCRE_STUDY_JIT_COMPILE
247b9c5addSGlenn Strauss #define PCRE_STUDY_JIT_COMPILE 0
257b9c5addSGlenn Strauss #define pcre_free_study(x) pcre_free(x)
267b9c5addSGlenn Strauss #endif
2704d76e7aSGlenn Strauss #endif
2804d76e7aSGlenn Strauss 
#ifdef HAVE_PCRE2_H
/* single PCRE2 match-data block shared by the keyvalue regexes in this file;
 * created on first use in pcre_keyvalue_buffer_append() and released
 * (and reset to NULL) in pcre_keyvalue_buffer_free() */
static struct pcre2_real_match_data_8 *keyvalue_match_data;
#endif
327512d82cSGlenn Strauss 
33cf9474aaSGlenn Strauss typedef struct pcre_keyvalue {
347512d82cSGlenn Strauss   #ifdef HAVE_PCRE2_H
357512d82cSGlenn Strauss 	pcre2_code *code;
367512d82cSGlenn Strauss 	struct pcre2_real_match_data_8 *match_data;
377512d82cSGlenn Strauss   #elif defined(HAVE_PCRE_H)
38cf9474aaSGlenn Strauss 	pcre *key;
39cf9474aaSGlenn Strauss 	pcre_extra *key_extra;
40cf9474aaSGlenn Strauss   #endif
4163a6e52cSGlenn Strauss 	buffer value;
42cf9474aaSGlenn Strauss } pcre_keyvalue;
43cf9474aaSGlenn Strauss 
pcre_keyvalue_buffer_init(void)44bcdc6a3bSJan Kneschke pcre_keyvalue_buffer *pcre_keyvalue_buffer_init(void) {
45*5e14db43SGlenn Strauss 	return ck_calloc(1, sizeof(pcre_keyvalue_buffer));
46bcdc6a3bSJan Kneschke }
47bcdc6a3bSJan Kneschke 
/**
 * Append one (regex, replacement) rule to kvb.
 *
 * errh      destination for error log messages
 * kvb       rule list to extend; storage grows in groups of 4 entries
 * key       regex pattern text
 * value     replacement pattern; the buffer struct is shallow-copied, so the
 *           string it points to must outlive kvb (it is not freed by
 *           pcre_keyvalue_buffer_free())
 * pcre_jit  non-zero to request JIT compilation of the regex
 *
 * Returns 1 on success, 0 on failure (after logging the reason).
 * Note: the new slot is already counted in kvb->used when 0 is returned.
 *
 * PCRE2: a JIT compile failure is logged but not fatal; a regex with more
 * than 19 capture groups is rejected (match data holds 20 offset pairs).
 * PCRE1: a pcre_study() failure is fatal.
 * Without PCRE: a non-blank key logs a one-time "pcre support is missing"
 * message and 1 is returned.
 */
int pcre_keyvalue_buffer_append(log_error_st *errh, pcre_keyvalue_buffer *kvb, const buffer *key, const buffer *value, const int pcre_jit) {

	pcre_keyvalue *kv;

	if (!(kvb->used & (4-1))) /*(allocate in groups of 4)*/
		ck_realloc_u32((void **)&kvb->kv,kvb->used,4,sizeof(*kvb->kv));

	kv = kvb->kv + kvb->used++;

	/* copy persistent config data (shallow copy of the buffer struct rather
	 * than buffer_copy_buffer()), and elide free() in free_data below */
	memcpy(&kv->value, value, sizeof(buffer));

  #ifdef HAVE_PCRE

   #ifdef HAVE_PCRE2_H

	int errcode;
	PCRE2_SIZE erroff;
	PCRE2_UCHAR errbuf[1024];

	kv->code = pcre2_compile((PCRE2_SPTR)BUF_PTR_LEN(key),
	                         PCRE2_UTF, &errcode, &erroff, NULL);
	if (NULL == kv->code) {
		pcre2_get_error_message(errcode, errbuf, sizeof(errbuf));
		log_error(errh, __FILE__, __LINE__,
		          "pcre2_compile: %s at offset %zu, regex: %s",
		          (char *)errbuf, erroff, key->ptr);
		return 0;
	}

	if (pcre_jit) {
		errcode = pcre2_jit_compile(kv->code, PCRE2_JIT_COMPLETE);
		if (0 != errcode && errcode != PCRE2_ERROR_JIT_BADOPTION) {
			pcre2_get_error_message(errcode, errbuf, sizeof(errbuf));
			log_error(errh, __FILE__, __LINE__,
			  "pcre2_jit_compile: %s, regex: %s", (char *)errbuf, key->ptr);
			/* not fatal: the regex can still be used without JIT */
		}
	}

	uint32_t captures;
	errcode = pcre2_pattern_info(kv->code, PCRE2_INFO_CAPTURECOUNT, &captures);
	if (0 != errcode) {
		pcre2_get_error_message(errcode, errbuf, sizeof(errbuf));
		log_error(errh, __FILE__, __LINE__,
		  "pcre2_pattern_info: %s, regex: %s", (char *)errbuf, key->ptr);
		return 0;
	}
	else if (captures > 19) {
		/* match data below is sized for 20 pairs: whole match + 19 groups */
		log_error(errh, __FILE__, __LINE__,
		  "Too many captures in regex, "
		  "use (?:...) instead of (...): %s", key->ptr);
		return 0;
	}

    #if 1 /*(share single keyvalue_match_data among all keyvalue regexes)*/
	if (NULL == keyvalue_match_data) {
		keyvalue_match_data = pcre2_match_data_create(20, NULL);
		force_assert(keyvalue_match_data);
	}
	kv->match_data = keyvalue_match_data;
    #else
	kv->match_data = pcre2_match_data_create_from_pattern(kv->code, NULL);
	force_assert(kv->match_data);
    #endif

   #elif defined(HAVE_PCRE_H)

	const char *errptr;
	int erroff;

	kv->key_extra = NULL;

	if (NULL == (kv->key = pcre_compile(key->ptr,
					  0, &errptr, &erroff, NULL))) {
		/* errptr is the error message and erroff the offset into the regex;
		 * report both, plus the regex, as the pcre2 branch does */
		log_error(errh, __FILE__, __LINE__,
		  "regexp compilation error at offset %d: %s, regex: %s",
		  erroff, errptr, key->ptr);
		return 0;
	}

	const int study_options = pcre_jit ? PCRE_STUDY_JIT_COMPILE : 0;
	/* pcre_study() may return NULL without error; only errptr != NULL
	 * indicates failure */
	if (NULL == (kv->key_extra = pcre_study(kv->key, study_options, &errptr))
	    && errptr != NULL) {
		log_error(errh, __FILE__, __LINE__,
		  "studying regex failed: %s -> %s\n",
		  key->ptr, errptr);
		return 0;
	}

   #endif

  #else  /* !HAVE_PCRE */

    if (!buffer_is_blank(key)) {
	static int logged_message = 0;
	if (logged_message) return 1;
	logged_message = 1; /* log the missing-pcre notice only once */
	log_error(errh, __FILE__, __LINE__,
	  "pcre support is missing, please install libpcre and the headers");
	UNUSED(pcre_jit);
    }

  #endif /* !HAVE_PCRE */

	return 1;
}
156bcdc6a3bSJan Kneschke 
/* Release every compiled regex held by kvb, then the rule array and kvb
 * itself.  Replacement strings (kv->value) are not freed here since they
 * are shallow copies (see pcre_keyvalue_buffer_append()).
 * PCRE2: the shared keyvalue_match_data is released, and reset to NULL,
 * while freeing the first rule of a non-empty kvb.
 * NOTE(review): rules in other kvb instances would still hold the old
 * match_data pointer afterwards -- presumably this is only called during
 * teardown; confirm against callers. */
void pcre_keyvalue_buffer_free(pcre_keyvalue_buffer *kvb) {
  #ifdef HAVE_PCRE
	pcre_keyvalue * const rules = kvb->kv;
	const int nrules = (int)kvb->used;
	for (int idx = 0; idx < nrules; ++idx) {
		pcre_keyvalue * const kv = rules + idx;
	  #ifdef HAVE_PCRE2_H
		if (kv->code) pcre2_code_free(kv->code);
	   #if 1
		if (keyvalue_match_data) {
			pcre2_match_data_free(keyvalue_match_data);
			keyvalue_match_data = NULL;
		}
	   #else
		if (kv->match_data) pcre2_match_data_free(kv->match_data);
	   #endif
	  #elif defined(HAVE_PCRE_H)
		if (kv->key) pcre_free(kv->key);
		if (kv->key_extra) pcre_free_study(kv->key_extra);
		/* kv->value.ptr intentionally not freed */
	  #endif
	}
  #endif

	free(kvb->kv); /* free(NULL) is a no-op */
	free(kvb);
}
182cb371557SGlenn Strauss 
1837512d82cSGlenn Strauss #ifdef HAVE_PCRE
1847512d82cSGlenn Strauss 
/* Append capture group `num` of the current regex match (ctx->ovec offsets
 * into ctx->subject) to b via burl_append() with the given flags.
 * A group number >= ctx->n appends nothing. */
static void pcre_keyvalue_buffer_append_match(buffer *b, const pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
    if (num >= (unsigned int)ctx->n) return; /* n is always > 0 */
  #ifdef HAVE_PCRE2_H
    const PCRE2_SIZE * const pairs = (PCRE2_SIZE *)ctx->ovec;
  #elif defined(HAVE_PCRE_H)
    const int * const pairs = (int *)ctx->ovec;
  #endif
    /* offsets are stored as consecutive (begin, end) pairs per group */
    const unsigned int slot = num << 1;
    const size_t begin = (size_t)pairs[slot];
    const size_t end   = (size_t)pairs[slot+1];
    burl_append(b, ctx->subject + begin, end - begin, flags);
}
197255269d7SGlenn Strauss 
/* Append capture group `num` of the enclosing condition match (ctx->cache)
 * to b via burl_append() with the given flags.  Appends nothing when there
 * is no enclosing match context or num >= cache->captures. */
static void pcre_keyvalue_buffer_append_ctxmatch(buffer *b, const pcre_keyvalue_ctx *ctx, unsigned int num, int flags) {
    const struct cond_match_t * const cond = ctx->cache;
    if (NULL == cond) return; /* no enclosing match context */
    if (num >= (unsigned int)cond->captures) return;
  #ifdef HAVE_PCRE2_H
    const PCRE2_SIZE * const pairs = (PCRE2_SIZE *)cond->matches;
  #elif defined(HAVE_PCRE_H)
    const int * const pairs = (int *)cond->matches;
  #endif
    /* offsets are stored as consecutive (begin, end) pairs per group,
     * relative to the string that the condition was matched against */
    const unsigned int slot = num << 1;
    const size_t begin = (size_t)pairs[slot];
    const size_t end   = (size_t)pairs[slot+1];
    burl_append(b, cond->comp_value->ptr + begin, end - begin, flags);
}
212255269d7SGlenn Strauss 
21363beba3aSGlenn Strauss #endif /* HAVE_PCRE */
21463beba3aSGlenn Strauss 
/**
 * Expand one extended substitution, "${...}" or "%{...}", appending to b.
 *
 * pattern  points at the leading '$' or '%' (pattern[1] is '{')
 * ctx      match context (captures, enclosing condition match, url parts)
 *
 * Modifiers which may precede the capture number (each sets a burl flag):
 *   esc: escape:        BURL_ENCODE_ALL
 *   escnde:             BURL_ENCODE_NDE
 *   escpsnde:           BURL_ENCODE_PSNDE (default if no modifier given)
 *   noesc: noescape:    BURL_ENCODE_NONE
 *   tolower:            BURL_TOLOWER
 *   toupper:            BURL_TOUPPER
 *   encb64u: decb64u:   BURL_ENCODE_B64U / BURL_DECODE_B64U
 * Whole-token forms (terminate parsing of the token):
 *   url.scheme url.authority url.port url.path url.query qsa
 * Otherwise a one or two digit number selects a capture group from the
 * current match ('$') or from the enclosing condition match ('%').
 * Unrecognized esc.../no.../to... modifiers are skipped through the next ':'
 * and unrecognized url.* tokens through the closing '}'.
 *
 * Returns the offset of the closing '}' relative to pattern, minus 1
 * (the caller adds it to its index and then steps once more to land on
 * the '}'), or -1 if the token is malformed (missing ':' or '}').
 */
static int pcre_keyvalue_buffer_subst_ext(buffer *b, const char *pattern, const pcre_keyvalue_ctx *ctx) {
    const unsigned char *p = (unsigned char *)pattern+2;/* +2 past ${} or %{} */
    int flags = 0;
    while (!light_isdigit(*p) && *p != '}' && *p != '\0') {
        if (p[0] == 'e' && p[1] == 's' && p[2] == 'c') {
            p+=3;
            if (p[0] == ':') {
                flags |= BURL_ENCODE_ALL;
                p+=1;
            }
            else if (0 == strncmp((const char *)p, "ape:", 4)) {
                flags |= BURL_ENCODE_ALL;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "nde:", 4)) {
                flags |= BURL_ENCODE_NDE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "psnde:", 6)) {
                flags |= BURL_ENCODE_PSNDE;
                p+=6;
            }
            else { /* skip unrecognized esc... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'n' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "esc:", 4)) {
                flags |= BURL_ENCODE_NONE;
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "escape:", 7)) {
                flags |= BURL_ENCODE_NONE;
                p+=7;
            }
            else { /* skip unrecognized no... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 't' && p[1] == 'o') {
            p+=2;
            if (0 == strncmp((const char *)p, "lower:", 6)) {
                flags |= BURL_TOLOWER;
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "upper:", 6)) {
                flags |= BURL_TOUPPER; /* toupper: must uppercase */
                p+=6;
            }
            else { /* skip unrecognized to... */
                p = (const unsigned char *)strchr((const char *)p, ':');
                if (NULL == p) return -1;
                ++p;
            }
        }
        else if (p[0] == 'u' && p[1] == 'r' && p[2] == 'l' && p[3] == '.') {
            const struct burl_parts_t * const burl = ctx->burl;
            p+=4;
            /* each branch leaves p on the closing '}' */
            if (0 == strncmp((const char *)p, "scheme}", 7)) {
                if (burl->scheme)
                    burl_append(b, BUF_PTR_LEN(burl->scheme), flags);
                p+=6;
            }
            else if (0 == strncmp((const char *)p, "authority}", 10)) {
                if (burl->authority)
                    burl_append(b, BUF_PTR_LEN(burl->authority), flags);
                p+=9;
            }
            else if (0 == strncmp((const char *)p, "port}", 5)) {
                buffer_append_int(b, (int)burl->port);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "path}", 5)) {
                /* append burl->path up to (not including) any '?' */
                const buffer * const target = burl->path;
                const uint32_t len = buffer_clen(target);
                const char * const ptr = target->ptr;
                const char * const qmark = memchr(ptr, '?', len);
                burl_append(b, ptr, qmark ? (uint32_t)(qmark-ptr) : len, flags);
                p+=4;
            }
            else if (0 == strncmp((const char *)p, "query}", 6)) {
                if (burl->query)
                    burl_append(b, BUF_PTR_LEN(burl->query), flags);
                p+=5;
            }
            else { /* skip unrecognized url.* */
                p = (const unsigned char *)strchr((const char *)p, '}');
                if (NULL == p) return -1;
            }
            break;
        }
        else if (p[0] == 'q' && p[1] == 's' && p[2] == 'a' && p[3] == '}') {
            /* query-string-append: join with '?' if b has none yet,
             * else with '&' (omitted when the query string is blank) */
            const buffer *qs = ctx->burl->query;
            if (qs && !buffer_is_unset(qs)) {
                if (NULL != strchr(b->ptr, '?')) {
                    if (!buffer_is_blank(qs))
                        buffer_append_char(b, '&');
                }
                else {
                    buffer_append_char(b, '?');
                }
                burl_append(b, BUF_PTR_LEN(qs), flags);
            }
            p+=3; /* leave p on the closing '}' */
            break;
        }
        else if (p[0] == 'e' && p[1] == 'n' && p[2] == 'c'
                 && 0 == strncmp((const char *)p+3, "b64u:", 5)) {
            flags |= BURL_ENCODE_B64U;
            p+=8;
        }
        else if (p[0] == 'd' && p[1] == 'e' && p[2] == 'c'
                 && 0 == strncmp((const char *)p+3, "b64u:", 5)) {
            flags |= BURL_DECODE_B64U;
            p+=8;
        }
        else ++p;  /* skip unrecognized char */
    }
    if (*p == '\0') return -1;
    if (*p != '}') { /* light_isdigit(*p) */
        /* one or two digit capture group number */
        unsigned int num = *p - '0';
        ++p;
        if (light_isdigit(*p)) num = num * 10 + (*p++ - '0');
        if (*p != '}') {
            /* ignore anything between the number and the closing '}' */
            p = (const unsigned char *)strchr((const char *)p, '}');
            if (NULL == p) return -1;
        }
        if (0 == flags) flags = BURL_ENCODE_PSNDE; /* default */
      #ifdef HAVE_PCRE
        pattern[0] == '$' /*(else '%')*/
          ? pcre_keyvalue_buffer_append_match(b, ctx, num, flags)
          : pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, flags);
      #endif
    }
    return (int)(p + 1 - (unsigned char *)pattern - 2);
}
358255269d7SGlenn Strauss 
/* Build the substitution result in b from replacement pattern patternb.
 * b is cleared first.  Literal text is copied through; "$n" / "%n"
 * (single digit) and "${...}" / "%{...}" are expanded using ctx; "$$" and
 * "%%" collapse to a single char, and any other "$x" / "%x" pair is copied
 * as-is.  If an extended token is malformed, the result is left truncated
 * at that point. */
static void pcre_keyvalue_buffer_subst(buffer *b, const buffer *patternb, const pcre_keyvalue_ctx *ctx) {
	const char * const pat = patternb->ptr;
	const size_t plen = buffer_clen(patternb);
	size_t lit = 0; /* start of literal text not yet copied to b */
	size_t i = 0;

	buffer_clear(b);

	/* a marker needs at least one following char, hence i + 1 < plen */
	while (i + 1 < plen) {
		const char marker = pat[i];
		if (marker != '$' && marker != '%') {
			++i;
			continue;
		}

		/* flush literal text preceding the marker */
		buffer_append_string_len(b, pat + lit, i - lit);

		const unsigned char next = ((unsigned char *)pat)[i + 1];
		if (next == '{') {
			const int adv = pcre_keyvalue_buffer_subst_ext(b, pat+i, ctx);
			if (adv < 0) return; /* error; truncate result */
			i += (size_t)adv;
		}
		else if (light_isdigit(next)) {
		  #ifdef HAVE_PCRE
			const unsigned int num = (unsigned int)pat[i + 1] - '0';
			if (marker == '$') /*(else '%')*/
				pcre_keyvalue_buffer_append_match(b, ctx, num, 0);
			else
				pcre_keyvalue_buffer_append_ctxmatch(b, ctx, num, 0);
		  #endif
		}
		else {
			/* enable escape: "%%" => "%", "%a" => "%a", "$$" => "$" */
			buffer_append_string_len(b, pat+i, marker == pat[i+1] ? 1 : 2);
		}

		/* step past the two-char token (or onto and past the '}') */
		i += 2;
		lit = i;
	}

	/* copy any trailing literal text */
	buffer_append_string_len(b, pat + lit, plen - lit);
}
396cf9474aaSGlenn Strauss 
pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer * kvb,pcre_keyvalue_ctx * ctx,const buffer * input,buffer * result)39763a6e52cSGlenn Strauss handler_t pcre_keyvalue_buffer_process(const pcre_keyvalue_buffer *kvb, pcre_keyvalue_ctx *ctx, const buffer *input, buffer *result) {
3987db817c5SGlenn Strauss     const pcre_keyvalue *kv = kvb->kv;
3997db817c5SGlenn Strauss     for (int i = 0, used = (int)kvb->used; i < used; ++i, ++kv) {
40063beba3aSGlenn Strauss      #ifdef HAVE_PCRE
4017512d82cSGlenn Strauss       #ifdef HAVE_PCRE2_H
4027512d82cSGlenn Strauss         int n = pcre2_match(kv->code, (PCRE2_SPTR)BUF_PTR_LEN(input),
4037512d82cSGlenn Strauss                             0, 0, kv->match_data, NULL);
4047512d82cSGlenn Strauss       #else
405ba5026aaSGlenn Strauss         #define N 20
406cf9474aaSGlenn Strauss         int ovec[N * 3];
407cf9474aaSGlenn Strauss         #undef N
408af3df29aSGlenn Strauss         int n = pcre_exec(kv->key, kv->key_extra, BUF_PTR_LEN(input),
409cf9474aaSGlenn Strauss                           0, 0, ovec, sizeof(ovec)/sizeof(int));
4107512d82cSGlenn Strauss       #endif
41163beba3aSGlenn Strauss      #else
41263beba3aSGlenn Strauss         int n = 1;
41363beba3aSGlenn Strauss      #endif
414cf9474aaSGlenn Strauss         if (n < 0) {
41563beba3aSGlenn Strauss          #ifdef HAVE_PCRE
4167512d82cSGlenn Strauss           #ifdef HAVE_PCRE2_H
4177512d82cSGlenn Strauss             if (n != PCRE2_ERROR_NOMATCH)
4187512d82cSGlenn Strauss           #else
4197512d82cSGlenn Strauss             if (n != PCRE_ERROR_NOMATCH)
4207512d82cSGlenn Strauss           #endif
42163beba3aSGlenn Strauss          #endif
422cf9474aaSGlenn Strauss                 return HANDLER_ERROR;
423cf9474aaSGlenn Strauss         }
424af3df29aSGlenn Strauss         else if (buffer_is_blank(&kv->value)) {
425cf9474aaSGlenn Strauss             /* short-circuit if blank replacement pattern
426cf9474aaSGlenn Strauss              * (do not attempt to match against remaining kvb rules) */
427cf9474aaSGlenn Strauss             ctx->m = i;
428cf9474aaSGlenn Strauss             return HANDLER_GO_ON;
429cf9474aaSGlenn Strauss         }
430cf9474aaSGlenn Strauss         else { /* it matched */
431cf9474aaSGlenn Strauss             ctx->m = i;
4327db817c5SGlenn Strauss             ctx->n = n;
4337db817c5SGlenn Strauss             ctx->subject = input->ptr;
43463beba3aSGlenn Strauss          #ifdef HAVE_PCRE
4357512d82cSGlenn Strauss           #ifdef HAVE_PCRE2_H
4367512d82cSGlenn Strauss             ctx->ovec = pcre2_get_ovector_pointer(kv->match_data);
4377512d82cSGlenn Strauss           #else
4387db817c5SGlenn Strauss             ctx->ovec = ovec;
4397512d82cSGlenn Strauss           #endif
44063beba3aSGlenn Strauss          #endif
4417db817c5SGlenn Strauss             pcre_keyvalue_buffer_subst(result, &kv->value, ctx);
442cf9474aaSGlenn Strauss             return HANDLER_FINISHED;
443cf9474aaSGlenn Strauss         }
444cf9474aaSGlenn Strauss     }
445cf9474aaSGlenn Strauss 
446cf9474aaSGlenn Strauss     return HANDLER_GO_ON;
447cf9474aaSGlenn Strauss }
4487512d82cSGlenn Strauss 
4493eb7902eSGlenn Strauss 
/* modified from burl_normalize_basic() to handle %% extra encoding layer */
4513eb7902eSGlenn Strauss 
/* li_cton(c,n): store the value of hex digit char c into n.
 * Evaluates non-zero iff c is a hex digit (0-9, A-F, a-f); n then holds 0-15.
 * c (char) and n (nibble) MUST be unsigned integer types: the range checks
 * rely on wrap-around to a large value for chars below '0' or 'A'.
 * c may be evaluated more than once; do not pass an expression with side
 * effects. */
#define li_cton(c,n) \
  (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? ((n) += 10) : 0))
4553eb7902eSGlenn Strauss 
/* Uppercase, in place, the two hex digits of every "%xx" sequence in b.
 * '%' not followed by two hex digits is left untouched. */
static void pcre_keyvalue_burl_percent_toupper (buffer *b)
{
    const unsigned char * const in = (unsigned char *)b->ptr;
    char * const out = b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int hi, lo;
    int i = 0;
    while (i < len) {
        if (in[i] != '%' || !li_cton(in[i+1],hi) || !li_cton(in[i+2],lo)) {
            ++i;
            continue;
        }
        /* clearing bit 0x20 maps 'a'-'f' to 'A'-'F' */
        if (in[i+1] >= 'a') out[i+1] &= 0xdf;
        if (in[i+2] >= 'a') out[i+2] &= 0xdf;
        i += 3; /* skip past "%xx" */
    }
}
4693eb7902eSGlenn Strauss 
/* Uppercase, in place, the two hex digits of every "%%xx" sequence in b
 * ("%%" being the escaped form of '%' in replacement patterns).
 * Other text is left untouched. */
static void pcre_keyvalue_burl_percent_percent_toupper (buffer *b)
{
    const unsigned char * const in = (unsigned char *)b->ptr;
    char * const out = b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int hi, lo;
    int i = 0;
    while (i < len) {
        if (in[i] != '%' || in[i+1] != '%'
            || !li_cton(in[i+2],hi) || !li_cton(in[i+3],lo)) {
            ++i;
            continue;
        }
        /* clearing bit 0x20 maps 'a'-'f' to 'A'-'F' */
        if (in[i+2] >= 'a') out[i+2] &= 0xdf;
        if (in[i+3] >= 'a') out[i+3] &= 0xdf;
        i += 4; /* skip past "%%xx" */
    }
}
4843eb7902eSGlenn Strauss 
/* uppercase hex digits, indexed by nibble value 0-15 */
static const char hex_chars_uc[] = "0123456789ABCDEF";
4863eb7902eSGlenn Strauss 
/* Replace every byte > 0x7F in b with "%XX" (uppercase hex).
 * t is scratch space used to build the result, which is then copied back
 * into b.  b is left unmodified if it contains no such bytes. */
static void pcre_keyvalue_burl_percent_high_UTF8 (buffer *b, buffer *t)
{
    const unsigned char * const src = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int nhigh = 0;
    for (int i = 0; i < len; ++i)
        nhigh += (src[i] > 0x7F);
    if (0 == nhigh) return;

    /* each high byte grows from 1 to 3 chars */
    unsigned char * const dst =
      (unsigned char *)buffer_string_prepare_copy(t, len+(nhigh*2));
    unsigned int o = 0;
    for (int i = 0; i < len; ++i) {
        const unsigned char c = src[i];
        if (c <= 0x7F) {
            dst[o++] = c;
            continue;
        }
        dst[o++] = '%';
        dst[o++] = hex_chars_uc[(c >> 4) & 0xF];
        dst[o++] = hex_chars_uc[c & 0xF];
    }
    buffer_copy_string_len(b, (char *)dst, (size_t)o);
}
5103eb7902eSGlenn Strauss 
/* Replace every byte > 0x7F in b with "%%XX" (uppercase hex; "%%" being the
 * escaped form of '%' in replacement patterns).
 * t is scratch space used to build the result, which is then copied back
 * into b.  b is left unmodified if it contains no such bytes. */
static void pcre_keyvalue_burl_percent_percent_high_UTF8 (buffer *b, buffer *t)
{
    const unsigned char * const src = (unsigned char *)b->ptr;
    const int len = (int)buffer_clen(b);
    unsigned int nhigh = 0;
    for (int i = 0; i < len; ++i)
        nhigh += (src[i] > 0x7F);
    if (0 == nhigh) return;

    /* each high byte grows from 1 to 4 chars */
    unsigned char * const dst =
      (unsigned char *)buffer_string_prepare_copy(t, len+(nhigh*3));
    unsigned int o = 0;
    for (int i = 0; i < len; ++i) {
        const unsigned char c = src[i];
        if (c <= 0x7F) {
            dst[o++] = c;
            continue;
        }
        dst[o++] = '%';
        dst[o++] = '%';
        dst[o++] = hex_chars_uc[(c >> 4) & 0xF];
        dst[o++] = hex_chars_uc[c & 0xF];
    }
    buffer_copy_string_len(b, (char *)dst, (size_t)o);
}
5353eb7902eSGlenn Strauss 
/* Basic normalization of regex and regex replacement to mirror some of
 * the normalizations performed on request URI (for better compatibility).
 * Note: not currently attempting to replace unnecessary percent-encoding
 * (would need to know if regex was intended to match url-path or
 *  query-string or both, and then would have to regex-escape if those
 *  chars were special regex chars such as . * + ? ( ) [ ] | and more)
 * Not attempting to percent-encode chars which should be encoded, again
 * since regex might target url-path, query-string, or both, and we would
 * have to avoid percent-encoding special regex chars.
 * Also not attempting to detect unnecessary regex-escapes in, e.g. %\x\x
 * Preserve improper %-encoded sequences which are not %XX (using hex chars)
 * Intentionally not performing path simplification (e.g. ./ ../)
 * If regex-specific normalizations begin to be made to k here,
 * must revisit callers, e.g. one configfile.c use on non-regex string.
 * "%%" (percent_percent) is used in regex replacement strings since
 * otherwise "%n" is used to indicate regex backreference where n is number.
 */
5533eb7902eSGlenn Strauss 
/* Normalize regex key k in place: uppercase the hex digits of "%xx"
 * sequences, then percent-encode bytes > 0x7F as "%XX".
 * t is a scratch buffer. */
void pcre_keyvalue_burl_normalize_key (buffer *k, buffer *t)
{
    /* order matters only in that both passes operate on k in sequence */
    pcre_keyvalue_burl_percent_toupper(k);
    pcre_keyvalue_burl_percent_high_UTF8(k, t);
}
5593eb7902eSGlenn Strauss 
/* Normalize replacement pattern v in place: uppercase the hex digits of
 * "%%xx" sequences, then encode bytes > 0x7F as "%%XX" ("%%" is the escaped
 * '%' of replacement patterns).  t is a scratch buffer. */
void pcre_keyvalue_burl_normalize_value (buffer *v, buffer *t)
{
    /* both passes operate on v in sequence */
    pcre_keyvalue_burl_percent_percent_toupper(v);
    pcre_keyvalue_burl_percent_percent_high_UTF8(v, t);
}
565