xref: /lighttpd1.4/src/buffer.c (revision 11864d29)
1 #include "first.h"
2 
3 #include "buffer.h"
4 
5 #include <stdlib.h>
6 #include <string.h>
7 #include "sys-time.h"   /* strftime() */
8 
9 static const char hex_chars_lc[] = "0123456789abcdef";
10 static const char hex_chars_uc[] = "0123456789ABCDEF";
11 
12 
13 __attribute_noinline__
14 buffer* buffer_init(void) {
15 	buffer * const b = calloc(1, sizeof(*b));
16 	force_assert(b);
17 	return b;
18 }
19 
20 void buffer_free(buffer *b) {
21 	if (NULL == b) return;
22 	free(b->ptr);
23 	free(b);
24 }
25 
26 void buffer_free_ptr(buffer *b) {
27 	free(b->ptr);
28 	b->ptr = NULL;
29 	b->used = 0;
30 	b->size = 0;
31 }
32 
33 void buffer_move(buffer * restrict b, buffer * restrict src) {
34 	buffer tmp;
35 	buffer_clear(b);
36 	tmp = *src; *src = *b; *b = tmp;
37 }
38 
39 /* make sure buffer is at least "size" big + 1 for '\0'. keep old data */
40 __attribute_cold__
41 __attribute_noinline__
42 __attribute_nonnull__()
43 __attribute_returns_nonnull__
44 static char* buffer_realloc(buffer * const restrict b, const size_t len) {
45     #define BUFFER_PIECE_SIZE 64uL  /*(must be power-of-2)*/
46     size_t sz = (len + 1 + BUFFER_PIECE_SIZE-1) & ~(BUFFER_PIECE_SIZE-1);
47     force_assert(sz > len);
48     if ((sz & (sz-1)) && sz < INT_MAX) {/* not power-2; huge val not expected */
49         /*(optimizer should recognize this and use ffs or clz or equivalent)*/
50         const size_t psz = sz;
51         for (sz = 256; sz < psz; sz <<= 1) ;
52     }
53     sz |= 1; /*(extra +1 for '\0' when needed buffer size is exact power-2)*/
54 
55     b->size = sz;
56     b->ptr = realloc(b->ptr, sz);
57 
58     force_assert(NULL != b->ptr);
59     return b->ptr;
60 }
61 
62 __attribute_cold__
63 __attribute_noinline__
64 __attribute_nonnull__()
65 __attribute_returns_nonnull__
66 static char* buffer_alloc_replace(buffer * const restrict b, const size_t size) {
67     /*(discard old data so realloc() does not copy)*/
68     if (NULL != b->ptr) {
69         free(b->ptr);
70         b->ptr = NULL;
71     }
72     /*(note: if size larger than one lshift, use size instead of power-2)*/
73     const size_t bsize2x = (b->size & ~1uL) << 1;
74     return buffer_realloc(b, bsize2x > size ? bsize2x-1 : size);
75 }
76 
77 char* buffer_string_prepare_copy(buffer * const b, const size_t size) {
78     b->used = 0;
79   #ifdef __COVERITY__ /*(b->ptr is not NULL if b->size is not 0)*/
80     force_assert(size >= b->size || b->ptr);
81   #endif
82     return (size < b->size)
83       ? b->ptr
84       : buffer_alloc_replace(b, size);
85 }
86 
87 __attribute_cold__
88 __attribute_noinline__
89 __attribute_nonnull__()
90 __attribute_returns_nonnull__
91 static char* buffer_string_prepare_append_resize(buffer * const restrict b, const size_t size) {
92     if (b->used < 2) {  /* buffer_is_blank(b) */
93         char * const s = buffer_string_prepare_copy(b, size);
94         *s = '\0'; /*(for case (1 == b->used))*/
95         return s;
96     }
97 
98     /* not empty, b->used already includes a terminating 0 */
99     /*(note: if size larger than one lshift, use size instead of power-2)*/
100     const size_t bsize2x = (b->size & ~1uL) << 1;
101     const size_t req_size = (bsize2x - b->used > size)
102       ? bsize2x-1
103       : b->used + size;
104 
105     /* check for overflow: unsigned overflow is defined to wrap around */
106     force_assert(req_size >= b->used);
107 
108     return buffer_realloc(b, req_size) + b->used - 1;
109 }
110 
111 char* buffer_string_prepare_append(buffer * const b, const size_t size) {
112     const uint32_t len = b->used ? b->used-1 : 0;
113     return (b->size - len >= size + 1)
114       ? b->ptr + len
115       : buffer_string_prepare_append_resize(b, size);
116 }
117 
118 /*(prefer smaller code than inlining buffer_extend in many places in buffer.c)*/
119 __attribute_noinline__
120 char*
121 buffer_extend (buffer * const b, const size_t x)
122 {
123     /* extend buffer to append x (reallocate by power-2 (or larger), if needed)
124      * (combine buffer_string_prepare_append() and buffer_commit())
125      * (future: might make buffer.h static inline func for HTTP/1.1 performance)
126      * pre-sets '\0' byte and b->used (unlike buffer_string_prepare_append())*/
127   #if 0
128     char * const s = buffer_string_prepare_append(b, x);
129     b->used += x + (0 == b->used);
130   #else
131     const uint32_t len = b->used ? b->used-1 : 0;
132     char * const s = (b->size - len >= x + 1)
133       ? b->ptr + len
134       : buffer_string_prepare_append_resize(b, x);
135     b->used = len+x+1;
136   #endif
137     s[x] = '\0';
138     return s;
139 }
140 
141 void buffer_commit(buffer *b, size_t size)
142 {
143 	size_t sz = b->used;
144 	if (0 == sz) sz = 1;
145 
146 	if (size > 0) {
147 		/* check for overflow: unsigned overflow is defined to wrap around */
148 		sz += size;
149 		force_assert(sz > size);
150 	}
151 
152 	b->used = sz;
153 	b->ptr[sz - 1] = '\0';
154 }
155 
156 __attribute_cold__ /*(reduce code size due to inlining)*/
157 void buffer_copy_string(buffer * restrict b, const char * restrict s) {
158     if (__builtin_expect( (NULL == s), 0)) s = "";
159     buffer_copy_string_len(b, s, strlen(s));
160 }
161 
162 void buffer_copy_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) {
163     b->used = len + 1;
164     char * const restrict d = (len < b->size)
165       ? b->ptr
166       : buffer_alloc_replace(b, len);
167     d[len] = '\0';
168     memcpy(d, s, len);
169 }
170 
171 __attribute_cold__ /*(reduce code size due to inlining)*/
172 void buffer_append_string(buffer * restrict b, const char * restrict s) {
173     if (__builtin_expect( (NULL == s), 0)) s = "";
174     buffer_append_string_len(b, s, strlen(s));
175 }
176 
177 /**
178  * append a string to the end of the buffer
179  *
180  * the resulting buffer is terminated with a '\0'
181  * s is treated as a un-terminated string (a \0 is handled a normal character)
182  *
183  * @param b a buffer
184  * @param s the string
185  * @param s_len size of the string (without the terminating \0)
186  */
187 
188 void buffer_append_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) {
189     memcpy(buffer_extend(b, len), s, len);
190 }
191 
192 void buffer_append_str2(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2) {
193     char * const restrict s = buffer_extend(b, len1+len2);
194   #ifdef HAVE_MEMPCPY
195     mempcpy(mempcpy(s, s1, len1), s2, len2);
196   #else
197     memcpy(s,      s1, len1);
198     memcpy(s+len1, s2, len2);
199   #endif
200 }
201 
202 void buffer_append_str3(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2, const char * const s3, const size_t len3) {
203     char * restrict s = buffer_extend(b, len1+len2+len3);
204   #ifdef HAVE_MEMPCPY
205     mempcpy(mempcpy(mempcpy(s, s1, len1), s2, len2), s3, len3);
206   #else
207     memcpy(s,         s1, len1);
208     memcpy((s+=len1), s2, len2);
209     memcpy((s+=len2), s3, len3);
210   #endif
211 }
212 
213 void buffer_append_iovec(buffer * const restrict b, const struct const_iovec * const iov, const size_t n) {
214     size_t len = 0;
215     for (size_t i = 0; i < n; ++i)
216         len += iov[i].iov_len;
217     char *s = buffer_extend(b, len);
218     for (size_t i = 0; i < n; ++i) {
219         if (0 == iov[i].iov_len) continue;
220       #ifdef HAVE_MEMPCPY
221         s = mempcpy(s, iov[i].iov_base, iov[i].iov_len);
222       #else
223         memcpy(s, iov[i].iov_base, iov[i].iov_len);
224         s += iov[i].iov_len;
225       #endif
226     }
227 }
228 
229 void buffer_append_path_len(buffer * restrict b, const char * restrict a, size_t alen) {
230     char * restrict s = buffer_string_prepare_append(b, alen+1);
231     const int aslash = (alen && a[0] == '/');
232     if (b->used > 1 && s[-1] == '/') {
233         if (aslash) {
234             ++a;
235             --alen;
236         }
237     }
238     else {
239         if (0 == b->used) b->used = 1;
240         if (!aslash) {
241             *s++ = '/';
242             ++b->used;
243         }
244     }
245     b->used += alen;
246     s[alen] = '\0';
247     memcpy(s, a, alen);
248 }
249 
250 void
251 buffer_copy_path_len2 (buffer * const restrict b, const char * const restrict s1, size_t len1, const char * const restrict s2, size_t len2)
252 {
253     /*(similar to buffer_copy_string_len(b, s1, len1) but combined allocation)*/
254     memcpy(buffer_string_prepare_copy(b, len1+len2+1), s1, len1);
255     b->used = len1 + 1;                    /*('\0' byte will be written below)*/
256 
257     buffer_append_path_len(b, s2, len2);/*(choice: not inlined, special-cased)*/
258 }
259 
260 void
261 buffer_copy_string_len_lc (buffer * const restrict b, const char * const restrict s, const size_t len)
262 {
263     char * const restrict d = buffer_string_prepare_copy(b, len);
264     b->used = len+1;
265     d[len] = '\0';
266     for (size_t i = 0; i < len; ++i)
267         d[i] = (!light_isupper(s[i])) ? s[i] : s[i] | 0x20;
268 }
269 
270 void buffer_append_uint_hex_lc(buffer *b, uintmax_t value) {
271 	char *buf;
272 	unsigned int shift = 0;
273 
274 	{
275 		uintmax_t copy = value;
276 		do {
277 			copy >>= 8;
278 			shift += 8; /* counting bits */
279 		} while (0 != copy);
280 	}
281 
282 	buf = buffer_extend(b, shift >> 2); /*nibbles (4 bits)*/
283 
284 	while (shift > 0) {
285 		shift -= 4;
286 		*(buf++) = hex_chars_lc[(value >> shift) & 0x0F];
287 	}
288 }
289 
290 __attribute_nonnull__()
291 __attribute_returns_nonnull__
292 static char* utostr(char buf[LI_ITOSTRING_LENGTH], uintmax_t val) {
293 	char *cur = buf+LI_ITOSTRING_LENGTH;
294 	uintmax_t x;
295 	do {
296 		*(--cur) = (char) ('0' + (int)(val - (x = val/10) * 10));
297 	} while (0 != (val = x));           /* val % 10 */
298 	return cur;
299 }
300 
301 __attribute_nonnull__()
302 __attribute_returns_nonnull__
303 static char* itostr(char buf[LI_ITOSTRING_LENGTH], intmax_t val) {
304 	/* absolute value not defined for INTMAX_MIN, but can take absolute
305 	 * value of any negative number via twos complement cast to unsigned.
306 	 * negative sign is prepended after (now unsigned) value is converted
307 	 * to string */
308 	uintmax_t uval = val >= 0 ? (uintmax_t)val : ((uintmax_t)~val) + 1;
309 	char *cur = utostr(buf, uval);
310 	if (val < 0) *(--cur) = '-';
311 
312 	return cur;
313 }
314 
315 void buffer_append_int(buffer *b, intmax_t val) {
316 	char buf[LI_ITOSTRING_LENGTH];
317 	const char * const str = itostr(buf, val);
318 	buffer_append_string_len(b, str, buf+sizeof(buf) - str);
319 }
320 
321 void buffer_append_strftime(buffer * const restrict b, const char * const restrict format, const struct tm * const restrict tm) {
322     /*(localtime_r() or gmtime_r() producing tm should not have failed)*/
323     if (__builtin_expect( (NULL == tm), 0)) return;
324 
325     /*(expecting typical format strings to result in < 64 bytes needed;
326      * skipping buffer_string_space() calculation and providing fixed size)*/
327     size_t rv = strftime(buffer_string_prepare_append(b, 63), 64, format, tm);
328 
329     /* 0 (in some apis) signals the string may have been too small;
330      * but the format could also just have lead to an empty string */
331     if (__builtin_expect( (0 == rv), 0) || __builtin_expect( (rv > 63), 0)) {
332         /* unexpected; give it a second try with a larger string */
333         rv = strftime(buffer_string_prepare_append(b, 4095), 4096, format, tm);
334         if (__builtin_expect( (rv > 4095), 0))/*(input format was ridiculous)*/
335             return;
336     }
337 
338     /*buffer_commit(b, rv);*/
339     b->used += (uint32_t)rv + (0 == b->used);
340 }
341 
342 
343 size_t li_itostrn(char *buf, size_t buf_len, intmax_t val) {
344 	char p_buf[LI_ITOSTRING_LENGTH];
345 	char* const str = itostr(p_buf, val);
346 	size_t len = (size_t)(p_buf+sizeof(p_buf)-str);
347 	force_assert(len <= buf_len);
348 	memcpy(buf, str, len);
349 	return len;
350 }
351 
352 size_t li_utostrn(char *buf, size_t buf_len, uintmax_t val) {
353 	char p_buf[LI_ITOSTRING_LENGTH];
354 	char* const str = utostr(p_buf, val);
355 	size_t len = (size_t)(p_buf+sizeof(p_buf)-str);
356 	force_assert(len <= buf_len);
357 	memcpy(buf, str, len);
358 	return len;
359 }
360 
/* nibble value (0-15) to lowercase hex digit character */
#define li_ntox_lc(n) ((n) <= 9 ? (n) + '0' : (n) + 'a' - 10)

/* hex digit character c to nibble n; evaluates to non-zero on success (and
 * then n holds the value 0-15), to zero if c is not a hex digit.
 * c (char) and n (nibble) MUST be unsigned integer types */
#define li_cton(c,n) \
  (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? ((n) += 10) : 0))

/* converts a hex digit character (0-9, A-F, a-f) to its value 0-15.
 * returns 0xFF (converted to char) on invalid input.
 */
char hex2int(unsigned char hex) {
	unsigned char nibble;
	if (li_cton(hex,nibble))
		return (char)nibble;
	return (char)0xFF;
}
374 
/* Validate hex string hexstr (len characters, upper- or lowercase digits)
 * and convert it to len/2 bytes stored in bin (capacity binlen bytes),
 * e.g. a 32-char MD5 hex string to a 16-byte binary MD5, or a 64-char
 * SHA-256 or SHA-512-256 hex string to a 32-byte binary digest.
 *
 * Returns 0 on success, -1 if len is odd, if the result would not fit in
 * bin, or if a non-hex character is found (bytes converted before the bad
 * character have already been stored in bin at that point).
 *
 * An odd len is rejected up front: the conversion consumes characters in
 * pairs, so an odd len would otherwise read hexstr[len], one past the input
 * the caller declared. */
int li_hex2bin (unsigned char * const bin, const size_t binlen, const char * const hexstr, const size_t len)
{
    if (len & 1) return -1;
    /*(compare in units of bytes; avoids overflow of (binlen << 1))*/
    if ((len >> 1) > binlen) return -1;
    for (size_t i = 0; i < len; i+=2) {
        int hi = hexstr[i];
        int lo = hexstr[i+1];
        if ('0' <= hi && hi <= '9')                    hi -= '0';
        else if ((uint32_t)(hi |= 0x20)-'a' <= 'f'-'a')hi += -'a' + 10;
        else                                           return -1;
        if ('0' <= lo && lo <= '9')                    lo -= '0';
        else if ((uint32_t)(lo |= 0x20)-'a' <= 'f'-'a')lo += -'a' + 10;
        else                                           return -1;
        bin[(i >> 1)] = (unsigned char)((hi << 4) | lo);
    }
    return 0;
}
393 
394 
395 __attribute_noinline__
396 int buffer_eq_icase_ssn(const char * const a, const char * const b, const size_t len) {
397     for (size_t i = 0; i < len; ++i) {
398         unsigned int ca = ((unsigned char *)a)[i];
399         unsigned int cb = ((unsigned char *)b)[i];
400         if (ca != cb) {
401             ca |= 0x20;
402             cb |= 0x20;
403             if (ca != cb) return 0;
404             if (!light_islower(ca)) return 0;
405             if (!light_islower(cb)) return 0;
406         }
407     }
408     return 1;
409 }
410 
/* Case-insensitive comparison of two length-delimited strings.
 * Returns 1 if equal, 0 if not (different lengths are never equal).
 * short string sizes expected (< INT_MAX) */
int buffer_eq_icase_ss(const char * const a, const size_t alen, const char * const b, const size_t blen) {
    if (alen != blen)
        return 0;
    return buffer_eq_icase_ssn(a, b, blen);
}
415 
416 int buffer_eq_icase_slen(const buffer * const b, const char * const s, const size_t slen) {
417     /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/
418     /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */
419     return (b->used == slen + 1) ? buffer_eq_icase_ssn(b->ptr, s, slen) : 0;
420 }
421 
422 int buffer_eq_slen(const buffer * const b, const char * const s, const size_t slen) {
423     /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/
424     /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */
425     return (b->used == slen + 1 && 0 == memcmp(b->ptr, s, slen));
426 }
427 
428 
429 /**
430  * check if two buffer contain the same data
431  */
432 
433 int buffer_is_equal(const buffer *a, const buffer *b) {
434 	/* 1 = equal; 0 = not equal */
435 	return (a->used == b->used && 0 == memcmp(a->ptr, b->ptr, a->used));
436 }
437 
438 
439 void li_tohex_lc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) {
440 	force_assert(2 * s_len > s_len);
441 	force_assert(2 * s_len < buf_len);
442 
443 	for (size_t i = 0; i < s_len; ++i) {
444 		buf[2*i]   = hex_chars_lc[(s[i] >> 4) & 0x0F];
445 		buf[2*i+1] = hex_chars_lc[s[i] & 0x0F];
446 	}
447 	buf[2*s_len] = '\0';
448 }
449 
450 void li_tohex_uc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) {
451 	force_assert(2 * s_len > s_len);
452 	force_assert(2 * s_len < buf_len);
453 
454 	for (size_t i = 0; i < s_len; ++i) {
455 		buf[2*i]   = hex_chars_uc[(s[i] >> 4) & 0x0F];
456 		buf[2*i+1] = hex_chars_uc[s[i] & 0x0F];
457 	}
458 	buf[2*s_len] = '\0';
459 }
460 
461 
462 void buffer_substr_replace (buffer * const restrict b, const size_t offset,
463                             const size_t len, const buffer * const restrict replace)
464 {
465     const size_t blen = buffer_clen(b);
466     const size_t rlen = buffer_clen(replace);
467 
468     if (rlen > len) {
469         buffer_extend(b, blen-len+rlen);
470         memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len);
471     }
472 
473     memcpy(b->ptr+offset, replace->ptr, rlen);
474 
475     if (rlen < len) {
476         memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len);
477         buffer_truncate(b, blen-len+rlen);
478     }
479 }
480 
481 
482 void buffer_append_string_encoded_hex_lc(buffer * const restrict b, const char * const restrict s, size_t len) {
483     unsigned char * const p = (unsigned char *)buffer_extend(b, len*2);
484     for (size_t i = 0; i < len; ++i) {
485         p[(i<<1)]   = hex_chars_lc[(s[i] >> 4) & 0x0F];
486         p[(i<<1)+1] = hex_chars_lc[(s[i])      & 0x0F];
487     }
488 }
489 
490 void buffer_append_string_encoded_hex_uc(buffer * const restrict b, const char * const restrict s, size_t len) {
491     unsigned char * const p = (unsigned char *)buffer_extend(b, len*2);
492     for (size_t i = 0; i < len; ++i) {
493         p[(i<<1)]   = hex_chars_uc[(s[i] >> 4) & 0x0F];
494         p[(i<<1)+1] = hex_chars_uc[(s[i])      & 0x0F];
495     }
496 }
497 
498 
/* Escape map for a single relative-URI path segment, indexed by byte value:
 * 1 = byte must be escaped, 0 = byte is copied as-is.
 * Escaped: everything except  ! ( ) * - . 0-9 A-Z _ a-z ~
 * (in particular '/' IS escaped here) */
static const char encoded_chars_rel_uri_part[] = {
	/* x0 x1 x2 x3  x4 x5 x6 x7  x8 x9 xA xB  xC xD xE xF */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x: control chars */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 1x: control chars */
	1,0,1,1, 1,1,1,1, 0,0,0,1, 1,0,0,1,  /* 2x: space " # $ % & ' + , / */
	0,0,0,0, 0,0,0,0, 0,0,1,1, 1,1,1,1,  /* 3x: : ; < = > ? */
	1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 4x: @ */
	0,0,0,0, 0,0,0,0, 0,0,0,1, 1,1,1,0,  /* 5x: [ \ ] ^ */
	1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 6x: ` */
	0,0,0,0, 0,0,0,0, 0,0,0,1, 1,1,0,1,  /* 7x: { | } DEL */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 8x */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 9x */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Ax */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Bx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Cx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Dx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Ex */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Fx */
};
521 
/* Escape map for a relative URI (path with several segments), indexed by
 * byte value: 1 = byte must be escaped, 0 = byte is copied as-is.
 * Escaped: everything except  ! ( ) * - . / 0-9 A-Z _ a-z ~
 * (differs from the per-segment map only in that '/' is kept) */
static const char encoded_chars_rel_uri[] = {
	/* x0 x1 x2 x3  x4 x5 x6 x7  x8 x9 xA xB  xC xD xE xF */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x: control chars */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 1x: control chars */
	1,0,1,1, 1,1,1,1, 0,0,0,1, 1,0,0,0,  /* 2x: space " # $ % & ' + , */
	0,0,0,0, 0,0,0,0, 0,0,1,1, 1,1,1,1,  /* 3x: : ; < = > ? */
	1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 4x: @ */
	0,0,0,0, 0,0,0,0, 0,0,0,1, 1,1,1,0,  /* 5x: [ \ ] ^ */
	1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 6x: ` */
	0,0,0,0, 0,0,0,0, 0,0,0,1, 1,1,0,1,  /* 7x: { | } DEL */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 8x */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 9x */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Ax */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Bx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Cx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Dx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Ex */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Fx */
};
544 
/* Escape map for HTML output, indexed by byte value:
 * 1 = byte must be escaped, 0 = byte is copied as-is.
 * Escaped: control chars, " & ' < > ` DEL and every byte >= 0x80 */
static const char encoded_chars_html[] = {
	/* x0 x1 x2 x3  x4 x5 x6 x7  x8 x9 xA xB  xC xD xE xF */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x: control chars */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 1x: control chars */
	0,0,1,0, 0,0,1,1, 0,0,0,0, 0,0,0,0,  /* 2x: " & ' */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,1,0,  /* 3x: < > */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 4x */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 5x */
	1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 6x: ` */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1,  /* 7x: DEL */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 8x */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 9x */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Ax */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Bx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Cx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Dx */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Ex */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* Fx */
};
566 
/* Escape map for minimal XML output, indexed by byte value:
 * 1 = byte must be escaped, 0 = byte is copied as-is.
 * Escaped: control chars, " & ' < > ` and DEL; unlike the HTML map, bytes
 * >= 0x80 are passed through unchanged */
static const char encoded_chars_minimal_xml[] = {
	/* x0 x1 x2 x3  x4 x5 x6 x7  x8 x9 xA xB  xC xD xE xF */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 0x: control chars */
	1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,  /* 1x: control chars */
	0,0,1,0, 0,0,1,1, 0,0,0,0, 0,0,0,0,  /* 2x: " & ' */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,1,0,  /* 3x: < > */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 4x */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 5x */
	1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 6x: ` */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,1,  /* 7x: DEL */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 8x */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* 9x */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* Ax */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* Bx */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* Cx */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* Dx */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* Ex */
	0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,  /* Fx */
};
588 
589 
590 
591 void buffer_append_string_encoded(buffer * const restrict b, const char * const restrict s, size_t s_len, buffer_encoding_t encoding) {
592 	unsigned char *ds, *d;
593 	size_t d_len, ndx;
594 	const char *map = NULL;
595 
596 	switch(encoding) {
597 	case ENCODING_REL_URI:
598 		map = encoded_chars_rel_uri;
599 		break;
600 	case ENCODING_REL_URI_PART:
601 		map = encoded_chars_rel_uri_part;
602 		break;
603 	case ENCODING_HTML:
604 		map = encoded_chars_html;
605 		break;
606 	case ENCODING_MINIMAL_XML:
607 		map = encoded_chars_minimal_xml;
608 		break;
609 	}
610 
611 	/* count to-be-encoded-characters */
612 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
613 		if (map[*ds & 0xFF]) {
614 			switch(encoding) {
615 			case ENCODING_REL_URI:
616 			case ENCODING_REL_URI_PART:
617 				d_len += 3;
618 				break;
619 			case ENCODING_HTML:
620 			case ENCODING_MINIMAL_XML:
621 				d_len += 6;
622 				break;
623 			}
624 		} else {
625 			d_len++;
626 		}
627 	}
628 
629 	d = (unsigned char*) buffer_extend(b, d_len);
630 
631 	if (d_len == s_len) { /*(short-circuit; nothing to encoded)*/
632 		memcpy(d, s, s_len);
633 		return;
634 	}
635 
636 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
637 		if (map[*ds & 0xFF]) {
638 			switch(encoding) {
639 			case ENCODING_REL_URI:
640 			case ENCODING_REL_URI_PART:
641 				d[d_len++] = '%';
642 				d[d_len++] = hex_chars_uc[((*ds) >> 4) & 0x0F];
643 				d[d_len++] = hex_chars_uc[(*ds) & 0x0F];
644 				break;
645 			case ENCODING_HTML:
646 			case ENCODING_MINIMAL_XML:
647 				d[d_len++] = '&';
648 				d[d_len++] = '#';
649 				d[d_len++] = 'x';
650 				d[d_len++] = hex_chars_uc[((*ds) >> 4) & 0x0F];
651 				d[d_len++] = hex_chars_uc[(*ds) & 0x0F];
652 				d[d_len++] = ';';
653 				break;
654 			}
655 		} else {
656 			d[d_len++] = *ds;
657 		}
658 	}
659 }
660 
661 void buffer_append_string_c_escaped(buffer * const restrict b, const char * const restrict s, size_t s_len) {
662 	unsigned char *ds, *d;
663 	size_t d_len, ndx;
664 
665 	/* count to-be-encoded-characters */
666 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
667 		if (__builtin_expect( (*ds >= ' ' && *ds <= '~'), 1))
668 			d_len++;
669 		else { /* CTLs or non-ASCII characters */
670 			switch (*ds) {
671 			case '\t':
672 			case '\r':
673 			case '\n':
674 				d_len += 2;
675 				break;
676 			default:
677 				d_len += 4; /* \xCC */
678 				break;
679 			}
680 		}
681 	}
682 
683 	d = (unsigned char*) buffer_extend(b, d_len);
684 
685 	if (d_len == s_len) { /*(short-circuit; nothing to encoded)*/
686 		memcpy(d, s, s_len);
687 		return;
688 	}
689 
690 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
691 		if (__builtin_expect( (*ds >= ' ' && *ds <= '~'), 1))
692 			d[d_len++] = *ds;
693 		else { /* CTLs or non-ASCII characters */
694 			d[d_len++] = '\\';
695 			switch (*ds) {
696 			case '\t':
697 				d[d_len++] = 't';
698 				break;
699 			case '\r':
700 				d[d_len++] = 'r';
701 				break;
702 			case '\n':
703 				d[d_len++] = 'n';
704 				break;
705 			default:
706 				d[d_len++] = 'x';
707 				d[d_len++] = hex_chars_lc[(*ds) >> 4];
708 				d[d_len++] = hex_chars_lc[(*ds) & 0x0F];
709 				break;
710 			}
711 		}
712 	}
713 }
714 
715 
716 void
717 buffer_append_bs_escaped (buffer * const restrict b,
718                           const char * restrict s, const size_t len)
719 {
720     /* replaces non-printable chars with escaped string
721      * default: \xHH where HH is the hex representation of the byte
722      * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */
723     /* Intended for use escaping string to be surrounded by double-quotes */
724     /* Performs single pass over string and is optimized for ASCII;
725      * non-ASCII escaping might be slightly sped up by walking input twice,
726      * first to calculate escaped length and extend the destination b, and
727      * second to do the escaping. (This non-ASCII optim is not done here) */
728     buffer_string_prepare_append(b, len);
729     for (const char * const end = s+len; s < end; ++s) {
730         unsigned int c;
731         const char * const ptr = s;
732         do {
733             c = *(const unsigned char *)s;
734         } while (c >= ' ' && c <= '~' && c != '"' && c != '\\' && ++s < end);
735         if (s - ptr) buffer_append_string_len(b, ptr, s - ptr);
736 
737         if (s == end)
738             return;
739 
740         /* ('\a', '\v' shortcuts are technically not json-escaping) */
741         /* ('\0' is also omitted due to the possibility of string corruption if
742          *  the receiver supports decoding octal escapes (\000) and the escaped
743          *  string contains \0 followed by two digits not part of escaping)*/
744 
745         char *d;
746         switch (c) {
747           case '\a':case '\b':case '\t':case '\n':case '\v':case '\f':case '\r':
748             c = "0000000abtnvfr"[c];
749             __attribute_fallthrough__
750           case '"': case '\\':
751             d = buffer_extend(b, 2);
752             d[0] = '\\';
753             d[1] = c;
754             break;
755           default:
756             /* non printable char => \xHH */
757             d = buffer_extend(b, 4);
758             d[0] = '\\';
759             d[1] = 'x';
760             d[2] = hex_chars_uc[c >> 4];
761             d[3] = hex_chars_uc[c & 0xF];
762             break;
763         }
764     }
765 }
766 
767 
768 void
769 buffer_append_bs_escaped_json (buffer * const restrict b,
770                                const char * restrict s, const size_t len)
771 {
772     /* replaces non-printable chars with escaped string
773      * json: \u00HH where HH is the hex representation of the byte
774      * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */
775     /* Intended for use escaping string to be surrounded by double-quotes */
776     buffer_string_prepare_append(b, len);
777     for (const char * const end = s+len; s < end; ++s) {
778         unsigned int c;
779         const char * const ptr = s;
780         do {
781             c = *(const unsigned char *)s;
782         } while (c >= ' ' && c != '"' && c != '\\' && ++s < end);
783         if (s - ptr) buffer_append_string_len(b, ptr, s - ptr);
784 
785         if (s == end)
786             return;
787 
788         /* ('\a', '\v' shortcuts are technically not json-escaping) */
789         /* ('\0' is also omitted due to the possibility of string corruption if
790          *  the receiver supports decoding octal escapes (\000) and the escaped
791          *  string contains \0 followed by two digits not part of escaping)*/
792 
793         char *d;
794         switch (c) {
795           case '\a':case '\b':case '\t':case '\n':case '\v':case '\f':case '\r':
796             c = "0000000abtnvfr"[c];
797             __attribute_fallthrough__
798           case '"': case '\\':
799             d = buffer_extend(b, 2);
800             d[0] = '\\';
801             d[1] = c;
802             break;
803           default:
804             d = buffer_extend(b, 6);
805             d[0] = '\\';
806             d[1] = 'u';
807             d[2] = '0';
808             d[3] = '0';
809             d[4] = hex_chars_uc[c >> 4];
810             d[5] = hex_chars_uc[c & 0xF];
811             break;
812         }
813     }
814 }
815 
816 
817 /* decodes url-special-chars inplace.
818  * replaces non-printable characters with '_'
819  * (If this is used on a portion of query string, then query string should be
820  *  split on '&', and '+' replaced with ' ' before calling this routine)
821  */
822 
823 void buffer_urldecode_path(buffer * const b) {
824     const size_t len = buffer_clen(b);
825     char *src = len ? memchr(b->ptr, '%', len) : NULL;
826     if (NULL == src) return;
827 
828     char *dst = src;
829     do {
830         /* *src == '%' */
831         unsigned char high = ((unsigned char *)src)[1];
832         unsigned char low = high ? hex2int(((unsigned char *)src)[2]) : 0xFF;
833         if (0xFF != (high = hex2int(high)) && 0xFF != low) {
834             high = (high << 4) | low;   /* map ctrls to '_' */
835             *dst = (high >= 32 && high != 127) ? high : '_';
836             src += 2;
837         } /* else ignore this '%'; leave as-is and move on */
838 
839         while ((*++dst = *++src) != '%' && *src) ;
840     } while (*src);
841     b->used = (dst - b->ptr) + 1;
842 }
843 
844 int buffer_is_valid_UTF8(const buffer *b) {
845     /* https://www.w3.org/International/questions/qa-forms-utf-8 */
846     /*assert(b->used);*//*(b->ptr must exist and be '\0'-terminated)*/
847     const unsigned char *c = (unsigned char *)b->ptr;
848     while (*c) {
849 
850         /*(note: includes ctrls)*/
851         if (                         c[0] <  0x80 ) { ++c;  continue; }
852 
853         if (         0xc2 <= c[0] && c[0] <= 0xdf
854             &&       0x80 <= c[1] && c[1] <= 0xbf ) { c+=2; continue; }
855 
856         if ( (   (   0xe0 == c[0]
857                   && 0xa0 <= c[1] && c[1] <= 0xbf)
858               || (   0xe1 <= c[0] && c[0] <= 0xef && c[0] != 0xed
859                   && 0x80 <= c[1] && c[1] <= 0xbf)
860               || (   0xed == c[0]
861                   && 0x80 <= c[1] && c[1] <= 0x9f)   )
862             &&       0x80 <= c[2] && c[2] <= 0xbf ) { c+=3; continue; }
863 
864         if ( (   (   0xf0 == c[0]
865                   && 0x90 <= c[1] && c[1] <= 0xbf)
866               || (   0xf1 <= c[0] && c[0] <= 0xf3
867                   && 0x80 <= c[1] && c[1] <= 0xbf)
868               || (   0xf4 == c[0]
869                   && 0x80 <= c[1] && c[1] <= 0x8f)   )
870             &&       0x80 <= c[2] && c[2] <= 0xbf
871             &&       0x80 <= c[3] && c[3] <= 0xbf ) { c+=4; continue; }
872 
873         return 0; /* invalid */
874     }
875     return 1; /* valid */
876 }
877 
878 /* - special case: empty string returns empty string
879  * - on windows or cygwin: replace \ with /
880  * - strip leading spaces
881  * - prepends "/" if not present already
882  * - resolve "/../", "//" and "/./" the usual way:
883  *   the first one removes a preceding component, the other two
884  *   get compressed to "/".
885  * - "/." and "/.." at the end are similar, but always leave a trailing
886  *   "/"
887  *
888  * /blah/..         gets  /
889  * /blah/../foo     gets  /foo
890  * /abc/./xyz       gets  /abc/xyz
891  * /abc//xyz        gets  /abc/xyz
892  */
893 
894 void buffer_path_simplify(buffer *b)
895 {
896     char *out = b->ptr;
897     char * const end = b->ptr + b->used - 1;
898 
899     if (__builtin_expect( (buffer_is_blank(b)), 0)) {
900         buffer_blank(b);
901         return;
902     }
903 
904   #if defined(__WIN32) || defined(__CYGWIN__)
905     /* cygwin is treating \ and / the same, so we have to that too */
906     for (char *p = b->ptr; *p; p++) {
907         if (*p == '\\') *p = '/';
908     }
909   #endif
910 
911     *end = '/'; /*(end of path modified to avoid need to check '\0')*/
912 
913     char *walk = out;
914     if (__builtin_expect( (*walk == '/'), 1)) {
915         /* scan to detect (potential) need for path simplification
916          * (repeated '/' or "/.") */
917         do {
918             if (*++walk == '.' || *walk == '/')
919                 break;
920             do { ++walk; } while (*walk != '/');
921         } while (walk != end);
922         if (__builtin_expect( (walk == end), 1)) {
923             /* common case: no repeated '/' or "/." */
924             *end = '\0'; /* overwrite extra '/' added to end of path */
925             return;
926         }
927         out = walk-1;
928     }
929     else {
930         if (walk[0] == '.' && walk[1] == '/')
931             *out = *++walk;
932         else if (walk[0] == '.' && walk[1] == '.' && walk[2] == '/')
933             *out = *(walk += 2);
934         else {
935             while (*++walk != '/') ;
936             out = walk;
937         }
938         ++walk;
939     }
940 
941     while (walk <= end) {
942         /* previous char is '/' at this point (or start of string w/o '/') */
943         if (__builtin_expect( (walk[0] == '/'), 0)) {
944             /* skip repeated '/' (e.g. "///" -> "/") */
945             if (++walk < end)
946                 continue;
947             else {
948                 ++out;
949                 break;
950             }
951         }
952         else if (__builtin_expect( (walk[0] == '.'), 0)) {
953             /* handle "./" and "../" */
954             if (walk[1] == '.' && walk[2] == '/') {
955                 /* handle "../" */
956                 while (out > b->ptr && *--out != '/') ;
957                 *out = '/'; /*(in case path had not started with '/')*/
958                 if ((walk += 3) >= end) {
959                     ++out;
960                     break;
961                 }
962                 else
963                 continue;
964             }
965             else if (walk[1] == '/') {
966                 /* handle "./" */
967                 if ((walk += 2) >= end) {
968                     ++out;
969                     break;
970                 }
971                 continue;
972             }
973             else {
974                 /* accept "." if not part of "../" or "./" */
975                 *++out = '.';
976                 ++walk;
977             }
978         }
979 
980         while ((*++out = *walk++) != '/') ;
981     }
982     *out = *end = '\0'; /* overwrite extra '/' added to end of path */
983     b->used = (out - b->ptr) + 1;
984     /*buffer_truncate(b, out - b->ptr);*/
985 }
986 
987 void buffer_to_lower(buffer * const b) {
988     unsigned char * const restrict s = (unsigned char *)b->ptr;
989     const uint_fast32_t used = b->used;
990     for (uint_fast32_t i = 0; i < used; ++i) {
991         if (light_isupper(s[i])) s[i] |= 0x20;
992     }
993 }
994 
995 
996 void buffer_to_upper(buffer * const b) {
997     unsigned char * const restrict s = (unsigned char *)b->ptr;
998     const uint_fast32_t used = b->used;
999     for (uint_fast32_t i = 0; i < used; ++i) {
1000         if (light_islower(s[i])) s[i] &= 0xdf;
1001     }
1002 }
1003