xref: /lighttpd1.4/src/buffer.c (revision f98cc674)
1 #include "first.h"
2 
3 #include "buffer.h"
4 
5 #include <stdlib.h>
6 #include <string.h>
7 #include "sys-time.h"   /* strftime() */
8 
/* hex digit lookup tables; code in this file indexes them with a nibble
 * value (0..15) to obtain the matching lowercase or uppercase hex digit */
static const char hex_chars_lc[] = "0123456789abcdef";
static const char hex_chars_uc[] = "0123456789ABCDEF";
11 
12 
13 __attribute_noinline__
14 buffer* buffer_init(void) {
15 	buffer * const b = calloc(1, sizeof(*b));
16 	force_assert(b);
17 	return b;
18 }
19 
20 void buffer_free(buffer *b) {
21 	if (NULL == b) return;
22 	free(b->ptr);
23 	free(b);
24 }
25 
26 void buffer_free_ptr(buffer *b) {
27 	free(b->ptr);
28 	b->ptr = NULL;
29 	b->used = 0;
30 	b->size = 0;
31 }
32 
33 void buffer_move(buffer * restrict b, buffer * restrict src) {
34 	buffer tmp;
35 	buffer_clear(b);
36 	tmp = *src; *src = *b; *b = tmp;
37 }
38 
39 /* make sure buffer is at least "size" big + 1 for '\0'. keep old data */
40 __attribute_cold__
41 __attribute_noinline__
42 __attribute_nonnull__()
43 __attribute_returns_nonnull__
44 static char* buffer_realloc(buffer * const restrict b, const size_t len) {
45     #define BUFFER_PIECE_SIZE 64uL  /*(must be power-of-2)*/
46     size_t sz = (len + 1 + BUFFER_PIECE_SIZE-1) & ~(BUFFER_PIECE_SIZE-1);
47     force_assert(sz > len);
48     if ((sz & (sz-1)) && sz < INT_MAX) {/* not power-2; huge val not expected */
49         /*(optimizer should recognize this and use ffs or clz or equivalent)*/
50         const size_t psz = sz;
51         for (sz = 256; sz < psz; sz <<= 1) ;
52     }
53     sz |= 1; /*(extra +1 for '\0' when needed buffer size is exact power-2)*/
54 
55     b->size = sz;
56     b->ptr = realloc(b->ptr, sz);
57 
58     force_assert(NULL != b->ptr);
59     return b->ptr;
60 }
61 
62 __attribute_cold__
63 __attribute_noinline__
64 __attribute_nonnull__()
65 __attribute_returns_nonnull__
66 static char* buffer_alloc_replace(buffer * const restrict b, const size_t size) {
67     /*(discard old data so realloc() does not copy)*/
68     if (NULL != b->ptr) {
69         free(b->ptr);
70         b->ptr = NULL;
71     }
72     /*(note: if size larger than one lshift, use size instead of power-2)*/
73     const size_t bsize2x = (b->size & ~1uL) << 1;
74     return buffer_realloc(b, bsize2x > size ? bsize2x-1 : size);
75 }
76 
77 char* buffer_string_prepare_copy(buffer * const b, const size_t size) {
78     b->used = 0;
79   #ifdef __COVERITY__ /*(b->ptr is not NULL if b->size is not 0)*/
80     force_assert(size >= b->size || b->ptr);
81   #endif
82     return (size < b->size)
83       ? b->ptr
84       : buffer_alloc_replace(b, size);
85 }
86 
87 __attribute_cold__
88 __attribute_noinline__
89 __attribute_nonnull__()
90 __attribute_returns_nonnull__
91 static char* buffer_string_prepare_append_resize(buffer * const restrict b, const size_t size) {
92     if (b->used < 2) {  /* buffer_is_blank(b) */
93         char * const s = buffer_string_prepare_copy(b, size);
94         *s = '\0'; /*(for case (1 == b->used))*/
95         return s;
96     }
97 
98     /* not empty, b->used already includes a terminating 0 */
99     /*(note: if size larger than one lshift, use size instead of power-2)*/
100     const size_t bsize2x = (b->size & ~1uL) << 1;
101     const size_t req_size = (bsize2x - b->used > size)
102       ? bsize2x-1
103       : b->used + size;
104 
105     /* check for overflow: unsigned overflow is defined to wrap around */
106     force_assert(req_size >= b->used);
107 
108     return buffer_realloc(b, req_size) + b->used - 1;
109 }
110 
111 char* buffer_string_prepare_append(buffer * const b, const size_t size) {
112     const uint32_t len = b->used ? b->used-1 : 0;
113     return (b->size - len >= size + 1)
114       ? b->ptr + len
115       : buffer_string_prepare_append_resize(b, size);
116 }
117 
118 /*(prefer smaller code than inlining buffer_extend in many places in buffer.c)*/
119 __attribute_noinline__
120 char*
121 buffer_extend (buffer * const b, const size_t x)
122 {
123     /* extend buffer to append x (reallocate by power-2 (or larger), if needed)
124      * (combine buffer_string_prepare_append() and buffer_commit())
125      * (future: might make buffer.h static inline func for HTTP/1.1 performance)
126      * pre-sets '\0' byte and b->used (unlike buffer_string_prepare_append())*/
127   #if 0
128     char * const s = buffer_string_prepare_append(b, x);
129     b->used += x + (0 == b->used);
130   #else
131     const uint32_t len = b->used ? b->used-1 : 0;
132     char * const s = (b->size - len >= x + 1)
133       ? b->ptr + len
134       : buffer_string_prepare_append_resize(b, x);
135     b->used = len+x+1;
136   #endif
137     s[x] = '\0';
138     return s;
139 }
140 
141 void buffer_commit(buffer *b, size_t size)
142 {
143 	size_t sz = b->used;
144 	if (0 == sz) sz = 1;
145 
146 	if (size > 0) {
147 		/* check for overflow: unsigned overflow is defined to wrap around */
148 		sz += size;
149 		force_assert(sz > size);
150 	}
151 
152 	b->used = sz;
153 	b->ptr[sz - 1] = '\0';
154 }
155 
156 __attribute_cold__ /*(reduce code size due to inlining)*/
157 void buffer_copy_string(buffer * restrict b, const char * restrict s) {
158     if (__builtin_expect( (NULL == s), 0)) s = "";
159     buffer_copy_string_len(b, s, strlen(s));
160 }
161 
162 void buffer_copy_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) {
163     b->used = len + 1;
164     char * const restrict d = (len < b->size)
165       ? b->ptr
166       : buffer_alloc_replace(b, len);
167     d[len] = '\0';
168     memcpy(d, s, len);
169 }
170 
171 __attribute_cold__ /*(reduce code size due to inlining)*/
172 void buffer_append_string(buffer * restrict b, const char * restrict s) {
173     if (__builtin_expect( (NULL == s), 0)) s = "";
174     buffer_append_string_len(b, s, strlen(s));
175 }
176 
177 /**
178  * append a string to the end of the buffer
179  *
180  * the resulting buffer is terminated with a '\0'
181  * s is treated as a un-terminated string (a \0 is handled a normal character)
182  *
183  * @param b a buffer
184  * @param s the string
185  * @param s_len size of the string (without the terminating \0)
186  */
187 
188 void buffer_append_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) {
189     memcpy(buffer_extend(b, len), s, len);
190 }
191 
192 void buffer_append_str2(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2) {
193     char * const restrict s = buffer_extend(b, len1+len2);
194   #ifdef HAVE_MEMPCPY
195     mempcpy(mempcpy(s, s1, len1), s2, len2);
196   #else
197     memcpy(s,      s1, len1);
198     memcpy(s+len1, s2, len2);
199   #endif
200 }
201 
202 void buffer_append_str3(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2, const char * const s3, const size_t len3) {
203     char * restrict s = buffer_extend(b, len1+len2+len3);
204   #ifdef HAVE_MEMPCPY
205     mempcpy(mempcpy(mempcpy(s, s1, len1), s2, len2), s3, len3);
206   #else
207     memcpy(s,         s1, len1);
208     memcpy((s+=len1), s2, len2);
209     memcpy((s+=len2), s3, len3);
210   #endif
211 }
212 
213 void buffer_append_iovec(buffer * const restrict b, const struct const_iovec * const iov, const size_t n) {
214     size_t len = 0;
215     for (size_t i = 0; i < n; ++i)
216         len += iov[i].iov_len;
217     char *s = buffer_extend(b, len);
218     for (size_t i = 0; i < n; ++i) {
219         if (0 == iov[i].iov_len) continue;
220       #ifdef HAVE_MEMPCPY
221         s = mempcpy(s, iov[i].iov_base, iov[i].iov_len);
222       #else
223         memcpy(s, iov[i].iov_base, iov[i].iov_len);
224         s += iov[i].iov_len;
225       #endif
226     }
227 }
228 
229 void buffer_append_path_len(buffer * restrict b, const char * restrict a, size_t alen) {
230     char * restrict s = buffer_string_prepare_append(b, alen+1);
231     const int aslash = (alen && a[0] == '/');
232     if (b->used > 1 && s[-1] == '/') {
233         if (aslash) {
234             ++a;
235             --alen;
236         }
237     }
238     else {
239         if (0 == b->used) b->used = 1;
240         if (!aslash) {
241             *s++ = '/';
242             ++b->used;
243         }
244     }
245     b->used += alen;
246     s[alen] = '\0';
247     memcpy(s, a, alen);
248 }
249 
250 void
251 buffer_copy_path_len2 (buffer * const restrict b, const char * const restrict s1, size_t len1, const char * const restrict s2, size_t len2)
252 {
253     /*(similar to buffer_copy_string_len(b, s1, len1) but combined allocation)*/
254     memcpy(buffer_string_prepare_copy(b, len1+len2+1), s1, len1);
255     b->used = len1 + 1;                    /*('\0' byte will be written below)*/
256 
257     buffer_append_path_len(b, s2, len2);/*(choice: not inlined, special-cased)*/
258 }
259 
260 void
261 buffer_copy_string_len_lc (buffer * const restrict b, const char * const restrict s, const size_t len)
262 {
263     char * const restrict d = buffer_string_prepare_copy(b, len);
264     b->used = len+1;
265     d[len] = '\0';
266     for (size_t i = 0; i < len; ++i)
267         d[i] = (!light_isupper(s[i])) ? s[i] : s[i] | 0x20;
268 }
269 
270 void buffer_append_uint_hex_lc(buffer *b, uintmax_t value) {
271 	char *buf;
272 	unsigned int shift = 0;
273 
274 	{
275 		uintmax_t copy = value;
276 		do {
277 			copy >>= 8;
278 			shift += 8; /* counting bits */
279 		} while (0 != copy);
280 	}
281 
282 	buf = buffer_extend(b, shift >> 2); /*nibbles (4 bits)*/
283 
284 	while (shift > 0) {
285 		shift -= 4;
286 		*(buf++) = hex_chars_lc[(value >> shift) & 0x0F];
287 	}
288 }
289 
290 __attribute_nonnull__()
291 __attribute_returns_nonnull__
292 static char* utostr(char buf[LI_ITOSTRING_LENGTH], uintmax_t val) {
293 	char *cur = buf+LI_ITOSTRING_LENGTH;
294 	uintmax_t x;
295 	do {
296 		*(--cur) = (char) ('0' + (int)(val - (x = val/10) * 10));
297 	} while (0 != (val = x));           /* val % 10 */
298 	return cur;
299 }
300 
301 __attribute_nonnull__()
302 __attribute_returns_nonnull__
303 static char* itostr(char buf[LI_ITOSTRING_LENGTH], intmax_t val) {
304 	/* absolute value not defined for INTMAX_MIN, but can take absolute
305 	 * value of any negative number via twos complement cast to unsigned.
306 	 * negative sign is prepended after (now unsigned) value is converted
307 	 * to string */
308 	uintmax_t uval = val >= 0 ? (uintmax_t)val : ((uintmax_t)~val) + 1;
309 	char *cur = utostr(buf, uval);
310 	if (val < 0) *(--cur) = '-';
311 
312 	return cur;
313 }
314 
315 void buffer_append_int(buffer *b, intmax_t val) {
316 	char buf[LI_ITOSTRING_LENGTH];
317 	const char * const str = itostr(buf, val);
318 	buffer_append_string_len(b, str, buf+sizeof(buf) - str);
319 }
320 
321 void buffer_append_strftime(buffer * const restrict b, const char * const restrict format, const struct tm * const restrict tm) {
322     /*(localtime_r() or gmtime_r() producing tm should not have failed)*/
323     if (__builtin_expect( (NULL == tm), 0)) return;
324 
325     /*(expecting typical format strings to result in < 64 bytes needed;
326      * skipping buffer_string_space() calculation and providing fixed size)*/
327     size_t rv = strftime(buffer_string_prepare_append(b, 63), 64, format, tm);
328 
329     /* 0 (in some apis) signals the string may have been too small;
330      * but the format could also just have lead to an empty string */
331     if (__builtin_expect( (0 == rv), 0) || __builtin_expect( (rv > 63), 0)) {
332         /* unexpected; give it a second try with a larger string */
333         rv = strftime(buffer_string_prepare_append(b, 4095), 4096, format, tm);
334         if (__builtin_expect( (rv > 4095), 0))/*(input format was ridiculous)*/
335             return;
336     }
337 
338     /*buffer_commit(b, rv);*/
339     b->used += (uint32_t)rv + (0 == b->used);
340 }
341 
342 
343 size_t li_itostrn(char *buf, size_t buf_len, intmax_t val) {
344 	char p_buf[LI_ITOSTRING_LENGTH];
345 	char* const str = itostr(p_buf, val);
346 	size_t len = (size_t)(p_buf+sizeof(p_buf)-str);
347 	force_assert(len <= buf_len);
348 	memcpy(buf, str, len);
349 	return len;
350 }
351 
352 size_t li_utostrn(char *buf, size_t buf_len, uintmax_t val) {
353 	char p_buf[LI_ITOSTRING_LENGTH];
354 	char* const str = utostr(p_buf, val);
355 	size_t len = (size_t)(p_buf+sizeof(p_buf)-str);
356 	force_assert(len <= buf_len);
357 	memcpy(buf, str, len);
358 	return len;
359 }
360 
/* nibble value (0..15) to lowercase hex digit */
#define li_ntox_lc(n) ((n) <= 9 ? (n) + '0' : (n) + 'a' - 10)

/* hex digit c to nibble value, stored in n; evaluates to non-zero on success
 * c (char) and n (nibble) MUST be unsigned integer types */
#define li_cton(c,n) \
  (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? ((n) += 10) : 0))

/* converts hex char (0-9, A-F, a-f) to its value (0..15).
 * returns 0xFF on invalid input.
 */
char hex2int(unsigned char hex) {
	/* unsigned wrap-around pushes anything below '0' far above 9 */
	const unsigned char digit = (unsigned char)(hex - '0');
	if (digit <= 9) return (char)digit;

	/* clearing bit 0x20 folds 'a'..'f' onto 'A'..'F' */
	const unsigned char alpha = (unsigned char)((hex & 0xdf) - 'A');
	if (alpha <= 5) return (char)(alpha + 10);

	return (char)0xFF;
}
374 
/* validate hex string (hexstr,len) and convert it to binary in bin.
 * (e.g. 32-byte MD5 hex string to 16-byte binary MD5, or 64-byte SHA-256 or
 *  SHA-512-256 hex string to 32-byte binary digest)
 *
 * upper and lowercase hex digits are accepted.  len/2 bytes are written.
 *
 * returns 0 on success, -1 if len is odd, if the decoded data would not fit
 * in binlen bytes, or if a non-hex character is found (bytes decoded before
 * the offending pair have already been written to bin).
 */
int li_hex2bin (unsigned char * const bin, const size_t binlen, const char * const hexstr, const size_t len)
{
    /* every output byte consumes exactly two hex digits.  odd-length input is
     * rejected up front: decoding its last pair would read hexstr[len], one
     * byte beyond the length provided by the caller.
     *(len >> 1 is compared instead of binlen << 1 so that nothing can wrap)*/
    if ((len & 1) || (len >> 1) > binlen) return -1;
    for (size_t i = 0; i < len; i+=2) {
        int hi = hexstr[i];
        int lo = hexstr[i+1];
        if ('0' <= hi && hi <= '9')                    hi -= '0';
        else if ((uint32_t)(hi |= 0x20)-'a' <= 'f'-'a')hi += -'a' + 10;
        else                                           return -1;
        if ('0' <= lo && lo <= '9')                    lo -= '0';
        else if ((uint32_t)(lo |= 0x20)-'a' <= 'f'-'a')lo += -'a' + 10;
        else                                           return -1;
        bin[(i >> 1)] = (unsigned char)((hi << 4) | lo);
    }
    return 0;
}
393 
394 
395 __attribute_noinline__
396 int buffer_eq_icase_ssn(const char * const a, const char * const b, const size_t len) {
397     for (size_t i = 0; i < len; ++i) {
398         unsigned int ca = ((unsigned char *)a)[i];
399         unsigned int cb = ((unsigned char *)b)[i];
400         if (ca != cb && ((ca ^ cb) != 0x20 || !light_isalpha(ca))) return 0;
401     }
402     return 1;
403 }
404 
/* ASCII case-insensitive comparison of (a,alen) and (b,blen).
 * returns 1 if equal, 0 if not.  short string sizes expected (< INT_MAX) */
int buffer_eq_icase_ss(const char * const a, const size_t alen, const char * const b, const size_t blen) {
    if (alen != blen) return 0;
    return buffer_eq_icase_ssn(a, b, blen);
}
409 
410 int buffer_eq_icase_slen(const buffer * const b, const char * const s, const size_t slen) {
411     /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/
412     /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */
413     return (b->used == slen + 1) ? buffer_eq_icase_ssn(b->ptr, s, slen) : 0;
414 }
415 
416 int buffer_eq_slen(const buffer * const b, const char * const s, const size_t slen) {
417     /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/
418     /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */
419     return (b->used == slen + 1 && 0 == memcmp(b->ptr, s, slen));
420 }
421 
422 
423 /**
424  * check if two buffer contain the same data
425  */
426 
427 int buffer_is_equal(const buffer *a, const buffer *b) {
428 	/* 1 = equal; 0 = not equal */
429 	return (a->used == b->used && 0 == memcmp(a->ptr, b->ptr, a->used));
430 }
431 
432 
433 void li_tohex_lc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) {
434 	force_assert(2 * s_len > s_len);
435 	force_assert(2 * s_len < buf_len);
436 
437 	for (size_t i = 0; i < s_len; ++i) {
438 		buf[2*i]   = hex_chars_lc[(s[i] >> 4) & 0x0F];
439 		buf[2*i+1] = hex_chars_lc[s[i] & 0x0F];
440 	}
441 	buf[2*s_len] = '\0';
442 }
443 
444 void li_tohex_uc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) {
445 	force_assert(2 * s_len > s_len);
446 	force_assert(2 * s_len < buf_len);
447 
448 	for (size_t i = 0; i < s_len; ++i) {
449 		buf[2*i]   = hex_chars_uc[(s[i] >> 4) & 0x0F];
450 		buf[2*i+1] = hex_chars_uc[s[i] & 0x0F];
451 	}
452 	buf[2*s_len] = '\0';
453 }
454 
455 
456 void buffer_substr_replace (buffer * const restrict b, const size_t offset,
457                             const size_t len, const buffer * const restrict replace)
458 {
459     const size_t blen = buffer_clen(b);
460     const size_t rlen = buffer_clen(replace);
461 
462     if (rlen > len) {
463         buffer_extend(b, rlen-len);
464         memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len);
465     }
466 
467     memcpy(b->ptr+offset, replace->ptr, rlen);
468 
469     if (rlen < len) {
470         memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len);
471         buffer_truncate(b, blen-len+rlen);
472     }
473 }
474 
475 
476 void buffer_append_string_encoded_hex_lc(buffer * const restrict b, const char * const restrict s, size_t len) {
477     unsigned char * const p = (unsigned char *)buffer_extend(b, len*2);
478     for (size_t i = 0; i < len; ++i) {
479         p[(i<<1)]   = hex_chars_lc[(s[i] >> 4) & 0x0F];
480         p[(i<<1)+1] = hex_chars_lc[(s[i])      & 0x0F];
481     }
482 }
483 
484 void buffer_append_string_encoded_hex_uc(buffer * const restrict b, const char * const restrict s, size_t len) {
485     unsigned char * const p = (unsigned char *)buffer_extend(b, len*2);
486     for (size_t i = 0; i < len; ++i) {
487         p[(i<<1)]   = hex_chars_uc[(s[i] >> 4) & 0x0F];
488         p[(i<<1)+1] = hex_chars_uc[(s[i])      & 0x0F];
489     }
490 }
491 
492 
/* lookup table for ENCODING_REL_URI_PART, indexed by byte value:
 * 1 = byte is percent-encoded (%HH), 0 = byte is copied unchanged.
 * encodes everything except: ! ( ) * - . 0-9 A-Z _ a-z ~
 * (unlike encoded_chars_rel_uri, '/' is encoded here) */
static const char encoded_chars_rel_uri_part[] = {
	/*
	0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
	*/
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  00 -  0F control chars */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  10 -  1F */
	1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1,  /*  20 -  2F space " # $ % & ' + , / */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,  /*  30 -  3F : ; < = > ? */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  40 -  4F @ */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,  /*  50 -  5F [ \ ] ^ */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  60 -  6F ` */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,  /*  70 -  7F { | } DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  80 -  8F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  90 -  9F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  A0 -  AF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  B0 -  BF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  C0 -  CF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  D0 -  DF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  E0 -  EF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  F0 -  FF */
};
515 
/* lookup table for ENCODING_REL_URI, indexed by byte value:
 * 1 = byte is percent-encoded (%HH), 0 = byte is copied unchanged.
 * encodes everything except: ! ( ) * - . / 0-9 A-Z _ a-z ~
 * ('/' is kept as-is, so path separators survive the encoding) */
static const char encoded_chars_rel_uri[] = {
	/*
	0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
	*/
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  00 -  0F control chars */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  10 -  1F */
	1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0,  /*  20 -  2F space " # $ % & ' + , */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,  /*  30 -  3F : ; < = > ? */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  40 -  4F @ */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,  /*  50 -  5F [ \ ] ^ */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  60 -  6F ` */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,  /*  70 -  7F { | } DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  80 -  8F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  90 -  9F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  A0 -  AF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  B0 -  BF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  C0 -  CF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  D0 -  DF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  E0 -  EF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  F0 -  FF */
};
538 
/* lookup table for ENCODING_HTML, indexed by byte value:
 * 1 = byte is written as a numeric character reference (&#xHH;),
 * 0 = byte is copied unchanged.
 * encodes control chars, " & ' < > ` DEL, and every byte >= 0x80 */
static const char encoded_chars_html[] = {
	/*
	0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
	*/
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  00 -  0F control chars */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  10 -  1F */
	0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,  /*  20 -  2F " & ' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,  /*  30 -  3F < > */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  40 -  4F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  50 -  5F */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  60 -  6F ` */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,  /*  70 -  7F DEL */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  80 -  8F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  90 -  9F */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  A0 -  AF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  B0 -  BF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  C0 -  CF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  D0 -  DF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  E0 -  EF */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  F0 -  FF */
};
560 
/* lookup table for ENCODING_MINIMAL_XML, indexed by byte value:
 * 1 = byte is written as a numeric character reference (&#xHH;),
 * 0 = byte is copied unchanged.
 * same as encoded_chars_html for 00 - 7F (control chars, " & ' < > ` DEL),
 * but bytes >= 0x80 are passed through unencoded */
static const char encoded_chars_minimal_xml[] = {
	/*
	0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F
	*/
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  00 -  0F control chars */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  /*  10 -  1F */
	0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,  /*  20 -  2F " & ' */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,  /*  30 -  3F < > */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  40 -  4F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  50 -  5F */
	1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  60 -  6F ` */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,  /*  70 -  7F DEL */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  80 -  8F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  90 -  9F */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  A0 -  AF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  B0 -  BF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  C0 -  CF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  D0 -  DF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  E0 -  EF */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /*  F0 -  FF */
};
582 
583 
584 
585 void buffer_append_string_encoded(buffer * const restrict b, const char * const restrict s, size_t s_len, buffer_encoding_t encoding) {
586 	unsigned char *ds, *d;
587 	size_t d_len, ndx;
588 	const char *map = NULL;
589 
590 	switch(encoding) {
591 	case ENCODING_REL_URI:
592 		map = encoded_chars_rel_uri;
593 		break;
594 	case ENCODING_REL_URI_PART:
595 		map = encoded_chars_rel_uri_part;
596 		break;
597 	case ENCODING_HTML:
598 		map = encoded_chars_html;
599 		break;
600 	case ENCODING_MINIMAL_XML:
601 		map = encoded_chars_minimal_xml;
602 		break;
603 	}
604 
605 	/* count to-be-encoded-characters */
606 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
607 		if (map[*ds & 0xFF]) {
608 			switch(encoding) {
609 			case ENCODING_REL_URI:
610 			case ENCODING_REL_URI_PART:
611 				d_len += 3;
612 				break;
613 			case ENCODING_HTML:
614 			case ENCODING_MINIMAL_XML:
615 				d_len += 6;
616 				break;
617 			}
618 		} else {
619 			d_len++;
620 		}
621 	}
622 
623 	d = (unsigned char*) buffer_extend(b, d_len);
624 
625 	if (d_len == s_len) { /*(short-circuit; nothing to encoded)*/
626 		memcpy(d, s, s_len);
627 		return;
628 	}
629 
630 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
631 		if (map[*ds & 0xFF]) {
632 			switch(encoding) {
633 			case ENCODING_REL_URI:
634 			case ENCODING_REL_URI_PART:
635 				d[d_len++] = '%';
636 				d[d_len++] = hex_chars_uc[((*ds) >> 4) & 0x0F];
637 				d[d_len++] = hex_chars_uc[(*ds) & 0x0F];
638 				break;
639 			case ENCODING_HTML:
640 			case ENCODING_MINIMAL_XML:
641 				d[d_len++] = '&';
642 				d[d_len++] = '#';
643 				d[d_len++] = 'x';
644 				d[d_len++] = hex_chars_uc[((*ds) >> 4) & 0x0F];
645 				d[d_len++] = hex_chars_uc[(*ds) & 0x0F];
646 				d[d_len++] = ';';
647 				break;
648 			}
649 		} else {
650 			d[d_len++] = *ds;
651 		}
652 	}
653 }
654 
655 void buffer_append_string_c_escaped(buffer * const restrict b, const char * const restrict s, size_t s_len) {
656 	unsigned char *ds, *d;
657 	size_t d_len, ndx;
658 
659 	/* count to-be-encoded-characters */
660 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
661 		if (__builtin_expect( (*ds >= ' ' && *ds <= '~'), 1))
662 			d_len++;
663 		else { /* CTLs or non-ASCII characters */
664 			switch (*ds) {
665 			case '\t':
666 			case '\r':
667 			case '\n':
668 				d_len += 2;
669 				break;
670 			default:
671 				d_len += 4; /* \xCC */
672 				break;
673 			}
674 		}
675 	}
676 
677 	d = (unsigned char*) buffer_extend(b, d_len);
678 
679 	if (d_len == s_len) { /*(short-circuit; nothing to encoded)*/
680 		memcpy(d, s, s_len);
681 		return;
682 	}
683 
684 	for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) {
685 		if (__builtin_expect( (*ds >= ' ' && *ds <= '~'), 1))
686 			d[d_len++] = *ds;
687 		else { /* CTLs or non-ASCII characters */
688 			d[d_len++] = '\\';
689 			switch (*ds) {
690 			case '\t':
691 				d[d_len++] = 't';
692 				break;
693 			case '\r':
694 				d[d_len++] = 'r';
695 				break;
696 			case '\n':
697 				d[d_len++] = 'n';
698 				break;
699 			default:
700 				d[d_len++] = 'x';
701 				d[d_len++] = hex_chars_lc[(*ds) >> 4];
702 				d[d_len++] = hex_chars_lc[(*ds) & 0x0F];
703 				break;
704 			}
705 		}
706 	}
707 }
708 
709 
710 void
711 buffer_append_bs_escaped (buffer * const restrict b,
712                           const char * restrict s, const size_t len)
713 {
714     /* replaces non-printable chars with escaped string
715      * default: \xHH where HH is the hex representation of the byte
716      * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */
717     /* Intended for use escaping string to be surrounded by double-quotes */
718     /* Performs single pass over string and is optimized for ASCII;
719      * non-ASCII escaping might be slightly sped up by walking input twice,
720      * first to calculate escaped length and extend the destination b, and
721      * second to do the escaping. (This non-ASCII optim is not done here) */
722     buffer_string_prepare_append(b, len);
723     for (const char * const end = s+len; s < end; ++s) {
724         unsigned int c;
725         const char * const ptr = s;
726         do {
727             c = *(const unsigned char *)s;
728         } while (c >= ' ' && c <= '~' && c != '"' && c != '\\' && ++s < end);
729         if (s - ptr) buffer_append_string_len(b, ptr, s - ptr);
730 
731         if (s == end)
732             return;
733 
734         /* ('\a', '\v' shortcuts are technically not json-escaping) */
735         /* ('\0' is also omitted due to the possibility of string corruption if
736          *  the receiver supports decoding octal escapes (\000) and the escaped
737          *  string contains \0 followed by two digits not part of escaping)*/
738 
739         char *d;
740         switch (c) {
741           case '\a':case '\b':case '\t':case '\n':case '\v':case '\f':case '\r':
742             c = "0000000abtnvfr"[c];
743             __attribute_fallthrough__
744           case '"': case '\\':
745             d = buffer_extend(b, 2);
746             d[0] = '\\';
747             d[1] = c;
748             break;
749           default:
750             /* non printable char => \xHH */
751             d = buffer_extend(b, 4);
752             d[0] = '\\';
753             d[1] = 'x';
754             d[2] = hex_chars_uc[c >> 4];
755             d[3] = hex_chars_uc[c & 0xF];
756             break;
757         }
758     }
759 }
760 
761 
762 void
763 buffer_append_bs_escaped_json (buffer * const restrict b,
764                                const char * restrict s, const size_t len)
765 {
766     /* replaces non-printable chars with escaped string
767      * json: \u00HH where HH is the hex representation of the byte
768      * exceptions: " => \", \ => \\, whitespace chars => \n \t etc. */
769     /* Intended for use escaping string to be surrounded by double-quotes */
770     buffer_string_prepare_append(b, len);
771     for (const char * const end = s+len; s < end; ++s) {
772         unsigned int c;
773         const char * const ptr = s;
774         do {
775             c = *(const unsigned char *)s;
776         } while (c >= ' ' && c != '"' && c != '\\' && ++s < end);
777         if (s - ptr) buffer_append_string_len(b, ptr, s - ptr);
778 
779         if (s == end)
780             return;
781 
782         /* ('\a', '\v' shortcuts are technically not json-escaping) */
783         /* ('\0' is also omitted due to the possibility of string corruption if
784          *  the receiver supports decoding octal escapes (\000) and the escaped
785          *  string contains \0 followed by two digits not part of escaping)*/
786 
787         char *d;
788         switch (c) {
789           case '\a':case '\b':case '\t':case '\n':case '\v':case '\f':case '\r':
790             c = "0000000abtnvfr"[c];
791             __attribute_fallthrough__
792           case '"': case '\\':
793             d = buffer_extend(b, 2);
794             d[0] = '\\';
795             d[1] = c;
796             break;
797           default:
798             d = buffer_extend(b, 6);
799             d[0] = '\\';
800             d[1] = 'u';
801             d[2] = '0';
802             d[3] = '0';
803             d[4] = hex_chars_uc[c >> 4];
804             d[5] = hex_chars_uc[c & 0xF];
805             break;
806         }
807     }
808 }
809 
810 
811 /* decodes url-special-chars inplace.
812  * replaces non-printable characters with '_'
813  * (If this is used on a portion of query string, then query string should be
814  *  split on '&', and '+' replaced with ' ' before calling this routine)
815  */
816 
817 void buffer_urldecode_path(buffer * const b) {
818     const size_t len = buffer_clen(b);
819     char *src = len ? memchr(b->ptr, '%', len) : NULL;
820     if (NULL == src) return;
821 
822     char *dst = src;
823     do {
824         /* *src == '%' */
825         unsigned char high = ((unsigned char *)src)[1];
826         unsigned char low = high ? hex2int(((unsigned char *)src)[2]) : 0xFF;
827         if (0xFF != (high = hex2int(high)) && 0xFF != low) {
828             high = (high << 4) | low;   /* map ctrls to '_' */
829             *dst = (high >= 32 && high != 127) ? high : '_';
830             src += 2;
831         } /* else ignore this '%'; leave as-is and move on */
832 
833         while ((*++dst = *++src) != '%' && *src) ;
834     } while (*src);
835     b->used = (dst - b->ptr) + 1;
836 }
837 
838 int buffer_is_valid_UTF8(const buffer *b) {
839     /* https://www.w3.org/International/questions/qa-forms-utf-8 */
840     /*assert(b->used);*//*(b->ptr must exist and be '\0'-terminated)*/
841     const unsigned char *c = (unsigned char *)b->ptr;
842     while (*c) {
843 
844         /*(note: includes ctrls)*/
845         if (                         c[0] <  0x80 ) { ++c;  continue; }
846 
847         if (         0xc2 <= c[0] && c[0] <= 0xdf
848             &&       0x80 <= c[1] && c[1] <= 0xbf ) { c+=2; continue; }
849 
850         if ( (   (   0xe0 == c[0]
851                   && 0xa0 <= c[1] && c[1] <= 0xbf)
852               || (   0xe1 <= c[0] && c[0] <= 0xef && c[0] != 0xed
853                   && 0x80 <= c[1] && c[1] <= 0xbf)
854               || (   0xed == c[0]
855                   && 0x80 <= c[1] && c[1] <= 0x9f)   )
856             &&       0x80 <= c[2] && c[2] <= 0xbf ) { c+=3; continue; }
857 
858         if ( (   (   0xf0 == c[0]
859                   && 0x90 <= c[1] && c[1] <= 0xbf)
860               || (   0xf1 <= c[0] && c[0] <= 0xf3
861                   && 0x80 <= c[1] && c[1] <= 0xbf)
862               || (   0xf4 == c[0]
863                   && 0x80 <= c[1] && c[1] <= 0x8f)   )
864             &&       0x80 <= c[2] && c[2] <= 0xbf
865             &&       0x80 <= c[3] && c[3] <= 0xbf ) { c+=4; continue; }
866 
867         return 0; /* invalid */
868     }
869     return 1; /* valid */
870 }
871 
872 /* - special case: empty string returns empty string
873  * - on windows or cygwin: replace \ with /
874  * - strip leading spaces
875  * - prepends "/" if not present already
876  * - resolve "/../", "//" and "/./" the usual way:
877  *   the first one removes a preceding component, the other two
878  *   get compressed to "/".
879  * - "/." and "/.." at the end are similar, but always leave a trailing
880  *   "/"
881  *
882  * /blah/..         gets  /
883  * /blah/../foo     gets  /foo
884  * /abc/./xyz       gets  /abc/xyz
885  * /abc//xyz        gets  /abc/xyz
886  */
887 
888 void buffer_path_simplify(buffer *b)
889 {
890     char *out = b->ptr;
891     char * const end = b->ptr + b->used - 1;
892 
893     if (__builtin_expect( (buffer_is_blank(b)), 0)) {
894         buffer_blank(b);
895         return;
896     }
897 
898   #if defined(__WIN32) || defined(__CYGWIN__)
899     /* cygwin is treating \ and / the same, so we have to that too */
900     for (char *p = b->ptr; *p; p++) {
901         if (*p == '\\') *p = '/';
902     }
903   #endif
904 
905     *end = '/'; /*(end of path modified to avoid need to check '\0')*/
906 
907     char *walk = out;
908     if (__builtin_expect( (*walk == '/'), 1)) {
909         /* scan to detect (potential) need for path simplification
910          * (repeated '/' or "/.") */
911         do {
912             if (*++walk == '.' || *walk == '/')
913                 break;
914             do { ++walk; } while (*walk != '/');
915         } while (walk != end);
916         if (__builtin_expect( (walk == end), 1)) {
917             /* common case: no repeated '/' or "/." */
918             *end = '\0'; /* overwrite extra '/' added to end of path */
919             return;
920         }
921         out = walk-1;
922     }
923     else {
924         if (walk[0] == '.' && walk[1] == '/')
925             *out = *++walk;
926         else if (walk[0] == '.' && walk[1] == '.' && walk[2] == '/')
927             *out = *(walk += 2);
928         else {
929             while (*++walk != '/') ;
930             out = walk;
931         }
932         ++walk;
933     }
934 
935     while (walk <= end) {
936         /* previous char is '/' at this point (or start of string w/o '/') */
937         if (__builtin_expect( (walk[0] == '/'), 0)) {
938             /* skip repeated '/' (e.g. "///" -> "/") */
939             if (++walk < end)
940                 continue;
941             else {
942                 ++out;
943                 break;
944             }
945         }
946         else if (__builtin_expect( (walk[0] == '.'), 0)) {
947             /* handle "./" and "../" */
948             if (walk[1] == '.' && walk[2] == '/') {
949                 /* handle "../" */
950                 while (out > b->ptr && *--out != '/') ;
951                 *out = '/'; /*(in case path had not started with '/')*/
952                 if ((walk += 3) >= end) {
953                     ++out;
954                     break;
955                 }
956                 else
957                 continue;
958             }
959             else if (walk[1] == '/') {
960                 /* handle "./" */
961                 if ((walk += 2) >= end) {
962                     ++out;
963                     break;
964                 }
965                 continue;
966             }
967             else {
968                 /* accept "." if not part of "../" or "./" */
969                 *++out = '.';
970                 ++walk;
971             }
972         }
973 
974         while ((*++out = *walk++) != '/') ;
975     }
976     *out = *end = '\0'; /* overwrite extra '/' added to end of path */
977     b->used = (out - b->ptr) + 1;
978     /*buffer_truncate(b, out - b->ptr);*/
979 }
980 
981 void buffer_to_lower(buffer * const b) {
982     unsigned char * const restrict s = (unsigned char *)b->ptr;
983     const uint_fast32_t used = b->used;
984     for (uint_fast32_t i = 0; i < used; ++i) {
985         if (light_isupper(s[i])) s[i] |= 0x20;
986     }
987 }
988 
989 
990 void buffer_to_upper(buffer * const b) {
991     unsigned char * const restrict s = (unsigned char *)b->ptr;
992     const uint_fast32_t used = b->used;
993     for (uint_fast32_t i = 0; i < used; ++i) {
994         if (light_islower(s[i])) s[i] &= 0xdf;
995     }
996 }
997