1 #include "first.h" 2 3 #include "buffer.h" 4 5 #include <stdlib.h> 6 #include <string.h> 7 #include "sys-time.h" /* strftime() */ 8 9 static const char hex_chars_lc[] = "0123456789abcdef"; 10 static const char hex_chars_uc[] = "0123456789ABCDEF"; 11 12 13 __attribute_noinline__ 14 buffer* buffer_init(void) { 15 buffer * const b = calloc(1, sizeof(*b)); 16 force_assert(b); 17 return b; 18 } 19 20 buffer *buffer_init_buffer(const buffer *src) { 21 buffer * const b = buffer_init(); 22 buffer_copy_string_len(b, BUF_PTR_LEN(src)); 23 return b; 24 } 25 26 void buffer_free(buffer *b) { 27 if (NULL == b) return; 28 29 free(b->ptr); 30 free(b); 31 } 32 33 void buffer_free_ptr(buffer *b) { 34 free(b->ptr); 35 b->ptr = NULL; 36 b->used = 0; 37 b->size = 0; 38 } 39 40 void buffer_move(buffer * restrict b, buffer * restrict src) { 41 buffer tmp; 42 buffer_clear(b); 43 tmp = *src; *src = *b; *b = tmp; 44 } 45 46 /* make sure buffer is at least "size" big + 1 for '\0'. keep old data */ 47 __attribute_cold__ 48 __attribute_noinline__ 49 __attribute_nonnull__() 50 __attribute_returns_nonnull__ 51 static char* buffer_realloc(buffer * const restrict b, const size_t len) { 52 #define BUFFER_PIECE_SIZE 64uL /*(must be power-of-2)*/ 53 size_t sz = (len + 1 + BUFFER_PIECE_SIZE-1) & ~(BUFFER_PIECE_SIZE-1); 54 force_assert(sz > len); 55 if ((sz & (sz-1)) && sz < INT_MAX) {/* not power-2; huge val not expected */ 56 /*(optimizer should recognize this and use ffs or clz or equivalent)*/ 57 const size_t psz = sz; 58 for (sz = 256; sz < psz; sz <<= 1) ; 59 } 60 sz |= 1; /*(extra +1 for '\0' when needed buffer size is exact power-2)*/ 61 62 b->size = sz; 63 b->ptr = realloc(b->ptr, sz); 64 65 force_assert(NULL != b->ptr); 66 return b->ptr; 67 } 68 69 __attribute_cold__ 70 __attribute_noinline__ 71 __attribute_nonnull__() 72 __attribute_returns_nonnull__ 73 static char* buffer_alloc_replace(buffer * const restrict b, const size_t size) { 74 /*(discard old data so realloc() does not copy)*/ 75 if (NULL != b->ptr) { 76 free(b->ptr); 77 b->ptr = NULL; 78 } 79 /*(note: if size larger than one lshift, use size instead of power-2)*/ 80 const size_t bsize2x = (b->size & ~1uL) << 1; 81 return buffer_realloc(b, bsize2x > size ? bsize2x-1 : size); 82 } 83 84 char* buffer_string_prepare_copy(buffer * const b, const size_t size) { 85 b->used = 0; 86 #ifdef __COVERITY__ /*(b->ptr is not NULL if b->size is not 0)*/ 87 force_assert(size >= b->size || b->ptr); 88 #endif 89 return (size < b->size) 90 ? b->ptr 91 : buffer_alloc_replace(b, size); 92 } 93 94 __attribute_cold__ 95 __attribute_noinline__ 96 __attribute_nonnull__() 97 __attribute_returns_nonnull__ 98 static char* buffer_string_prepare_append_resize(buffer * const restrict b, const size_t size) { 99 if (b->used < 2) { /* buffer_is_blank(b) */ 100 char * const s = buffer_string_prepare_copy(b, size); 101 *s = '\0'; /*(for case (1 == b->used))*/ 102 return s; 103 } 104 105 /* not empty, b->used already includes a terminating 0 */ 106 /*(note: if size larger than one lshift, use size instead of power-2)*/ 107 const size_t bsize2x = (b->size & ~1uL) << 1; 108 const size_t req_size = (bsize2x - b->used > size) 109 ? bsize2x-1 110 : b->used + size; 111 112 /* check for overflow: unsigned overflow is defined to wrap around */ 113 force_assert(req_size >= b->used); 114 115 return buffer_realloc(b, req_size) + b->used - 1; 116 } 117 118 char* buffer_string_prepare_append(buffer * const b, const size_t size) { 119 const uint32_t len = b->used ? b->used-1 : 0; 120 return (b->size - len >= size + 1) 121 ? b->ptr + len 122 : buffer_string_prepare_append_resize(b, size); 123 } 124 125 /*(prefer smaller code than inlining buffer_extend in many places in buffer.c)*/ 126 __attribute_noinline__ 127 char* 128 buffer_extend (buffer * const b, const size_t x) 129 { 130 /* extend buffer to append x (reallocate by power-2 (or larger), if needed) 131 * (combine buffer_string_prepare_append() and buffer_commit()) 132 * (future: might make buffer.h static inline func for HTTP/1.1 performance) 133 * pre-sets '\0' byte and b->used (unlike buffer_string_prepare_append())*/ 134 #if 0 135 char * const s = buffer_string_prepare_append(b, x); 136 b->used += x + (0 == b->used); 137 #else 138 const uint32_t len = b->used ? b->used-1 : 0; 139 char * const s = (b->size - len >= x + 1) 140 ? b->ptr + len 141 : buffer_string_prepare_append_resize(b, x); 142 b->used = len+x+1; 143 #endif 144 s[x] = '\0'; 145 return s; 146 } 147 148 void buffer_commit(buffer *b, size_t size) 149 { 150 size_t sz = b->used; 151 if (0 == sz) sz = 1; 152 153 if (size > 0) { 154 /* check for overflow: unsigned overflow is defined to wrap around */ 155 sz += size; 156 force_assert(sz > size); 157 } 158 159 b->used = sz; 160 b->ptr[sz - 1] = '\0'; 161 } 162 163 __attribute_cold__ /*(reduce code size due to inlining)*/ 164 void buffer_copy_string(buffer * restrict b, const char * restrict s) { 165 if (__builtin_expect( (NULL == s), 0)) s = ""; 166 buffer_copy_string_len(b, s, strlen(s)); 167 } 168 169 void buffer_copy_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) { 170 b->used = len + 1; 171 char * const restrict d = (len < b->size) 172 ? b->ptr 173 : buffer_alloc_replace(b, len); 174 d[len] = '\0'; 175 memcpy(d, s, len); 176 } 177 178 __attribute_cold__ /*(reduce code size due to inlining)*/ 179 void buffer_append_string(buffer * restrict b, const char * restrict s) { 180 if (__builtin_expect( (NULL == s), 0)) s = ""; 181 buffer_append_string_len(b, s, strlen(s)); 182 } 183 184 /** 185 * append a string to the end of the buffer 186 * 187 * the resulting buffer is terminated with a '\0' 188 * s is treated as a un-terminated string (a \0 is handled a normal character) 189 * 190 * @param b a buffer 191 * @param s the string 192 * @param s_len size of the string (without the terminating \0) 193 */ 194 195 void buffer_append_string_len(buffer * const restrict b, const char * const restrict s, const size_t len) { 196 memcpy(buffer_extend(b, len), s, len); 197 } 198 199 void buffer_append_str2(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2) { 200 char * const restrict s = buffer_extend(b, len1+len2); 201 #ifdef HAVE_MEMPCPY 202 mempcpy(mempcpy(s, s1, len1), s2, len2); 203 #else 204 memcpy(s, s1, len1); 205 memcpy(s+len1, s2, len2); 206 #endif 207 } 208 209 void buffer_append_str3(buffer * const restrict b, const char * const s1, const size_t len1, const char * const s2, const size_t len2, const char * const s3, const size_t len3) { 210 char * restrict s = buffer_extend(b, len1+len2+len3); 211 #ifdef HAVE_MEMPCPY 212 mempcpy(mempcpy(mempcpy(s, s1, len1), s2, len2), s3, len3); 213 #else 214 memcpy(s, s1, len1); 215 memcpy((s+=len1), s2, len2); 216 memcpy((s+=len2), s3, len3); 217 #endif 218 } 219 220 void buffer_append_iovec(buffer * const restrict b, const struct const_iovec * const iov, const size_t n) { 221 size_t len = 0; 222 for (size_t i = 0; i < n; ++i) 223 len += iov[i].iov_len; 224 char *s = buffer_extend(b, len); 225 for (size_t i = 0; i < n; ++i) { 226 if (0 == iov[i].iov_len) continue; 227 #ifdef HAVE_MEMPCPY 228 s = mempcpy(s, iov[i].iov_base, iov[i].iov_len); 229 #else 230 memcpy(s, iov[i].iov_base, iov[i].iov_len); 231 s += iov[i].iov_len; 232 #endif 233 } 234 } 235 236 void buffer_append_path_len(buffer * restrict b, const char * restrict a, size_t alen) { 237 char * restrict s = buffer_string_prepare_append(b, alen+1); 238 const int aslash = (alen && a[0] == '/'); 239 if (b->used > 1 && s[-1] == '/') { 240 if (aslash) { 241 ++a; 242 --alen; 243 } 244 } 245 else { 246 if (0 == b->used) b->used = 1; 247 if (!aslash) { 248 *s++ = '/'; 249 ++b->used; 250 } 251 } 252 b->used += alen; 253 s[alen] = '\0'; 254 memcpy(s, a, alen); 255 } 256 257 void 258 buffer_copy_path_len2 (buffer * const restrict b, const char * const restrict s1, size_t len1, const char * const restrict s2, size_t len2) 259 { 260 /*(similar to buffer_copy_string_len(b, s1, len1) but combined allocation)*/ 261 memcpy(buffer_string_prepare_copy(b, len1+len2+1), s1, len1); 262 b->used = len1 + 1; /*('\0' byte will be written below)*/ 263 264 buffer_append_path_len(b, s2, len2);/*(choice: not inlined, special-cased)*/ 265 } 266 267 void 268 buffer_copy_string_len_lc (buffer * const restrict b, const char * const restrict s, const size_t len) 269 { 270 char * const restrict d = buffer_string_prepare_copy(b, len); 271 b->used = len+1; 272 d[len] = '\0'; 273 for (size_t i = 0; i < len; ++i) 274 d[i] = (!light_isupper(s[i])) ? s[i] : s[i] | 0x20; 275 } 276 277 void buffer_append_uint_hex_lc(buffer *b, uintmax_t value) { 278 char *buf; 279 unsigned int shift = 0; 280 281 { 282 uintmax_t copy = value; 283 do { 284 copy >>= 8; 285 shift += 8; /* counting bits */ 286 } while (0 != copy); 287 } 288 289 buf = buffer_extend(b, shift >> 2); /*nibbles (4 bits)*/ 290 291 while (shift > 0) { 292 shift -= 4; 293 *(buf++) = hex_chars_lc[(value >> shift) & 0x0F]; 294 } 295 } 296 297 __attribute_nonnull__() 298 __attribute_returns_nonnull__ 299 static char* utostr(char buf[LI_ITOSTRING_LENGTH], uintmax_t val) { 300 char *cur = buf+LI_ITOSTRING_LENGTH; 301 uintmax_t x; 302 do { 303 *(--cur) = (char) ('0' + (int)(val - (x = val/10) * 10)); 304 } while (0 != (val = x)); /* val % 10 */ 305 return cur; 306 } 307 308 __attribute_nonnull__() 309 __attribute_returns_nonnull__ 310 static char* itostr(char buf[LI_ITOSTRING_LENGTH], intmax_t val) { 311 /* absolute value not defined for INTMAX_MIN, but can take absolute 312 * value of any negative number via twos complement cast to unsigned. 313 * negative sign is prepended after (now unsigned) value is converted 314 * to string */ 315 uintmax_t uval = val >= 0 ? (uintmax_t)val : ((uintmax_t)~val) + 1; 316 char *cur = utostr(buf, uval); 317 if (val < 0) *(--cur) = '-'; 318 319 return cur; 320 } 321 322 void buffer_append_int(buffer *b, intmax_t val) { 323 char buf[LI_ITOSTRING_LENGTH]; 324 const char * const str = itostr(buf, val); 325 buffer_append_string_len(b, str, buf+sizeof(buf) - str); 326 } 327 328 void buffer_append_strftime(buffer * const restrict b, const char * const restrict format, const struct tm * const restrict tm) { 329 /*(localtime_r() or gmtime_r() producing tm should not have failed)*/ 330 if (__builtin_expect( (NULL == tm), 0)) return; 331 332 /*(expecting typical format strings to result in < 64 bytes needed; 333 * skipping buffer_string_space() calculation and providing fixed size)*/ 334 size_t rv = strftime(buffer_string_prepare_append(b, 63), 64, format, tm); 335 336 /* 0 (in some apis) signals the string may have been too small; 337 * but the format could also just have lead to an empty string */ 338 if (__builtin_expect( (0 == rv), 0) || __builtin_expect( (rv > 63), 0)) { 339 /* unexpected; give it a second try with a larger string */ 340 rv = strftime(buffer_string_prepare_append(b, 4095), 4096, format, tm); 341 if (__builtin_expect( (rv > 4095), 0))/*(input format was ridiculous)*/ 342 return; 343 } 344 345 /*buffer_commit(b, rv);*/ 346 b->used += (uint32_t)rv + (0 == b->used); 347 } 348 349 350 size_t li_itostrn(char *buf, size_t buf_len, intmax_t val) { 351 char p_buf[LI_ITOSTRING_LENGTH]; 352 char* const str = itostr(p_buf, val); 353 size_t len = (size_t)(p_buf+sizeof(p_buf)-str); 354 force_assert(len <= buf_len); 355 memcpy(buf, str, len); 356 return len; 357 } 358 359 size_t li_utostrn(char *buf, size_t buf_len, uintmax_t val) { 360 char p_buf[LI_ITOSTRING_LENGTH]; 361 char* const str = utostr(p_buf, val); 362 size_t len = (size_t)(p_buf+sizeof(p_buf)-str); 363 force_assert(len <= buf_len); 364 memcpy(buf, str, len); 365 return len; 366 } 367 368 #define li_ntox_lc(n) ((n) <= 9 ? (n) + '0' : (n) + 'a' - 10) 369 370 /* c (char) and n (nibble) MUST be unsigned integer types */ 371 #define li_cton(c,n) \ 372 (((n) = (c) - '0') <= 9 || (((n) = ((c)&0xdf) - 'A') <= 5 ? ((n) += 10) : 0)) 373 374 /* converts hex char (0-9, A-Z, a-z) to decimal. 375 * returns 0xFF on invalid input. 376 */ 377 char hex2int(unsigned char hex) { 378 unsigned char n; 379 return li_cton(hex,n) ? (char)n : 0xFF; 380 } 381 382 int li_hex2bin (unsigned char * const bin, const size_t binlen, const char * const hexstr, const size_t len) 383 { 384 /* validate and transform 32-byte MD5 hex string to 16-byte binary MD5, 385 * or 64-byte SHA-256 or SHA-512-256 hex string to 32-byte binary digest */ 386 if (len > (binlen << 1)) return -1; 387 for (int i = 0, ilen = (int)len; i < ilen; i+=2) { 388 int hi = hexstr[i]; 389 int lo = hexstr[i+1]; 390 if ('0' <= hi && hi <= '9') hi -= '0'; 391 else if ((uint32_t)(hi |= 0x20)-'a' <= 'f'-'a')hi += -'a' + 10; 392 else return -1; 393 if ('0' <= lo && lo <= '9') lo -= '0'; 394 else if ((uint32_t)(lo |= 0x20)-'a' <= 'f'-'a')lo += -'a' + 10; 395 else return -1; 396 bin[(i >> 1)] = (unsigned char)((hi << 4) | lo); 397 } 398 return 0; 399 } 400 401 402 __attribute_noinline__ 403 int buffer_eq_icase_ssn(const char * const a, const char * const b, const size_t len) { 404 for (size_t i = 0; i < len; ++i) { 405 unsigned int ca = ((unsigned char *)a)[i]; 406 unsigned int cb = ((unsigned char *)b)[i]; 407 if (ca != cb) { 408 ca |= 0x20; 409 cb |= 0x20; 410 if (ca != cb) return 0; 411 if (!light_islower(ca)) return 0; 412 if (!light_islower(cb)) return 0; 413 } 414 } 415 return 1; 416 } 417 418 int buffer_eq_icase_ss(const char * const a, const size_t alen, const char * const b, const size_t blen) { 419 /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */ 420 return (alen == blen) ? buffer_eq_icase_ssn(a, b, blen) : 0; 421 } 422 423 int buffer_eq_icase_slen(const buffer * const b, const char * const s, const size_t slen) { 424 /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/ 425 /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */ 426 return (b->used == slen + 1) ? buffer_eq_icase_ssn(b->ptr, s, slen) : 0; 427 } 428 429 int buffer_eq_slen(const buffer * const b, const char * const s, const size_t slen) { 430 /* Note: b must be initialized, i.e. 0 != b->used; uninitialized is not eq*/ 431 /* 1 = equal; 0 = not equal */ /* short string sizes expected (< INT_MAX) */ 432 return (b->used == slen + 1 && 0 == memcmp(b->ptr, s, slen)); 433 } 434 435 436 /** 437 * check if two buffer contain the same data 438 */ 439 440 int buffer_is_equal(const buffer *a, const buffer *b) { 441 /* 1 = equal; 0 = not equal */ 442 return (a->used == b->used && 0 == memcmp(a->ptr, b->ptr, a->used)); 443 } 444 445 446 void li_tohex_lc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) { 447 force_assert(2 * s_len > s_len); 448 force_assert(2 * s_len < buf_len); 449 450 for (size_t i = 0; i < s_len; ++i) { 451 buf[2*i] = hex_chars_lc[(s[i] >> 4) & 0x0F]; 452 buf[2*i+1] = hex_chars_lc[s[i] & 0x0F]; 453 } 454 buf[2*s_len] = '\0'; 455 } 456 457 void li_tohex_uc(char * const restrict buf, size_t buf_len, const char * const restrict s, size_t s_len) { 458 force_assert(2 * s_len > s_len); 459 force_assert(2 * s_len < buf_len); 460 461 for (size_t i = 0; i < s_len; ++i) { 462 buf[2*i] = hex_chars_uc[(s[i] >> 4) & 0x0F]; 463 buf[2*i+1] = hex_chars_uc[s[i] & 0x0F]; 464 } 465 buf[2*s_len] = '\0'; 466 } 467 468 469 void buffer_substr_replace (buffer * const restrict b, const size_t offset, 470 const size_t len, const buffer * const restrict replace) 471 { 472 const size_t blen = buffer_clen(b); 473 const size_t rlen = buffer_clen(replace); 474 475 if (rlen > len) { 476 buffer_extend(b, blen-len+rlen); 477 memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len); 478 } 479 480 memcpy(b->ptr+offset, replace->ptr, rlen); 481 482 if (rlen < len) { 483 memmove(b->ptr+offset+rlen, b->ptr+offset+len, blen-offset-len); 484 buffer_truncate(b, blen-len+rlen); 485 } 486 } 487 488 489 void buffer_append_string_encoded_hex_lc(buffer * const restrict b, const char * const restrict s, size_t len) { 490 unsigned char * const p = (unsigned char *)buffer_extend(b, len*2); 491 for (size_t i = 0; i < len; ++i) { 492 p[(i<<1)] = hex_chars_lc[(s[i] >> 4) & 0x0F]; 493 p[(i<<1)+1] = hex_chars_lc[(s[i]) & 0x0F]; 494 } 495 } 496 497 void buffer_append_string_encoded_hex_uc(buffer * const restrict b, const char * const restrict s, size_t len) { 498 unsigned char * const p = (unsigned char *)buffer_extend(b, len*2); 499 for (size_t i = 0; i < len; ++i) { 500 p[(i<<1)] = hex_chars_uc[(s[i] >> 4) & 0x0F]; 501 p[(i<<1)+1] = hex_chars_uc[(s[i]) & 0x0F]; 502 } 503 } 504 505 506 /* everything except: ! ( ) * - . 0-9 A-Z _ a-z */ 507 static const char encoded_chars_rel_uri_part[] = { 508 /* 509 0 1 2 3 4 5 6 7 8 9 A B C D E F 510 */ 511 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F control chars */ 512 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */ 513 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, /* 20 - 2F space " # $ % & ' + , / */ 514 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* 30 - 3F : ; < = > ? */ 515 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40 - 4F @ */ 516 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 50 - 5F [ \ ] ^ */ 517 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F ` */ 518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 70 - 7F { | } DEL */ 519 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80 - 8F */ 520 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 90 - 9F */ 521 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A0 - AF */ 522 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B0 - BF */ 523 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* C0 - CF */ 524 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* D0 - DF */ 525 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* E0 - EF */ 526 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - FF */ 527 }; 528 529 /* everything except: ! ( ) * - . / 0-9 A-Z _ a-z */ 530 static const char encoded_chars_rel_uri[] = { 531 /* 532 0 1 2 3 4 5 6 7 8 9 A B C D E F 533 */ 534 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F control chars */ 535 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */ 536 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, /* 20 - 2F space " # $ % & ' + , */ 537 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* 30 - 3F : ; < = > ? */ 538 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40 - 4F @ */ 539 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, /* 50 - 5F [ \ ] ^ */ 540 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F ` */ 541 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, /* 70 - 7F { | } DEL */ 542 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80 - 8F */ 543 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 90 - 9F */ 544 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A0 - AF */ 545 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B0 - BF */ 546 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* C0 - CF */ 547 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* D0 - DF */ 548 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* E0 - EF */ 549 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - FF */ 550 }; 551 552 static const char encoded_chars_html[] = { 553 /* 554 0 1 2 3 4 5 6 7 8 9 A B C D E F 555 */ 556 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F control chars */ 557 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */ 558 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 20 - 2F " & ' */ 559 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* 30 - 3F < > */ 560 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40 - 4F */ 561 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50 - 5F */ 562 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F ` */ 563 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 70 - 7F DEL */ 564 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80 - 8F */ 565 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 90 - 9F */ 566 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A0 - AF */ 567 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* B0 - BF */ 568 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* C0 - CF */ 569 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* D0 - DF */ 570 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* E0 - EF */ 571 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* F0 - FF */ 572 }; 573 574 static const char encoded_chars_minimal_xml[] = { 575 /* 576 0 1 2 3 4 5 6 7 8 9 A B C D E F 577 */ 578 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 00 - 0F control chars */ 579 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 10 - 1F */ 580 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, /* 20 - 2F " & ' */ 581 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, /* 30 - 3F < > */ 582 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40 - 4F */ 583 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50 - 5F */ 584 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60 - 6F ` */ 585 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, /* 70 - 7F DEL */ 586 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */ 587 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */ 588 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */ 589 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */ 590 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */ 591 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */ 592 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */ 593 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */ 594 }; 595 596 597 598 void buffer_append_string_encoded(buffer * const restrict b, const char * const restrict s, size_t s_len, buffer_encoding_t encoding) { 599 unsigned char *ds, *d; 600 size_t d_len, ndx; 601 const char *map = NULL; 602 603 switch(encoding) { 604 case ENCODING_REL_URI: 605 map = encoded_chars_rel_uri; 606 break; 607 case ENCODING_REL_URI_PART: 608 map = encoded_chars_rel_uri_part; 609 break; 610 case ENCODING_HTML: 611 map = encoded_chars_html; 612 break; 613 case ENCODING_MINIMAL_XML: 614 map = encoded_chars_minimal_xml; 615 break; 616 } 617 618 /* count to-be-encoded-characters */ 619 for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) { 620 if (map[*ds & 0xFF]) { 621 switch(encoding) { 622 case ENCODING_REL_URI: 623 case ENCODING_REL_URI_PART: 624 d_len += 3; 625 break; 626 case ENCODING_HTML: 627 case ENCODING_MINIMAL_XML: 628 d_len += 6; 629 break; 630 } 631 } else { 632 d_len++; 633 } 634 } 635 636 d = (unsigned char*) buffer_extend(b, d_len); 637 638 if (d_len == s_len) { /*(short-circuit; nothing to encoded)*/ 639 memcpy(d, s, s_len); 640 return; 641 } 642 643 for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) { 644 if (map[*ds & 0xFF]) { 645 switch(encoding) { 646 case ENCODING_REL_URI: 647 case ENCODING_REL_URI_PART: 648 d[d_len++] = '%'; 649 d[d_len++] = hex_chars_uc[((*ds) >> 4) & 0x0F]; 650 d[d_len++] = hex_chars_uc[(*ds) & 0x0F]; 651 break; 652 case ENCODING_HTML: 653 case ENCODING_MINIMAL_XML: 654 d[d_len++] = '&'; 655 d[d_len++] = '#'; 656 d[d_len++] = 'x'; 657 d[d_len++] = hex_chars_uc[((*ds) >> 4) & 0x0F]; 658 d[d_len++] = hex_chars_uc[(*ds) & 0x0F]; 659 d[d_len++] = ';'; 660 break; 661 } 662 } else { 663 d[d_len++] = *ds; 664 } 665 } 666 } 667 668 void buffer_append_string_encoded_json(buffer * const restrict b, const char * const restrict s, const size_t len) { 669 const unsigned char * const restrict ds = (unsigned char *)s; 670 size_t dlen = 0; 671 672 /* calculate space needed for string including encodings */ 673 for (size_t i = 0; i < len; ++i) { 674 int c = ds[i]; 675 if (c == '"' || c == '\\' || c < 0x20 || c == 0x7f) { 676 switch (c) { 677 case '\b': 678 case '\t': 679 case '\n': 680 case '\f': 681 case '\r': 682 case '"': 683 case '\\': 684 dlen += 2; 685 break; 686 default: 687 dlen += 6; /* \uCCCC */ 688 break; 689 } 690 } 691 else { 692 ++dlen; 693 } 694 } 695 696 unsigned char * const d = (unsigned char *)buffer_extend(b, dlen); 697 698 if (__builtin_expect( (dlen == len), 1)) {/*(short-circuit; nothing to encode)*/ 699 memcpy(d, ds, len); 700 return; 701 } 702 703 dlen = 0; 704 for (size_t i = 0; i < len; ++i) { 705 int c = ds[i]; 706 if (c == '"' || c == '\\' || c < 0x20 || c == 0x7f) { 707 d[dlen++] = '\\'; 708 switch (c) { 709 case '\b': 710 d[dlen++] = 'b'; 711 break; 712 case '\t': 713 d[dlen++] = 't'; 714 break; 715 case '\n': 716 d[dlen++] = 'n'; 717 break; 718 case '\f': 719 d[dlen++] = 'f'; 720 break; 721 case '\r': 722 d[dlen++] = 'r'; 723 break; 724 case '"': 725 d[dlen++] = '"'; 726 break; 727 case '\\': 728 d[dlen++] = '\\'; 729 break; 730 default: 731 d[dlen ] = 'u'; 732 d[dlen+1] = '0'; 733 d[dlen+2] = '0'; 734 d[dlen+3] = hex_chars_lc[(c >> 4) & 0x0F]; 735 d[dlen+4] = hex_chars_lc[c & 0x0F]; 736 dlen += 5; 737 break; 738 } 739 } 740 else { 741 d[dlen++] = c; 742 } 743 } 744 } 745 746 747 void buffer_append_string_c_escaped(buffer * const restrict b, const char * const restrict s, size_t s_len) { 748 unsigned char *ds, *d; 749 size_t d_len, ndx; 750 751 /* count to-be-encoded-characters */ 752 for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) { 753 if ((*ds < 0x20) /* control character */ 754 || (*ds >= 0x7f)) { /* DEL + non-ASCII characters */ 755 switch (*ds) { 756 case '\t': 757 case '\r': 758 case '\n': 759 d_len += 2; 760 break; 761 default: 762 d_len += 4; /* \xCC */ 763 break; 764 } 765 } else { 766 d_len++; 767 } 768 } 769 770 d = (unsigned char*) buffer_extend(b, d_len); 771 772 if (d_len == s_len) { /*(short-circuit; nothing to encoded)*/ 773 memcpy(d, s, s_len); 774 return; 775 } 776 777 for (ds = (unsigned char *)s, d_len = 0, ndx = 0; ndx < s_len; ds++, ndx++) { 778 if ((*ds < 0x20) /* control character */ 779 || (*ds >= 0x7f)) { /* DEL + non-ASCII characters */ 780 d[d_len++] = '\\'; 781 switch (*ds) { 782 case '\t': 783 d[d_len++] = 't'; 784 break; 785 case '\r': 786 d[d_len++] = 'r'; 787 break; 788 case '\n': 789 d[d_len++] = 'n'; 790 break; 791 default: 792 d[d_len++] = 'x'; 793 d[d_len++] = hex_chars_lc[((*ds) >> 4) & 0x0F]; 794 d[d_len++] = hex_chars_lc[(*ds) & 0x0F]; 795 break; 796 } 797 } else { 798 d[d_len++] = *ds; 799 } 800 } 801 } 802 803 804 /* decodes url-special-chars inplace. 805 * replaces non-printable characters with '_' 806 * (If this is used on a portion of query string, then query string should be 807 * split on '&', and '+' replaced with ' ' before calling this routine) 808 */ 809 810 void buffer_urldecode_path(buffer * const b) { 811 const size_t len = buffer_clen(b); 812 char *src = len ? memchr(b->ptr, '%', len) : NULL; 813 if (NULL == src) return; 814 815 char *dst = src; 816 do { 817 /* *src == '%' */ 818 unsigned char high = ((unsigned char *)src)[1]; 819 unsigned char low = high ? hex2int(((unsigned char *)src)[2]) : 0xFF; 820 if (0xFF != (high = hex2int(high)) && 0xFF != low) { 821 high = (high << 4) | low; /* map ctrls to '_' */ 822 *dst = (high >= 32 && high != 127) ? high : '_'; 823 src += 2; 824 } /* else ignore this '%'; leave as-is and move on */ 825 826 while ((*++dst = *++src) != '%' && *src) ; 827 } while (*src); 828 b->used = (dst - b->ptr) + 1; 829 } 830 831 int buffer_is_valid_UTF8(const buffer *b) { 832 /* https://www.w3.org/International/questions/qa-forms-utf-8 */ 833 /*assert(b->used);*//*(b->ptr must exist and be '\0'-terminated)*/ 834 const unsigned char *c = (unsigned char *)b->ptr; 835 while (*c) { 836 837 /*(note: includes ctrls)*/ 838 if ( c[0] < 0x80 ) { ++c; continue; } 839 840 if ( 0xc2 <= c[0] && c[0] <= 0xdf 841 && 0x80 <= c[1] && c[1] <= 0xbf ) { c+=2; continue; } 842 843 if ( ( ( 0xe0 == c[0] 844 && 0xa0 <= c[1] && c[1] <= 0xbf) 845 || ( 0xe1 <= c[0] && c[0] <= 0xef && c[0] != 0xed 846 && 0x80 <= c[1] && c[1] <= 0xbf) 847 || ( 0xed == c[0] 848 && 0x80 <= c[1] && c[1] <= 0x9f) ) 849 && 0x80 <= c[2] && c[2] <= 0xbf ) { c+=3; continue; } 850 851 if ( ( ( 0xf0 == c[0] 852 && 0x90 <= c[1] && c[1] <= 0xbf) 853 || ( 0xf1 <= c[0] && c[0] <= 0xf3 854 && 0x80 <= c[1] && c[1] <= 0xbf) 855 || ( 0xf4 == c[0] 856 && 0x80 <= c[1] && c[1] <= 0x8f) ) 857 && 0x80 <= c[2] && c[2] <= 0xbf 858 && 0x80 <= c[3] && c[3] <= 0xbf ) { c+=4; continue; } 859 860 return 0; /* invalid */ 861 } 862 return 1; /* valid */ 863 } 864 865 /* - special case: empty string returns empty string 866 * - on windows or cygwin: replace \ with / 867 * - strip leading spaces 868 * - prepends "/" if not present already 869 * - resolve "/../", "//" and "/./" the usual way: 870 * the first one removes a preceding component, the other two 871 * get compressed to "/". 872 * - "/." and "/.." at the end are similar, but always leave a trailing 873 * "/" 874 * 875 * /blah/.. gets / 876 * /blah/../foo gets /foo 877 * /abc/./xyz gets /abc/xyz 878 * /abc//xyz gets /abc/xyz 879 */ 880 881 void buffer_path_simplify(buffer *b) 882 { 883 char *out = b->ptr; 884 char * const end = b->ptr + b->used - 1; 885 886 if (__builtin_expect( (buffer_is_blank(b)), 0)) { 887 buffer_blank(b); 888 return; 889 } 890 891 #if defined(__WIN32) || defined(__CYGWIN__) 892 /* cygwin is treating \ and / the same, so we have to that too */ 893 for (char *p = b->ptr; *p; p++) { 894 if (*p == '\\') *p = '/'; 895 } 896 #endif 897 898 *end = '/'; /*(end of path modified to avoid need to check '\0')*/ 899 900 char *walk = out; 901 if (__builtin_expect( (*walk == '/'), 1)) { 902 /* scan to detect (potential) need for path simplification 903 * (repeated '/' or "/.") */ 904 do { 905 if (*++walk == '.' || *walk == '/') 906 break; 907 do { ++walk; } while (*walk != '/'); 908 } while (walk != end); 909 if (__builtin_expect( (walk == end), 1)) { 910 /* common case: no repeated '/' or "/." */ 911 *end = '\0'; /* overwrite extra '/' added to end of path */ 912 return; 913 } 914 out = walk-1; 915 } 916 else { 917 if (walk[0] == '.' && walk[1] == '/') 918 *out = *++walk; 919 else if (walk[0] == '.' && walk[1] == '.' && walk[2] == '/') 920 *out = *(walk += 2); 921 else { 922 while (*++walk != '/') ; 923 out = walk; 924 } 925 ++walk; 926 } 927 928 while (walk <= end) { 929 /* previous char is '/' at this point (or start of string w/o '/') */ 930 if (__builtin_expect( (walk[0] == '/'), 0)) { 931 /* skip repeated '/' (e.g. "///" -> "/") */ 932 if (++walk < end) 933 continue; 934 else { 935 ++out; 936 break; 937 } 938 } 939 else if (__builtin_expect( (walk[0] == '.'), 0)) { 940 /* handle "./" and "../" */ 941 if (walk[1] == '.' && walk[2] == '/') { 942 /* handle "../" */ 943 while (out > b->ptr && *--out != '/') ; 944 *out = '/'; /*(in case path had not started with '/')*/ 945 if ((walk += 3) >= end) { 946 ++out; 947 break; 948 } 949 else 950 continue; 951 } 952 else if (walk[1] == '/') { 953 /* handle "./" */ 954 if ((walk += 2) >= end) { 955 ++out; 956 break; 957 } 958 continue; 959 } 960 else { 961 /* accept "." if not part of "../" or "./" */ 962 *++out = '.'; 963 ++walk; 964 } 965 } 966 967 while ((*++out = *walk++) != '/') ; 968 } 969 *out = *end = '\0'; /* overwrite extra '/' added to end of path */ 970 b->used = (out - b->ptr) + 1; 971 /*buffer_truncate(b, out - b->ptr);*/ 972 } 973 974 void buffer_to_lower(buffer * const b) { 975 unsigned char * const restrict s = (unsigned char *)b->ptr; 976 const uint_fast32_t used = b->used; 977 for (uint_fast32_t i = 0; i < used; ++i) { 978 if (light_isupper(s[i])) s[i] |= 0x20; 979 } 980 } 981 982 983 void buffer_to_upper(buffer * const b) { 984 unsigned char * const restrict s = (unsigned char *)b->ptr; 985 const uint_fast32_t used = b->used; 986 for (uint_fast32_t i = 0; i < used; ++i) { 987 if (light_islower(s[i])) s[i] &= 0xdf; 988 } 989 } 990