1 /*
2 * string.c: routines to manipulate counted-length strings
3 * (svn_stringbuf_t and svn_string_t) and C strings.
4 *
5 *
6 * ====================================================================
7 * Licensed to the Apache Software Foundation (ASF) under one
8 * or more contributor license agreements. See the NOTICE file
9 * distributed with this work for additional information
10 * regarding copyright ownership. The ASF licenses this file
11 * to you under the Apache License, Version 2.0 (the
12 * "License"); you may not use this file except in compliance
13 * with the License. You may obtain a copy of the License at
14 *
15 * http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing,
18 * software distributed under the License is distributed on an
19 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 * KIND, either express or implied. See the License for the
21 * specific language governing permissions and limitations
22 * under the License.
23 * ====================================================================
24 */
25
26
27
28 #include <apr.h>
29 #include <assert.h>
30
31 #include <string.h> /* for memcpy(), memcmp(), strlen() */
32 #include <apr_fnmatch.h>
33 #include "svn_string.h" /* loads "svn_types.h" and <apr_pools.h> */
34 #include "svn_ctype.h"
35 #include "private/svn_dep_compat.h"
36 #include "private/svn_string_private.h"
37
38 #include "svn_private_config.h"
39
40
41
42 /* Allocate the space for a memory buffer from POOL.
43 * Return a pointer to the new buffer in *DATA and its size in *SIZE.
44 * The buffer size will be at least MINIMUM_SIZE.
45 *
46 * N.B.: The stringbuf creation functions use this, but since stringbufs
47 * always consume at least 1 byte for the NUL terminator, the
48 * resulting data pointers will never be NULL.
49 */
50 static APR_INLINE void
membuf_create(void ** data,apr_size_t * size,apr_size_t minimum_size,apr_pool_t * pool)51 membuf_create(void **data, apr_size_t *size,
52 apr_size_t minimum_size, apr_pool_t *pool)
53 {
54 /* apr_palloc will allocate multiples of 8.
55 * Thus, we would waste some of that memory if we stuck to the
56 * smaller size. Note that this is safe even if apr_palloc would
57 * use some other alignment or none at all. */
58 minimum_size = APR_ALIGN_DEFAULT(minimum_size);
59 *data = apr_palloc(pool, minimum_size);
60 *size = minimum_size;
61 }
62
63 /* Ensure that the size of a given memory buffer is at least MINIMUM_SIZE
64 * bytes. If *SIZE is already greater than or equal to MINIMUM_SIZE,
65 * this function does nothing.
66 *
67 * If *SIZE is 0, the allocated buffer size will be MINIMUM_SIZE
68 * rounded up to the nearest APR alignment boundary. Otherwse, *SIZE
69 * will be multiplied by a power of two such that the result is
70 * greater or equal to MINIMUM_SIZE. The pointer to the new buffer
71 * will be returned in *DATA, and its size in *SIZE.
72 */
73 static APR_INLINE void
membuf_ensure(void ** data,apr_size_t * size,apr_size_t minimum_size,apr_pool_t * pool)74 membuf_ensure(void **data, apr_size_t *size,
75 apr_size_t minimum_size, apr_pool_t *pool)
76 {
77 if (minimum_size > *size)
78 {
79 apr_size_t new_size = *size;
80
81 if (new_size == 0)
82 new_size = minimum_size;
83 else
84 while (new_size < minimum_size)
85 {
86 const apr_size_t prev_size = new_size;
87 new_size *= 2;
88
89 /* check for apr_size_t overflow */
90 if (prev_size > new_size)
91 {
92 new_size = minimum_size;
93 break;
94 }
95 }
96
97 membuf_create(data, size, new_size, pool);
98 }
99 }
100
101 void
svn_membuf__create(svn_membuf_t * membuf,apr_size_t size,apr_pool_t * pool)102 svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool)
103 {
104 membuf_create(&membuf->data, &membuf->size, size, pool);
105 membuf->pool = pool;
106 }
107
108 void
svn_membuf__ensure(svn_membuf_t * membuf,apr_size_t size)109 svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size)
110 {
111 membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
112 }
113
114 void
svn_membuf__resize(svn_membuf_t * membuf,apr_size_t size)115 svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size)
116 {
117 const void *const old_data = membuf->data;
118 const apr_size_t old_size = membuf->size;
119
120 membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
121
122 /* If we re-allocated MEMBUF->DATA, it cannot be NULL.
123 * Statically initialized membuffers (OLD_DATA) may be NULL, though. */
124 if (old_data && old_data != membuf->data)
125 memcpy(membuf->data, old_data, old_size);
126 }
127
128 /* Always provide an out-of-line implementation of svn_membuf__zero */
129 #undef svn_membuf__zero
130 void
svn_membuf__zero(svn_membuf_t * membuf)131 svn_membuf__zero(svn_membuf_t *membuf)
132 {
133 SVN_MEMBUF__ZERO(membuf);
134 }
135
136 /* Always provide an out-of-line implementation of svn_membuf__nzero */
137 #undef svn_membuf__nzero
138 void
svn_membuf__nzero(svn_membuf_t * membuf,apr_size_t size)139 svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size)
140 {
141 SVN_MEMBUF__NZERO(membuf, size);
142 }
143
144 static APR_INLINE svn_boolean_t
string_compare(const char * str1,const char * str2,apr_size_t len1,apr_size_t len2)145 string_compare(const char *str1,
146 const char *str2,
147 apr_size_t len1,
148 apr_size_t len2)
149 {
150 /* easy way out :) */
151 if (len1 != len2)
152 return FALSE;
153
154 /* now the strings must have identical lengths */
155
156 if ((memcmp(str1, str2, len1)) == 0)
157 return TRUE;
158 else
159 return FALSE;
160 }
161
162 static APR_INLINE apr_size_t
string_first_non_whitespace(const char * str,apr_size_t len)163 string_first_non_whitespace(const char *str, apr_size_t len)
164 {
165 apr_size_t i;
166
167 for (i = 0; i < len; i++)
168 {
169 if (! svn_ctype_isspace(str[i]))
170 return i;
171 }
172
173 /* if we get here, then the string must be entirely whitespace */
174 return len;
175 }
176
177 static APR_INLINE apr_size_t
find_char_backward(const char * str,apr_size_t len,char ch)178 find_char_backward(const char *str, apr_size_t len, char ch)
179 {
180 apr_size_t i = len;
181
182 while (i != 0)
183 {
184 if (str[--i] == ch)
185 return i;
186 }
187
188 /* char was not found, return len */
189 return len;
190 }
191
192
193 /* svn_string functions */
194
195 /* Return a new svn_string_t object, allocated in POOL, initialized with
196 * DATA and SIZE. Do not copy the contents of DATA, just store the pointer.
197 * SIZE is the length in bytes of DATA, excluding the required NUL
198 * terminator. */
199 static svn_string_t *
create_string(const char * data,apr_size_t size,apr_pool_t * pool)200 create_string(const char *data, apr_size_t size,
201 apr_pool_t *pool)
202 {
203 svn_string_t *new_string;
204
205 new_string = apr_palloc(pool, sizeof(*new_string));
206
207 new_string->data = data;
208 new_string->len = size;
209
210 return new_string;
211 }
212
213 /* A data buffer for a zero-length string (just a null terminator). Many
214 * svn_string_t instances may share this same buffer. */
215 static const char empty_buffer[1] = {0};
216
217 svn_string_t *
svn_string_create_empty(apr_pool_t * pool)218 svn_string_create_empty(apr_pool_t *pool)
219 {
220 svn_string_t *new_string = apr_palloc(pool, sizeof(*new_string));
221 new_string->data = empty_buffer;
222 new_string->len = 0;
223
224 return new_string;
225 }
226
227
228 svn_string_t *
svn_string_ncreate(const char * bytes,apr_size_t size,apr_pool_t * pool)229 svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
230 {
231 void *mem;
232 char *data;
233 svn_string_t *new_string;
234
235 /* Allocate memory for svn_string_t and data in one chunk. */
236 mem = apr_palloc(pool, sizeof(*new_string) + size + 1);
237 data = (char*)mem + sizeof(*new_string);
238
239 new_string = mem;
240 new_string->data = data;
241 new_string->len = size;
242
243 /* If SIZE is 0, NULL is valid for BYTES. */
244 if (size)
245 memcpy(data, bytes, size);
246
247 /* Null termination is the convention -- even if we suspect the data
248 to be binary, it's not up to us to decide, it's the caller's
249 call. Heck, that's why they call it the caller! */
250 data[size] = '\0';
251
252 return new_string;
253 }
254
255
256 svn_string_t *
svn_string_create(const char * cstring,apr_pool_t * pool)257 svn_string_create(const char *cstring, apr_pool_t *pool)
258 {
259 return svn_string_ncreate(cstring, strlen(cstring), pool);
260 }
261
262
263 svn_string_t *
svn_string_create_from_buf(const svn_stringbuf_t * strbuf,apr_pool_t * pool)264 svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool)
265 {
266 return svn_string_ncreate(strbuf->data, strbuf->len, pool);
267 }
268
269
270 svn_string_t *
svn_string_createv(apr_pool_t * pool,const char * fmt,va_list ap)271 svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap)
272 {
273 char *data = apr_pvsprintf(pool, fmt, ap);
274
275 /* wrap an svn_string_t around the new data */
276 return create_string(data, strlen(data), pool);
277 }
278
279
280 svn_string_t *
svn_string_createf(apr_pool_t * pool,const char * fmt,...)281 svn_string_createf(apr_pool_t *pool, const char *fmt, ...)
282 {
283 svn_string_t *str;
284
285 va_list ap;
286 va_start(ap, fmt);
287 str = svn_string_createv(pool, fmt, ap);
288 va_end(ap);
289
290 return str;
291 }
292
293
294 svn_boolean_t
svn_string_isempty(const svn_string_t * str)295 svn_string_isempty(const svn_string_t *str)
296 {
297 return (str->len == 0);
298 }
299
300
301 svn_string_t *
svn_string_dup(const svn_string_t * original_string,apr_pool_t * pool)302 svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool)
303 {
304 return (original_string ? svn_string_ncreate(original_string->data,
305 original_string->len, pool)
306 : NULL);
307 }
308
309
310
311 svn_boolean_t
svn_string_compare(const svn_string_t * str1,const svn_string_t * str2)312 svn_string_compare(const svn_string_t *str1, const svn_string_t *str2)
313 {
314 return
315 string_compare(str1->data, str2->data, str1->len, str2->len);
316 }
317
318
319
320 apr_size_t
svn_string_first_non_whitespace(const svn_string_t * str)321 svn_string_first_non_whitespace(const svn_string_t *str)
322 {
323 return
324 string_first_non_whitespace(str->data, str->len);
325 }
326
327
328 apr_size_t
svn_string_find_char_backward(const svn_string_t * str,char ch)329 svn_string_find_char_backward(const svn_string_t *str, char ch)
330 {
331 return find_char_backward(str->data, str->len, ch);
332 }
333
334 svn_string_t *
svn_stringbuf__morph_into_string(svn_stringbuf_t * strbuf)335 svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf)
336 {
337 /* In debug mode, detect attempts to modify the original STRBUF object.
338 */
339 #ifdef SVN_DEBUG
340 strbuf->pool = NULL;
341 strbuf->blocksize = strbuf->len + 1;
342 #endif
343
344 /* Both, svn_string_t and svn_stringbuf_t are public API structures
345 * since the svn epoch. Thus, we can rely on their precise layout not
346 * to change.
347 *
348 * It just so happens that svn_string_t is structurally equivalent
349 * to the (data, len) sub-set of svn_stringbuf_t. There is also no
350 * difference in alignment and padding. So, we can just re-interpret
351 * that part of STRBUF as a svn_string_t.
352 *
353 * However, since svn_string_t does not know about the blocksize
354 * member in svn_stringbuf_t, any attempt to re-size the returned
355 * svn_string_t might invalidate the STRBUF struct. Hence, we consider
356 * the source STRBUF "consumed".
357 *
358 * Modifying the string character content is fine, though.
359 */
360 return (svn_string_t *)&strbuf->data;
361 }
362
363
364
365 /* svn_stringbuf functions */
366
367 svn_stringbuf_t *
svn_stringbuf_create_empty(apr_pool_t * pool)368 svn_stringbuf_create_empty(apr_pool_t *pool)
369 {
370 return svn_stringbuf_create_ensure(0, pool);
371 }
372
373 svn_stringbuf_t *
svn_stringbuf_create_ensure(apr_size_t blocksize,apr_pool_t * pool)374 svn_stringbuf_create_ensure(apr_size_t blocksize, apr_pool_t *pool)
375 {
376 void *mem;
377 svn_stringbuf_t *new_string;
378
379 ++blocksize; /* + space for '\0' */
380
381 /* Allocate memory for svn_string_t and data in one chunk. */
382 membuf_create(&mem, &blocksize, blocksize + sizeof(*new_string), pool);
383
384 /* Initialize header and string */
385 new_string = mem;
386 new_string->data = (char*)mem + sizeof(*new_string);
387 new_string->data[0] = '\0';
388 new_string->len = 0;
389 new_string->blocksize = blocksize - sizeof(*new_string);
390 new_string->pool = pool;
391
392 return new_string;
393 }
394
395 svn_stringbuf_t *
svn_stringbuf_ncreate(const char * bytes,apr_size_t size,apr_pool_t * pool)396 svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
397 {
398 svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool);
399
400 /* If SIZE is 0, NULL is valid for BYTES. */
401 if (size)
402 memcpy(strbuf->data, bytes, size);
403
404 /* Null termination is the convention -- even if we suspect the data
405 to be binary, it's not up to us to decide, it's the caller's
406 call. Heck, that's why they call it the caller! */
407 strbuf->data[size] = '\0';
408 strbuf->len = size;
409
410 return strbuf;
411 }
412
413
414 svn_stringbuf_t *
svn_stringbuf_create(const char * cstring,apr_pool_t * pool)415 svn_stringbuf_create(const char *cstring, apr_pool_t *pool)
416 {
417 return svn_stringbuf_ncreate(cstring, strlen(cstring), pool);
418 }
419
420
421 svn_stringbuf_t *
svn_stringbuf_create_from_string(const svn_string_t * str,apr_pool_t * pool)422 svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool)
423 {
424 return svn_stringbuf_ncreate(str->data, str->len, pool);
425 }
426
427 svn_stringbuf_t *
svn_stringbuf_create_wrap(char * str,apr_pool_t * pool)428 svn_stringbuf_create_wrap(char *str, apr_pool_t *pool)
429 {
430 svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result));
431 result->pool = pool;
432 result->data = str;
433 result->len = strlen(str);
434 result->blocksize = result->len + 1;
435
436 return result;
437 }
438
439 svn_stringbuf_t *
svn_stringbuf_createv(apr_pool_t * pool,const char * fmt,va_list ap)440 svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap)
441 {
442 char *data = apr_pvsprintf(pool, fmt, ap);
443 apr_size_t size = strlen(data);
444 svn_stringbuf_t *new_string;
445
446 new_string = apr_palloc(pool, sizeof(*new_string));
447 new_string->data = data;
448 new_string->len = size;
449 new_string->blocksize = size + 1;
450 new_string->pool = pool;
451
452 return new_string;
453 }
454
455
456 svn_stringbuf_t *
svn_stringbuf_createf(apr_pool_t * pool,const char * fmt,...)457 svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...)
458 {
459 svn_stringbuf_t *str;
460
461 va_list ap;
462 va_start(ap, fmt);
463 str = svn_stringbuf_createv(pool, fmt, ap);
464 va_end(ap);
465
466 return str;
467 }
468
469
470 void
svn_stringbuf_fillchar(svn_stringbuf_t * str,unsigned char c)471 svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c)
472 {
473 memset(str->data, c, str->len);
474 }
475
476
477 void
svn_stringbuf_set(svn_stringbuf_t * str,const char * value)478 svn_stringbuf_set(svn_stringbuf_t *str, const char *value)
479 {
480 apr_size_t amt = strlen(value);
481
482 membuf_ensure((void**) &str->data, &str->blocksize, amt + 1, str->pool);
483 memcpy(str->data, value, amt + 1);
484 str->len = amt;
485 }
486
487 void
svn_stringbuf_setempty(svn_stringbuf_t * str)488 svn_stringbuf_setempty(svn_stringbuf_t *str)
489 {
490 str->data[0] = '\0';
491 str->len = 0;
492 }
493
494
495 void
svn_stringbuf_chop(svn_stringbuf_t * str,apr_size_t nbytes)496 svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes)
497 {
498 if (nbytes > str->len)
499 str->len = 0;
500 else
501 str->len -= nbytes;
502
503 str->data[str->len] = '\0';
504 }
505
506 void
svn_stringbuf_leftchop(svn_stringbuf_t * str,apr_size_t nbytes)507 svn_stringbuf_leftchop(svn_stringbuf_t *str, apr_size_t nbytes)
508 {
509 if (str->len == 0)
510 return;
511
512 if (nbytes >= str->len)
513 {
514 str->len = 0;
515 *str->data = '\0';
516 }
517 else
518 {
519 /* Note: This will irretrievably waste nbytes of space in the
520 stringbuf's pool, but unlike the alternative of memmoving the
521 data, it's a constant-time operation. */
522 str->data += nbytes;
523 str->len -= nbytes;
524 str->blocksize -= nbytes;
525 }
526 }
527
528 svn_boolean_t
svn_stringbuf_isempty(const svn_stringbuf_t * str)529 svn_stringbuf_isempty(const svn_stringbuf_t *str)
530 {
531 return (str->len == 0);
532 }
533
534
535 void
svn_stringbuf_ensure(svn_stringbuf_t * str,apr_size_t minimum_size)536 svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size)
537 {
538 void *mem = NULL;
539 ++minimum_size; /* + space for '\0' */
540
541 membuf_ensure(&mem, &str->blocksize, minimum_size, str->pool);
542 if (mem && mem != str->data)
543 {
544 if (str->data)
545 memcpy(mem, str->data, str->len + 1);
546 str->data = mem;
547 }
548 }
549
550
551 /* WARNING - Optimized code ahead!
552 * This function has been hand-tuned for performance. Please read
553 * the comments below before modifying the code.
554 */
555 void
svn_stringbuf_appendbyte(svn_stringbuf_t * str,char byte)556 svn_stringbuf_appendbyte(svn_stringbuf_t *str, char byte)
557 {
558 char *dest;
559 apr_size_t old_len = str->len;
560
561 /* In most cases, there will be pre-allocated memory left
562 * to just write the new byte at the end of the used section
563 * and terminate the string properly.
564 */
565 if (str->blocksize > old_len + 1)
566 {
567 /* The following read does not depend this write, so we
568 * can issue the write first to minimize register pressure:
569 * The value of old_len+1 is no longer needed; on most processors,
570 * dest[old_len+1] will be calculated implicitly as part of
571 * the addressing scheme.
572 */
573 str->len = old_len+1;
574
575 /* Since the compiler cannot be sure that *src->data and *src
576 * don't overlap, we read src->data *once* before writing
577 * to *src->data. Replacing dest with str->data would force
578 * the compiler to read it again after the first byte.
579 */
580 dest = str->data;
581
582 /* If not already available in a register as per ABI, load
583 * "byte" into the register (e.g. the one freed from old_len+1),
584 * then write it to the string buffer and terminate it properly.
585 *
586 * Including the "byte" fetch, all operations so far could be
587 * issued at once and be scheduled at the CPU's descression.
588 * Most likely, no-one will soon depend on the data that will be
589 * written in this function. So, no stalls there, either.
590 */
591 dest[old_len] = byte;
592 dest[old_len+1] = '\0';
593 }
594 else
595 {
596 /* we need to re-allocate the string buffer
597 * -> let the more generic implementation take care of that part
598 */
599
600 /* Depending on the ABI, "byte" is a register value. If we were
601 * to take its address directly, the compiler might decide to
602 * put in on the stack *unconditionally*, even if that would
603 * only be necessary for this block.
604 */
605 char b = byte;
606 svn_stringbuf_appendbytes(str, &b, 1);
607 }
608 }
609
610
611 void
svn_stringbuf_appendbytes(svn_stringbuf_t * str,const char * bytes,apr_size_t count)612 svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
613 apr_size_t count)
614 {
615 apr_size_t total_len;
616 void *start_address;
617
618 if (!count)
619 /* Allow BYTES to be NULL by avoiding passing it to memcpy. */
620 return;
621
622 total_len = str->len + count; /* total size needed */
623
624 /* svn_stringbuf_ensure adds 1 for null terminator. */
625 svn_stringbuf_ensure(str, total_len);
626
627 /* get address 1 byte beyond end of original bytestring */
628 start_address = (str->data + str->len);
629
630 memcpy(start_address, bytes, count);
631 str->len = total_len;
632
633 str->data[str->len] = '\0'; /* We don't know if this is binary
634 data or not, but convention is
635 to null-terminate. */
636 }
637
638 void
svn_stringbuf_appendfill(svn_stringbuf_t * str,char byte,apr_size_t count)639 svn_stringbuf_appendfill(svn_stringbuf_t *str,
640 char byte,
641 apr_size_t count)
642 {
643 apr_size_t new_len = str->len + count;
644 svn_stringbuf_ensure(str, new_len);
645
646 memset(str->data + str->len, byte, count);
647
648 /* update buffer length and always NUL-terminate it */
649 str->len = new_len;
650 str->data[new_len] = '\0';
651 }
652
653
654 void
svn_stringbuf_appendstr(svn_stringbuf_t * targetstr,const svn_stringbuf_t * appendstr)655 svn_stringbuf_appendstr(svn_stringbuf_t *targetstr,
656 const svn_stringbuf_t *appendstr)
657 {
658 svn_stringbuf_appendbytes(targetstr, appendstr->data, appendstr->len);
659 }
660
661
662 void
svn_stringbuf_appendcstr(svn_stringbuf_t * targetstr,const char * cstr)663 svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr, const char *cstr)
664 {
665 svn_stringbuf_appendbytes(targetstr, cstr, strlen(cstr));
666 }
667
668 void
svn_stringbuf_insert(svn_stringbuf_t * str,apr_size_t pos,const char * bytes,apr_size_t count)669 svn_stringbuf_insert(svn_stringbuf_t *str,
670 apr_size_t pos,
671 const char *bytes,
672 apr_size_t count)
673 {
674 /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. */
675 if (count == 0)
676 return;
677
678 /* special case: BYTES overlaps with this string -> copy the source */
679 if (bytes + count > str->data && bytes < str->data + str->blocksize)
680 bytes = apr_pmemdup(str->pool, bytes, count);
681
682 if (pos > str->len)
683 pos = str->len;
684
685 svn_stringbuf_ensure(str, str->len + count);
686 memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
687 memcpy(str->data + pos, bytes, count);
688
689 str->len += count;
690 }
691
692 void
svn_stringbuf_remove(svn_stringbuf_t * str,apr_size_t pos,apr_size_t count)693 svn_stringbuf_remove(svn_stringbuf_t *str,
694 apr_size_t pos,
695 apr_size_t count)
696 {
697 if (pos > str->len)
698 pos = str->len;
699 if (count > str->len - pos)
700 count = str->len - pos;
701
702 memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1);
703 str->len -= count;
704 }
705
706 void
svn_stringbuf_replace(svn_stringbuf_t * str,apr_size_t pos,apr_size_t old_count,const char * bytes,apr_size_t new_count)707 svn_stringbuf_replace(svn_stringbuf_t *str,
708 apr_size_t pos,
709 apr_size_t old_count,
710 const char *bytes,
711 apr_size_t new_count)
712 {
713 /* For COUNT==0, we allow BYTES to be NULL.
714 * In that case, this is just a substring removal. */
715 if (new_count == 0)
716 {
717 svn_stringbuf_remove(str, pos, old_count);
718 return;
719 }
720
721 /* special case: BYTES overlaps with this string -> copy the source */
722 if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
723 bytes = apr_pmemdup(str->pool, bytes, new_count);
724
725 if (pos > str->len)
726 pos = str->len;
727 if (old_count > str->len - pos)
728 old_count = str->len - pos;
729
730 if (old_count < new_count)
731 {
732 apr_size_t delta = new_count - old_count;
733 svn_stringbuf_ensure(str, str->len + delta);
734 }
735
736 if (old_count != new_count)
737 memmove(str->data + pos + new_count, str->data + pos + old_count,
738 str->len - pos - old_count + 1);
739
740 memcpy(str->data + pos, bytes, new_count);
741 str->len += new_count - old_count;
742 }
743
744
745 apr_size_t
svn_stringbuf_replace_all(svn_stringbuf_t * str,const char * to_find,const char * replacement)746 svn_stringbuf_replace_all(svn_stringbuf_t *str,
747 const char *to_find,
748 const char *replacement)
749 {
750 apr_size_t replacements = 0;
751
752 apr_size_t current = 0;
753 apr_size_t original_length = str->len;
754
755 apr_size_t to_copy;
756 apr_size_t to_find_len;
757 apr_size_t replacement_len;
758 apr_size_t new_length;
759
760 /* Early exit. */
761 const char *pos = strstr(str->data, to_find);
762 if (pos == NULL)
763 return 0;
764
765 to_find_len = strlen(to_find);
766 replacement_len = strlen(replacement);
767
768 /* We will store the new contents behind the NUL terminator of the current
769 * data and track the total length in STR->LEN to make the reallocation
770 * code preserve both bits. However, we need to keep the NUL between them
771 * to make strstr stop at that boundary. */
772 ++str->len;
773
774 /* Find all occurrences of TO_FIND, copy the bits in between to the target,
775 * separated by REPLACEMENT. */
776 for ( ; pos; pos = strstr(str->data + current, to_find), ++replacements)
777 {
778 to_copy = pos - str->data - current;
779 svn_stringbuf_ensure(str, str->len + to_copy + replacement_len);
780
781 if (to_copy)
782 memcpy(str->data + str->len, str->data + current, to_copy);
783 current += to_copy + to_find_len;
784
785 str->len += to_copy;
786 memcpy(str->data + str->len, replacement, replacement_len);
787 str->len += replacement_len;
788 }
789
790 /* Copy remainder. */
791 to_copy = original_length - current;
792 if (to_copy)
793 {
794 svn_stringbuf_ensure(str, str->len + to_copy);
795 memcpy(str->data + str->len, str->data + current, to_copy);
796 str->len += to_copy;
797 }
798
799 /* Move new contents to the start of the buffer and terminate it. */
800 new_length = str->len - original_length - 1;
801 memmove(str->data, str->data + original_length + 1, new_length);
802 str->len = new_length;
803 str->data[new_length] = 0;
804
805 /* Done. */
806 return replacements;
807 }
808
809
810 svn_stringbuf_t *
svn_stringbuf_dup(const svn_stringbuf_t * original_string,apr_pool_t * pool)811 svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool)
812 {
813 return (svn_stringbuf_ncreate(original_string->data,
814 original_string->len, pool));
815 }
816
817
818
819 svn_boolean_t
svn_stringbuf_compare(const svn_stringbuf_t * str1,const svn_stringbuf_t * str2)820 svn_stringbuf_compare(const svn_stringbuf_t *str1,
821 const svn_stringbuf_t *str2)
822 {
823 return string_compare(str1->data, str2->data, str1->len, str2->len);
824 }
825
826
827
828 apr_size_t
svn_stringbuf_first_non_whitespace(const svn_stringbuf_t * str)829 svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str)
830 {
831 return string_first_non_whitespace(str->data, str->len);
832 }
833
834
835 void
svn_stringbuf_strip_whitespace(svn_stringbuf_t * str)836 svn_stringbuf_strip_whitespace(svn_stringbuf_t *str)
837 {
838 /* Skip (hide) whitespace at the beginning of the string. */
839 if (svn_ctype_isspace(str->data[0]))
840 {
841 /* Find first non-whitespace character */
842 apr_size_t offset = string_first_non_whitespace(str->data + 1,
843 str->len - 1) + 1;
844
845 /* Go ahead! Waste some RAM, we've got pools! :) */
846 str->data += offset;
847 str->len -= offset;
848 str->blocksize -= offset;
849 }
850
851 /* Now that we've trimmed the front, trim the end, wasting more RAM. */
852 while ((str->len > 0) && svn_ctype_isspace(str->data[str->len - 1]))
853 str->len--;
854 str->data[str->len] = '\0';
855 }
856
857
858 apr_size_t
svn_stringbuf_find_char_backward(const svn_stringbuf_t * str,char ch)859 svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch)
860 {
861 return find_char_backward(str->data, str->len, ch);
862 }
863
864
865 svn_boolean_t
svn_string_compare_stringbuf(const svn_string_t * str1,const svn_stringbuf_t * str2)866 svn_string_compare_stringbuf(const svn_string_t *str1,
867 const svn_stringbuf_t *str2)
868 {
869 return string_compare(str1->data, str2->data, str1->len, str2->len);
870 }
871
872
873
874 /*** C string stuff. ***/
875
876 void
svn_cstring_split_append(apr_array_header_t * array,const char * input,const char * sep_chars,svn_boolean_t chop_whitespace,apr_pool_t * pool)877 svn_cstring_split_append(apr_array_header_t *array,
878 const char *input,
879 const char *sep_chars,
880 svn_boolean_t chop_whitespace,
881 apr_pool_t *pool)
882 {
883 char *pats;
884 char *p;
885
886 pats = apr_pstrdup(pool, input); /* strtok wants non-const data */
887 p = svn_cstring_tokenize(sep_chars, &pats);
888
889 while (p)
890 {
891 if (chop_whitespace)
892 {
893 while (svn_ctype_isspace(*p))
894 p++;
895
896 {
897 char *e = p + (strlen(p) - 1);
898 while ((e >= p) && (svn_ctype_isspace(*e)))
899 e--;
900 *(++e) = '\0';
901 }
902 }
903
904 if (p[0] != '\0')
905 APR_ARRAY_PUSH(array, const char *) = p;
906
907 p = svn_cstring_tokenize(sep_chars, &pats);
908 }
909
910 return;
911 }
912
913
914 apr_array_header_t *
svn_cstring_split(const char * input,const char * sep_chars,svn_boolean_t chop_whitespace,apr_pool_t * pool)915 svn_cstring_split(const char *input,
916 const char *sep_chars,
917 svn_boolean_t chop_whitespace,
918 apr_pool_t *pool)
919 {
920 apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input));
921 svn_cstring_split_append(a, input, sep_chars, chop_whitespace, pool);
922 return a;
923 }
924
925
svn_cstring_match_glob_list(const char * str,const apr_array_header_t * list)926 svn_boolean_t svn_cstring_match_glob_list(const char *str,
927 const apr_array_header_t *list)
928 {
929 int i;
930
931 for (i = 0; i < list->nelts; i++)
932 {
933 const char *this_pattern = APR_ARRAY_IDX(list, i, char *);
934
935 if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS)
936 return TRUE;
937 }
938
939 return FALSE;
940 }
941
942 svn_boolean_t
svn_cstring_match_list(const char * str,const apr_array_header_t * list)943 svn_cstring_match_list(const char *str, const apr_array_header_t *list)
944 {
945 int i;
946
947 for (i = 0; i < list->nelts; i++)
948 {
949 const char *this_str = APR_ARRAY_IDX(list, i, char *);
950
951 if (strcmp(this_str, str) == 0)
952 return TRUE;
953 }
954
955 return FALSE;
956 }
957
958 char *
svn_cstring_tokenize(const char * sep,char ** str)959 svn_cstring_tokenize(const char *sep, char **str)
960 {
961 char *token;
962 char *next;
963 char csep;
964
965 /* check parameters */
966 if ((sep == NULL) || (str == NULL) || (*str == NULL))
967 return NULL;
968
969 /* let APR handle edge cases and multiple separators */
970 csep = *sep;
971 if (csep == '\0' || sep[1] != '\0')
972 return apr_strtok(NULL, sep, str);
973
974 /* skip characters in sep (will terminate at '\0') */
975 token = *str;
976 while (*token == csep)
977 ++token;
978
979 if (!*token) /* no more tokens */
980 return NULL;
981
982 /* skip valid token characters to terminate token and
983 * prepare for the next call (will terminate at '\0)
984 */
985 next = strchr(token, csep);
986 if (next == NULL)
987 {
988 *str = token + strlen(token);
989 }
990 else
991 {
992 *next = '\0';
993 *str = next + 1;
994 }
995
996 return token;
997 }
998
svn_cstring_count_newlines(const char * msg)999 int svn_cstring_count_newlines(const char *msg)
1000 {
1001 int count = 0;
1002 const char *p;
1003
1004 for (p = msg; *p; p++)
1005 {
1006 if (*p == '\n')
1007 {
1008 count++;
1009 if (*(p + 1) == '\r')
1010 p++;
1011 }
1012 else if (*p == '\r')
1013 {
1014 count++;
1015 if (*(p + 1) == '\n')
1016 p++;
1017 }
1018 }
1019
1020 return count;
1021 }
1022
1023 char *
svn_cstring_join2(const apr_array_header_t * strings,const char * separator,svn_boolean_t trailing_separator,apr_pool_t * pool)1024 svn_cstring_join2(const apr_array_header_t *strings,
1025 const char *separator,
1026 svn_boolean_t trailing_separator,
1027 apr_pool_t *pool)
1028 {
1029 svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool);
1030 size_t sep_len = strlen(separator);
1031 int i;
1032
1033 for (i = 0; i < strings->nelts; i++)
1034 {
1035 const char *string = APR_ARRAY_IDX(strings, i, const char *);
1036 if (i > 0)
1037 svn_stringbuf_appendbytes(new_str, separator, sep_len);
1038 svn_stringbuf_appendbytes(new_str, string, strlen(string));
1039 }
1040
1041 if (strings->nelts > 0 && trailing_separator)
1042 svn_stringbuf_appendbytes(new_str, separator, sep_len);
1043
1044 return new_str->data;
1045 }
1046
1047 int
svn_cstring_casecmp(const char * str1,const char * str2)1048 svn_cstring_casecmp(const char *str1, const char *str2)
1049 {
1050 for (;;)
1051 {
1052 const int a = *str1++;
1053 const int b = *str2++;
1054 const int cmp = svn_ctype_casecmp(a, b);
1055 if (cmp || !a || !b)
1056 return cmp;
1057 }
1058 }
1059
1060 svn_error_t *
svn_cstring_strtoui64(apr_uint64_t * n,const char * str,apr_uint64_t minval,apr_uint64_t maxval,int base)1061 svn_cstring_strtoui64(apr_uint64_t *n, const char *str,
1062 apr_uint64_t minval, apr_uint64_t maxval,
1063 int base)
1064 {
1065 apr_int64_t val;
1066 char *endptr;
1067
1068 /* We assume errno is thread-safe. */
1069 errno = 0; /* APR-0.9 doesn't always set errno */
1070
1071 /* ### We're throwing away half the number range here.
1072 * ### APR needs a apr_strtoui64() function. */
1073 val = apr_strtoi64(str, &endptr, base);
1074 if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
1075 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1076 _("Could not convert '%s' into a number"),
1077 str);
1078 if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
1079 val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval)
1080 /* ### Mark this for translation when gettext doesn't choke on macros. */
1081 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1082 "Number '%s' is out of range "
1083 "'[%" APR_UINT64_T_FMT ", %" APR_UINT64_T_FMT "]'",
1084 str, minval, maxval);
1085 *n = val;
1086 return SVN_NO_ERROR;
1087 }
1088
1089 svn_error_t *
svn_cstring_atoui64(apr_uint64_t * n,const char * str)1090 svn_cstring_atoui64(apr_uint64_t *n, const char *str)
1091 {
1092 return svn_error_trace(svn_cstring_strtoui64(n, str, 0,
1093 APR_UINT64_MAX, 10));
1094 }
1095
1096 svn_error_t *
svn_cstring_atoui(unsigned int * n,const char * str)1097 svn_cstring_atoui(unsigned int *n, const char *str)
1098 {
1099 apr_uint64_t val;
1100
1101 SVN_ERR(svn_cstring_strtoui64(&val, str, 0, APR_UINT32_MAX, 10));
1102 *n = (unsigned int)val;
1103 return SVN_NO_ERROR;
1104 }
1105
1106 svn_error_t *
svn_cstring_strtoi64(apr_int64_t * n,const char * str,apr_int64_t minval,apr_int64_t maxval,int base)1107 svn_cstring_strtoi64(apr_int64_t *n, const char *str,
1108 apr_int64_t minval, apr_int64_t maxval,
1109 int base)
1110 {
1111 apr_int64_t val;
1112 char *endptr;
1113
1114 /* We assume errno is thread-safe. */
1115 errno = 0; /* APR-0.9 doesn't always set errno */
1116
1117 val = apr_strtoi64(str, &endptr, base);
1118 if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
1119 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1120 _("Could not convert '%s' into a number"),
1121 str);
1122 if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
1123 val < minval || val > maxval)
1124 /* ### Mark this for translation when gettext doesn't choke on macros. */
1125 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1126 "Number '%s' is out of range "
1127 "'[%" APR_INT64_T_FMT ", %" APR_INT64_T_FMT "]'",
1128 str, minval, maxval);
1129 *n = val;
1130 return SVN_NO_ERROR;
1131 }
1132
1133 svn_error_t *
svn_cstring_atoi64(apr_int64_t * n,const char * str)1134 svn_cstring_atoi64(apr_int64_t *n, const char *str)
1135 {
1136 return svn_error_trace(svn_cstring_strtoi64(n, str, APR_INT64_MIN,
1137 APR_INT64_MAX, 10));
1138 }
1139
1140 svn_error_t *
svn_cstring_atoi(int * n,const char * str)1141 svn_cstring_atoi(int *n, const char *str)
1142 {
1143 apr_int64_t val;
1144
1145 SVN_ERR(svn_cstring_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10));
1146 *n = (int)val;
1147 return SVN_NO_ERROR;
1148 }
1149
1150 unsigned long
svn__strtoul(const char * buffer,const char ** end)1151 svn__strtoul(const char* buffer, const char** end)
1152 {
1153 unsigned long result = 0;
1154
1155 /* this loop will execute in just 2 CPU cycles, confirmed by measurement:
1156 7 macro-ops (max 4 / cycle => 2 cycles)
1157 1 load (max 1 / cycle)
1158 1 jumps (compare + conditional jump == 1 macro op; max 1 / cycle)
1159 2 arithmetic ops (subtract, increment; max 3 / cycle)
1160 2 scale-and-add AGU ops (max 3 / cycle)
1161 1 compiler-generated move operation
1162 dependency chain: temp = result * 4 + result; result = temp * 2 + c
1163 (2 ops with latency 1 => 2 cycles)
1164 */
1165 while (1)
1166 {
1167 unsigned long c = (unsigned char)*buffer - (unsigned char)'0';
1168 if (c > 9)
1169 break;
1170
1171 result = result * 10 + c;
1172 ++buffer;
1173 }
1174
1175 *end = buffer;
1176 return result;
1177 }
1178
1179 /* "Precalculated" itoa values for 2 places (including leading zeros).
1180 * For maximum performance, make sure all table entries are word-aligned.
1181 */
1182 static const char decimal_table[100][4]
1183 = { "00", "01", "02", "03", "04", "05", "06", "07", "08", "09"
1184 , "10", "11", "12", "13", "14", "15", "16", "17", "18", "19"
1185 , "20", "21", "22", "23", "24", "25", "26", "27", "28", "29"
1186 , "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"
1187 , "40", "41", "42", "43", "44", "45", "46", "47", "48", "49"
1188 , "50", "51", "52", "53", "54", "55", "56", "57", "58", "59"
1189 , "60", "61", "62", "63", "64", "65", "66", "67", "68", "69"
1190 , "70", "71", "72", "73", "74", "75", "76", "77", "78", "79"
1191 , "80", "81", "82", "83", "84", "85", "86", "87", "88", "89"
1192 , "90", "91", "92", "93", "94", "95", "96", "97", "98", "99"};
1193
1194 /* Copy the two bytes at SOURCE[0] and SOURCE[1] to DEST[0] and DEST[1] */
1195 #define COPY_TWO_BYTES(dest,source)\
1196 memcpy((dest), (source), 2)
1197
1198 apr_size_t
svn__ui64toa(char * dest,apr_uint64_t number)1199 svn__ui64toa(char * dest, apr_uint64_t number)
1200 {
1201 char buffer[SVN_INT64_BUFFER_SIZE];
1202 apr_uint32_t reduced; /* used for 32 bit DIV */
1203 char* target;
1204
1205 /* Small numbers are by far the most common case.
1206 * Therefore, we use special code.
1207 */
1208 if (number < 100)
1209 {
1210 if (number < 10)
1211 {
1212 dest[0] = (char)('0' + number);
1213 dest[1] = 0;
1214 return 1;
1215 }
1216 else
1217 {
1218 COPY_TWO_BYTES(dest, decimal_table[(apr_size_t)number]);
1219 dest[2] = 0;
1220 return 2;
1221 }
1222 }
1223
1224 /* Standard code. Write string in pairs of chars back-to-front */
1225 buffer[SVN_INT64_BUFFER_SIZE - 1] = 0;
1226 target = &buffer[SVN_INT64_BUFFER_SIZE - 3];
1227
1228 /* Loop may be executed 0 .. 2 times. */
1229 while (number >= 100000000)
1230 {
1231 /* Number is larger than 100^4, i.e. we can write 4x2 chars.
1232 * Also, use 32 bit DIVs as these are about twice as fast.
1233 */
1234 reduced = (apr_uint32_t)(number % 100000000);
1235 number /= 100000000;
1236
1237 COPY_TWO_BYTES(target - 0, decimal_table[reduced % 100]);
1238 reduced /= 100;
1239 COPY_TWO_BYTES(target - 2, decimal_table[reduced % 100]);
1240 reduced /= 100;
1241 COPY_TWO_BYTES(target - 4, decimal_table[reduced % 100]);
1242 reduced /= 100;
1243 COPY_TWO_BYTES(target - 6, decimal_table[reduced % 100]);
1244 target -= 8;
1245 }
1246
1247 /* Now, the number fits into 32 bits, but may still be larger than 99 */
1248 reduced = (apr_uint32_t)(number);
1249 while (reduced >= 100)
1250 {
1251 COPY_TWO_BYTES(target, decimal_table[reduced % 100]);
1252 reduced /= 100;
1253 target -= 2;
1254 }
1255
1256 /* The number is now smaller than 100 but larger than 1 */
1257 COPY_TWO_BYTES(target, decimal_table[reduced]);
1258
1259 /* Correction for uneven count of places. */
1260 if (reduced < 10)
1261 ++target;
1262
1263 /* Copy to target */
1264 memcpy(dest, target, &buffer[SVN_INT64_BUFFER_SIZE] - target);
1265 return &buffer[SVN_INT64_BUFFER_SIZE] - target - 1;
1266 }
1267
1268 apr_size_t
svn__i64toa(char * dest,apr_int64_t number)1269 svn__i64toa(char * dest, apr_int64_t number)
1270 {
1271 if (number >= 0)
1272 return svn__ui64toa(dest, (apr_uint64_t)number);
1273
1274 *dest = '-';
1275 return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1;
1276 }
1277
1278 static void
ui64toa_sep(apr_uint64_t number,char separator,char * buffer)1279 ui64toa_sep(apr_uint64_t number, char separator, char *buffer)
1280 {
1281 apr_size_t length = svn__ui64toa(buffer, number);
1282 apr_size_t i;
1283
1284 for (i = length; i > 3; i -= 3)
1285 {
1286 memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3);
1287 buffer[i-3] = separator;
1288 length++;
1289 }
1290
1291 buffer[length] = 0;
1292 }
1293
1294 char *
svn__ui64toa_sep(apr_uint64_t number,char separator,apr_pool_t * pool)1295 svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool)
1296 {
1297 char buffer[2 * SVN_INT64_BUFFER_SIZE];
1298 ui64toa_sep(number, separator, buffer);
1299
1300 return apr_pstrdup(pool, buffer);
1301 }
1302
1303 char *
svn__i64toa_sep(apr_int64_t number,char separator,apr_pool_t * pool)1304 svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool)
1305 {
1306 char buffer[2 * SVN_INT64_BUFFER_SIZE];
1307 if (number < 0)
1308 {
1309 buffer[0] = '-';
1310 ui64toa_sep((apr_uint64_t)(-number), separator, &buffer[1]);
1311 }
1312 else
1313 ui64toa_sep((apr_uint64_t)(number), separator, buffer);
1314
1315 return apr_pstrdup(pool, buffer);
1316 }
1317
1318 apr_size_t
svn__ui64tobase36(char * dest,apr_uint64_t value)1319 svn__ui64tobase36(char *dest, apr_uint64_t value)
1320 {
1321 char *dest_start = dest;
1322 if (value < 10)
1323 {
1324 /* pretty frequent and trivial case. Make it fast. */
1325 *(dest++) = (char)(value) + '0';
1326 }
1327 else
1328 {
1329 char buffer[SVN_INT64_BUFFER_SIZE];
1330 char *p = buffer;
1331
1332 /* write result as little-endian to buffer */
1333 while (value > 0)
1334 {
1335 char c = (char)(value % 36);
1336 value /= 36;
1337
1338 *p = (c <= 9) ? (c + '0') : (c - 10 + 'a');
1339 ++p;
1340 }
1341
1342 /* copy as big-endian to DEST */
1343 while (p > buffer)
1344 *(dest++) = *(--p);
1345 }
1346
1347 *dest = '\0';
1348 return dest - dest_start;
1349 }
1350
1351 apr_uint64_t
svn__base36toui64(const char ** next,const char * source)1352 svn__base36toui64(const char **next, const char *source)
1353 {
1354 apr_uint64_t result = 0;
1355 apr_uint64_t factor = 1;
1356 int i = 0;
1357 char digits[SVN_INT64_BUFFER_SIZE];
1358
1359 /* convert digits to numerical values and count the number of places.
1360 * Also, prevent buffer overflow. */
1361 while (i < sizeof(digits))
1362 {
1363 char c = *source;
1364 if (c < 'a')
1365 {
1366 /* includes detection of NUL terminator */
1367 if (c < '0' || c > '9')
1368 break;
1369
1370 c -= '0';
1371 }
1372 else
1373 {
1374 if (c < 'a' || c > 'z')
1375 break;
1376
1377 c -= 'a' - 10;
1378 }
1379
1380 digits[i++] = c;
1381 source++;
1382 }
1383
1384 /* fold digits into the result */
1385 while (i > 0)
1386 {
1387 result += factor * (apr_uint64_t)digits[--i];
1388 factor *= 36;
1389 }
1390
1391 if (next)
1392 *next = source;
1393
1394 return result;
1395 }
1396
1397
1398 apr_size_t
svn_cstring__similarity(const char * stra,const char * strb,svn_membuf_t * buffer,apr_size_t * rlcs)1399 svn_cstring__similarity(const char *stra, const char *strb,
1400 svn_membuf_t *buffer, apr_size_t *rlcs)
1401 {
1402 svn_string_t stringa, stringb;
1403 stringa.data = stra;
1404 stringa.len = strlen(stra);
1405 stringb.data = strb;
1406 stringb.len = strlen(strb);
1407 return svn_string__similarity(&stringa, &stringb, buffer, rlcs);
1408 }
1409
1410 apr_size_t
svn_string__similarity(const svn_string_t * stringa,const svn_string_t * stringb,svn_membuf_t * buffer,apr_size_t * rlcs)1411 svn_string__similarity(const svn_string_t *stringa,
1412 const svn_string_t *stringb,
1413 svn_membuf_t *buffer, apr_size_t *rlcs)
1414 {
1415 const char *stra = stringa->data;
1416 const char *strb = stringb->data;
1417 const apr_size_t lena = stringa->len;
1418 const apr_size_t lenb = stringb->len;
1419 const apr_size_t total = lena + lenb;
1420 const char *enda = stra + lena;
1421 const char *endb = strb + lenb;
1422 apr_size_t lcs = 0;
1423
1424 /* Skip the common prefix ... */
1425 while (stra < enda && strb < endb && *stra == *strb)
1426 {
1427 ++stra; ++strb;
1428 ++lcs;
1429 }
1430
1431 /* ... and the common suffix */
1432 while (stra < enda && strb < endb)
1433 {
1434 --enda; --endb;
1435 if (*enda != *endb)
1436 {
1437 ++enda; ++endb;
1438 break;
1439 }
1440
1441 ++lcs;
1442 }
1443
1444 if (stra < enda && strb < endb)
1445 {
1446 const apr_size_t resta = enda - stra;
1447 const apr_size_t restb = endb - strb;
1448 const apr_size_t slots = (resta > restb ? restb : resta);
1449 apr_size_t *curr, *prev;
1450 const char *pstr;
1451
1452 /* The outer loop must iterate on the longer string. */
1453 if (resta < restb)
1454 {
1455 pstr = stra;
1456 stra = strb;
1457 strb = pstr;
1458
1459 pstr = enda;
1460 enda = endb;
1461 endb = pstr;
1462 }
1463
1464 /* Allocate two columns in the LCS matrix
1465 ### Optimize this to (slots + 2) instesd of 2 * (slots + 1) */
1466 svn_membuf__ensure(buffer, 2 * (slots + 1) * sizeof(apr_size_t));
1467 svn_membuf__nzero(buffer, (slots + 2) * sizeof(apr_size_t));
1468 prev = buffer->data;
1469 curr = prev + slots + 1;
1470
1471 /* Calculate LCS length of the remainder */
1472 for (pstr = stra; pstr < enda; ++pstr)
1473 {
1474 apr_size_t i;
1475 for (i = 1; i <= slots; ++i)
1476 {
1477 if (*pstr == strb[i-1])
1478 curr[i] = prev[i-1] + 1;
1479 else
1480 curr[i] = (curr[i-1] > prev[i] ? curr[i-1] : prev[i]);
1481 }
1482
1483 /* Swap the buffers, making the previous one current */
1484 {
1485 apr_size_t *const temp = prev;
1486 prev = curr;
1487 curr = temp;
1488 }
1489 }
1490
1491 lcs += prev[slots];
1492 }
1493
1494 if (rlcs)
1495 *rlcs = lcs;
1496
1497 /* Return similarity ratio rounded to 4 significant digits */
1498 if (total)
1499 return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total);
1500 else
1501 return SVN_STRING__SIM_RANGE_MAX;
1502 }
1503
1504 apr_size_t
svn_cstring__match_length(const char * a,const char * b,apr_size_t max_len)1505 svn_cstring__match_length(const char *a,
1506 const char *b,
1507 apr_size_t max_len)
1508 {
1509 apr_size_t pos = 0;
1510
1511 #if SVN_UNALIGNED_ACCESS_IS_OK
1512
1513 /* Chunky processing is so much faster ...
1514 *
1515 * We can't make this work on architectures that require aligned access
1516 * because A and B will probably have different alignment. So, skipping
1517 * the first few chars until alignment is reached is not an option.
1518 */
1519 for (; max_len - pos >= sizeof(apr_size_t); pos += sizeof(apr_size_t))
1520 if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
1521 break;
1522
1523 #endif
1524
1525 for (; pos < max_len; ++pos)
1526 if (a[pos] != b[pos])
1527 break;
1528
1529 return pos;
1530 }
1531
1532 apr_size_t
svn_cstring__reverse_match_length(const char * a,const char * b,apr_size_t max_len)1533 svn_cstring__reverse_match_length(const char *a,
1534 const char *b,
1535 apr_size_t max_len)
1536 {
1537 apr_size_t pos = 0;
1538
1539 #if SVN_UNALIGNED_ACCESS_IS_OK
1540
1541 /* Chunky processing is so much faster ...
1542 *
1543 * We can't make this work on architectures that require aligned access
1544 * because A and B will probably have different alignment. So, skipping
1545 * the first few chars until alignment is reached is not an option.
1546 */
1547 for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
1548 if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
1549 break;
1550
1551 pos -= sizeof(apr_size_t);
1552
1553 #endif
1554
1555 /* If we find a mismatch at -pos, pos-1 characters matched.
1556 */
1557 while (++pos <= max_len)
1558 if (a[0-pos] != b[0-pos])
1559 return pos - 1;
1560
1561 /* No mismatch found -> at least MAX_LEN matching chars.
1562 */
1563 return max_len;
1564 }
1565
1566 const char *
svn_cstring_skip_prefix(const char * str,const char * prefix)1567 svn_cstring_skip_prefix(const char *str, const char *prefix)
1568 {
1569 apr_size_t len = strlen(prefix);
1570
1571 if (strncmp(str, prefix, len) == 0)
1572 {
1573 return str + len;
1574 }
1575 else
1576 {
1577 return NULL;
1578 }
1579 }
1580