/* reps-strings.c : interpreting representations with respect to strings
2 *
3 * ====================================================================
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 * ====================================================================
21 */
22
23 #include <assert.h>
24
25 #include "svn_fs.h"
26 #include "svn_pools.h"
27
28 #include "fs.h"
29 #include "err.h"
30 #include "trail.h"
31 #include "reps-strings.h"
32
33 #include "bdb/reps-table.h"
34 #include "bdb/strings-table.h"
35
36 #include "../libsvn_fs/fs-loader.h"
37 #define SVN_WANT_BDB
38 #include "svn_private_config.h"
39
40
41 /*** Helper Functions ***/
42
43
44 /* Return non-zero iff REP is mutable under transaction TXN_ID. */
rep_is_mutable(representation_t * rep,const char * txn_id)45 static svn_boolean_t rep_is_mutable(representation_t *rep,
46 const char *txn_id)
47 {
48 if ((! rep->txn_id) || (strcmp(rep->txn_id, txn_id) != 0))
49 return FALSE;
50 return TRUE;
51 }
52
/* Evaluate to a freshly created SVN_ERR_FS_CORRUPT error whose
   message says that the representation named by X has an unknown
   node kind.  X is consumed by a '%s' conversion, so it must be a
   C string. */
#define UNKNOWN_NODE_KIND(x)                                        \
  svn_error_createf(SVN_ERR_FS_CORRUPT, NULL,                       \
                    _("Unknown node kind for representation '%s'"), \
                    (x))
59
60 /* Return a `fulltext' representation, allocated in POOL, which
61 * references the string STR_KEY.
62 *
63 * If TXN_ID is non-zero and non-NULL, make the representation mutable
64 * under that TXN_ID.
65 *
66 * If STR_KEY is non-null, copy it into an allocation from POOL.
67 *
68 * If MD5_CHECKSUM is non-null, use it as the MD5 checksum for the new
69 * rep; else initialize the rep with an all-zero (i.e., always
70 * successful) MD5 checksum.
71 *
72 * If SHA1_CHECKSUM is non-null, use it as the SHA1 checksum for the new
73 * rep; else initialize the rep with an all-zero (i.e., always
74 * successful) SHA1 checksum.
75 */
76 static representation_t *
make_fulltext_rep(const char * str_key,const char * txn_id,svn_checksum_t * md5_checksum,svn_checksum_t * sha1_checksum,apr_pool_t * pool)77 make_fulltext_rep(const char *str_key,
78 const char *txn_id,
79 svn_checksum_t *md5_checksum,
80 svn_checksum_t *sha1_checksum,
81 apr_pool_t *pool)
82
83 {
84 representation_t *rep = apr_pcalloc(pool, sizeof(*rep));
85 if (txn_id && *txn_id)
86 rep->txn_id = apr_pstrdup(pool, txn_id);
87 rep->kind = rep_kind_fulltext;
88 rep->md5_checksum = svn_checksum_dup(md5_checksum, pool);
89 rep->sha1_checksum = svn_checksum_dup(sha1_checksum, pool);
90 rep->contents.fulltext.string_key
91 = str_key ? apr_pstrdup(pool, str_key) : NULL;
92 return rep;
93 }
94
95
96 /* Set *KEYS to an array of string keys gleaned from `delta'
97 representation REP. Allocate *KEYS in POOL. */
98 static svn_error_t *
delta_string_keys(apr_array_header_t ** keys,const representation_t * rep,apr_pool_t * pool)99 delta_string_keys(apr_array_header_t **keys,
100 const representation_t *rep,
101 apr_pool_t *pool)
102 {
103 const char *key;
104 int i;
105 apr_array_header_t *chunks;
106
107 if (rep->kind != rep_kind_delta)
108 return svn_error_create
109 (SVN_ERR_FS_GENERAL, NULL,
110 _("Representation is not of type 'delta'"));
111
112 /* Set up a convenience variable. */
113 chunks = rep->contents.delta.chunks;
114
115 /* Initialize *KEYS to an empty array. */
116 *keys = apr_array_make(pool, chunks->nelts, sizeof(key));
117 if (! chunks->nelts)
118 return SVN_NO_ERROR;
119
120 /* Now, push the string keys for each window into *KEYS */
121 for (i = 0; i < chunks->nelts; i++)
122 {
123 rep_delta_chunk_t *chunk = APR_ARRAY_IDX(chunks, i, rep_delta_chunk_t *);
124
125 key = apr_pstrdup(pool, chunk->string_key);
126 APR_ARRAY_PUSH(*keys, const char *) = key;
127 }
128
129 return SVN_NO_ERROR;
130 }
131
132
133 /* Delete the strings associated with array KEYS in FS as part of TRAIL. */
134 static svn_error_t *
delete_strings(const apr_array_header_t * keys,svn_fs_t * fs,trail_t * trail,apr_pool_t * pool)135 delete_strings(const apr_array_header_t *keys,
136 svn_fs_t *fs,
137 trail_t *trail,
138 apr_pool_t *pool)
139 {
140 int i;
141 const char *str_key;
142 apr_pool_t *subpool = svn_pool_create(pool);
143
144 for (i = 0; i < keys->nelts; i++)
145 {
146 svn_pool_clear(subpool);
147 str_key = APR_ARRAY_IDX(keys, i, const char *);
148 SVN_ERR(svn_fs_bdb__string_delete(fs, str_key, trail, subpool));
149 }
150 svn_pool_destroy(subpool);
151 return SVN_NO_ERROR;
152 }
153
154
155
156 /*** Reading the contents from a representation. ***/
157
/* State shared by get_one_window() and compose_handler() while the
   delta windows belonging to one chunk index are read and combined. */
struct compose_handler_baton
{
  /* The combined window, and the pool it's allocated from.  Both are
     NULL until the first window has been received. */
  svn_txdelta_window_t *window;
  apr_pool_t *window_pool;

  /* If the incoming window was self-compressed, and the combined WINDOW
     exists from previous iterations, SOURCE_BUF will point to the
     expanded self-compressed window (allocated in WINDOW_POOL). */
  char *source_buf;

  /* The trail for this operation.  WINDOW_POOL will be a child of
     TRAIL->pool.
     NOTE(review): this comment used to promise that nothing is
     allocated from TRAIL->pool itself, but get_one_window() hands
     TRAIL->pool to svn_txdelta_parse_svndiff() and
     svn_fs_bdb__string_read() -- confirm whether that promise still
     matters. */
  trail_t *trail;

  /* TRUE when no more windows have to be read/combined. */
  svn_boolean_t done;

  /* TRUE if we've just started reading a new window.  We need this
     because the svndiff handler will push a NULL window at the end of
     the stream, and we have to ignore that; but we must also know
     when it's appropriate to push a NULL window at the combiner. */
  svn_boolean_t init;
};
182
183
184 /* Handle one window. If BATON is emtpy, copy the WINDOW into it;
185 otherwise, combine WINDOW with the one in BATON, unless WINDOW
186 is self-compressed (i.e., does not copy from the source view),
187 in which case expand. */
188
189 static svn_error_t *
compose_handler(svn_txdelta_window_t * window,void * baton)190 compose_handler(svn_txdelta_window_t *window, void *baton)
191 {
192 struct compose_handler_baton *cb = baton;
193 SVN_ERR_ASSERT(!cb->done || window == NULL);
194 SVN_ERR_ASSERT(cb->trail && cb->trail->pool);
195
196 if (!cb->init && !window)
197 return SVN_NO_ERROR;
198
199 /* We should never get here if we've already expanded a
200 self-compressed window. */
201 SVN_ERR_ASSERT(!cb->source_buf);
202
203 if (cb->window)
204 {
205 if (window && (window->sview_len == 0 || window->src_ops == 0))
206 {
207 /* This is a self-compressed window. Don't combine it with
208 the others, because the combiner may go quadratic. Instead,
209 expand it here and signal that the combination has
210 ended. */
211 apr_size_t source_len = window->tview_len;
212 SVN_ERR_ASSERT(cb->window->sview_len == source_len);
213 cb->source_buf = apr_palloc(cb->window_pool, source_len);
214 svn_txdelta_apply_instructions(window, NULL,
215 cb->source_buf, &source_len);
216 cb->done = TRUE;
217 }
218 else
219 {
220 /* Combine the incoming window with whatever's in the baton. */
221 apr_pool_t *composite_pool = svn_pool_create(cb->trail->pool);
222 svn_txdelta_window_t *composite;
223
224 composite = svn_txdelta_compose_windows(window, cb->window,
225 composite_pool);
226 svn_pool_destroy(cb->window_pool);
227 cb->window = composite;
228 cb->window_pool = composite_pool;
229 cb->done = (composite->sview_len == 0 || composite->src_ops == 0);
230 }
231 }
232 else if (window)
233 {
234 /* Copy the (first) window into the baton. */
235 apr_pool_t *window_pool = svn_pool_create(cb->trail->pool);
236 SVN_ERR_ASSERT(cb->window_pool == NULL);
237 cb->window = svn_txdelta_window_dup(window, window_pool);
238 cb->window_pool = window_pool;
239 cb->done = (window->sview_len == 0 || window->src_ops == 0);
240 }
241 else
242 cb->done = TRUE;
243
244 cb->init = FALSE;
245 return SVN_NO_ERROR;
246 }
247
248
249
250 /* Read one delta window from REP[CUR_CHUNK] and push it at the
251 composition handler. */
252
253 static svn_error_t *
get_one_window(struct compose_handler_baton * cb,svn_fs_t * fs,representation_t * rep,int cur_chunk)254 get_one_window(struct compose_handler_baton *cb,
255 svn_fs_t *fs,
256 representation_t *rep,
257 int cur_chunk)
258 {
259 svn_stream_t *wstream;
260 char diffdata[4096]; /* hunk of svndiff data */
261 svn_filesize_t off; /* offset into svndiff data */
262 apr_size_t amt; /* how much svndiff data to/was read */
263 const char *str_key;
264
265 apr_array_header_t *chunks = rep->contents.delta.chunks;
266 rep_delta_chunk_t *this_chunk, *first_chunk;
267
268 cb->init = TRUE;
269 if (chunks->nelts <= cur_chunk)
270 return compose_handler(NULL, cb);
271
272 /* Set up a window handling stream for the svndiff data. */
273 wstream = svn_txdelta_parse_svndiff(compose_handler, cb, TRUE,
274 cb->trail->pool);
275
276 /* First things first: send the "SVN"{version} header through the
277 stream. ### For now, we will just use the version specified
278 in the first chunk, and then verify that no chunks have a
279 different version number than the one used. In the future,
280 we might simply convert chunks that use a different version
281 of the diff format -- or, heck, a different format
282 altogether -- to the format/version of the first chunk. */
283 first_chunk = APR_ARRAY_IDX(chunks, 0, rep_delta_chunk_t*);
284 diffdata[0] = 'S';
285 diffdata[1] = 'V';
286 diffdata[2] = 'N';
287 diffdata[3] = (char) (first_chunk->version);
288 amt = 4;
289 SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
290 /* FIXME: The stream write handler is borked; assert (amt == 4); */
291
292 /* Get this string key which holds this window's data.
293 ### todo: make sure this is an `svndiff' DIFF skel here. */
294 this_chunk = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
295 str_key = this_chunk->string_key;
296
297 /* Run through the svndiff data, at least as far as necessary. */
298 off = 0;
299 do
300 {
301 amt = sizeof(diffdata);
302 SVN_ERR(svn_fs_bdb__string_read(fs, str_key, diffdata,
303 off, &amt, cb->trail,
304 cb->trail->pool));
305 off += amt;
306 SVN_ERR(svn_stream_write(wstream, diffdata, &amt));
307 }
308 while (amt != 0);
309 SVN_ERR(svn_stream_close(wstream));
310
311 SVN_ERR_ASSERT(!cb->init);
312 SVN_ERR_ASSERT(cb->window != NULL);
313 SVN_ERR_ASSERT(cb->window_pool != NULL);
314 return SVN_NO_ERROR;
315 }
316
317
318 /* Undeltify a range of data. DELTAS is the set of delta windows to
319 combine, FULLTEXT is the source text, CUR_CHUNK is the index of the
320 delta chunk we're starting from. OFFSET is the relative offset of
321 the requested data within the chunk; BUF and LEN are what we're
322 undeltifying to. */
323
324 static svn_error_t *
rep_undeltify_range(svn_fs_t * fs,const apr_array_header_t * deltas,representation_t * fulltext,int cur_chunk,char * buf,apr_size_t offset,apr_size_t * len,trail_t * trail,apr_pool_t * pool)325 rep_undeltify_range(svn_fs_t *fs,
326 const apr_array_header_t *deltas,
327 representation_t *fulltext,
328 int cur_chunk,
329 char *buf,
330 apr_size_t offset,
331 apr_size_t *len,
332 trail_t *trail,
333 apr_pool_t *pool)
334 {
335 apr_size_t len_read = 0;
336
337 do
338 {
339 struct compose_handler_baton cb = { 0 };
340 char *source_buf, *target_buf;
341 apr_size_t target_len;
342 int cur_rep;
343
344 cb.trail = trail;
345 cb.done = FALSE;
346 for (cur_rep = 0; !cb.done && cur_rep < deltas->nelts; ++cur_rep)
347 {
348 representation_t *const rep =
349 APR_ARRAY_IDX(deltas, cur_rep, representation_t*);
350 SVN_ERR(get_one_window(&cb, fs, rep, cur_chunk));
351 }
352
353 if (!cb.window)
354 /* That's it, no more source data is available. */
355 break;
356
357 /* The source view length should not be 0 if there are source
358 copy ops in the window. */
359 SVN_ERR_ASSERT(cb.window->sview_len > 0 || cb.window->src_ops == 0);
360
361 /* cb.window is the combined delta window. Read the source text
362 into a buffer. */
363 if (cb.source_buf)
364 {
365 /* The combiner already created the source text from a
366 self-compressed window. */
367 source_buf = cb.source_buf;
368 }
369 else if (fulltext && cb.window->sview_len > 0 && cb.window->src_ops > 0)
370 {
371 apr_size_t source_len = cb.window->sview_len;
372 source_buf = apr_palloc(cb.window_pool, source_len);
373 SVN_ERR(svn_fs_bdb__string_read
374 (fs, fulltext->contents.fulltext.string_key,
375 source_buf, cb.window->sview_offset, &source_len,
376 trail, pool));
377 if (source_len != cb.window->sview_len)
378 return svn_error_create
379 (SVN_ERR_FS_CORRUPT, NULL,
380 _("Svndiff source length inconsistency"));
381 }
382 else
383 {
384 source_buf = NULL; /* Won't read anything from here. */
385 }
386
387 if (offset > 0)
388 {
389 target_len = *len - len_read + offset;
390 target_buf = apr_palloc(cb.window_pool, target_len);
391 }
392 else
393 {
394 target_len = *len - len_read;
395 target_buf = buf;
396 }
397
398 svn_txdelta_apply_instructions(cb.window, source_buf,
399 target_buf, &target_len);
400 if (offset > 0)
401 {
402 SVN_ERR_ASSERT(target_len > offset);
403 target_len -= offset;
404 memcpy(buf, target_buf + offset, target_len);
405 offset = 0; /* Read from the beginning of the next chunk. */
406 }
407 /* Don't need this window any more. */
408 svn_pool_destroy(cb.window_pool);
409
410 len_read += target_len;
411 buf += target_len;
412 ++cur_chunk;
413 }
414 while (len_read < *len);
415
416 *len = len_read;
417 return SVN_NO_ERROR;
418 }
419
420
421
422 /* Calculate the index of the chunk in REP that contains REP_OFFSET,
423 and find the relative CHUNK_OFFSET within the chunk.
424 Return -1 if offset is beyond the end of the represented data.
425 ### The basic assumption is that all delta windows are the same size
426 and aligned at the same offset, so this number is the same in all
427 dependent deltas. Oh, and the chunks in REP must be ordered. */
428
429 static int
get_chunk_offset(representation_t * rep,svn_filesize_t rep_offset,apr_size_t * chunk_offset)430 get_chunk_offset(representation_t *rep,
431 svn_filesize_t rep_offset,
432 apr_size_t *chunk_offset)
433 {
434 const apr_array_header_t *chunks = rep->contents.delta.chunks;
435 int cur_chunk;
436 assert(chunks->nelts);
437
438 /* ### Yes, this is a linear search. I'll change this to bisection
439 the very second we notice it's slowing us down. */
440 for (cur_chunk = 0; cur_chunk < chunks->nelts; ++cur_chunk)
441 {
442 const rep_delta_chunk_t *const this_chunk
443 = APR_ARRAY_IDX(chunks, cur_chunk, rep_delta_chunk_t*);
444
445 if ((this_chunk->offset + this_chunk->size) > rep_offset)
446 {
447 assert(this_chunk->offset <= rep_offset);
448 assert(rep_offset - this_chunk->offset < SVN_MAX_OBJECT_SIZE);
449 *chunk_offset = (apr_size_t) (rep_offset - this_chunk->offset);
450 return cur_chunk;
451 }
452 }
453
454 return -1;
455 }
456
457 /* Copy into BUF *LEN bytes starting at OFFSET from the string
458 represented via REP_KEY in FS, as part of TRAIL.
459 The number of bytes actually copied is stored in *LEN. */
460 static svn_error_t *
rep_read_range(svn_fs_t * fs,const char * rep_key,svn_filesize_t offset,char * buf,apr_size_t * len,trail_t * trail,apr_pool_t * pool)461 rep_read_range(svn_fs_t *fs,
462 const char *rep_key,
463 svn_filesize_t offset,
464 char *buf,
465 apr_size_t *len,
466 trail_t *trail,
467 apr_pool_t *pool)
468 {
469 representation_t *rep;
470 apr_size_t chunk_offset;
471
472 /* Read in our REP. */
473 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
474 if (rep->kind == rep_kind_fulltext)
475 {
476 SVN_ERR(svn_fs_bdb__string_read(fs, rep->contents.fulltext.string_key,
477 buf, offset, len, trail, pool));
478 }
479 else if (rep->kind == rep_kind_delta)
480 {
481 const int cur_chunk = get_chunk_offset(rep, offset, &chunk_offset);
482 if (cur_chunk < 0)
483 *len = 0;
484 else
485 {
486 svn_error_t *err;
487 /* Preserve for potential use in error message. */
488 const char *first_rep_key = rep_key;
489 /* Make a list of all the rep's we need to undeltify this range.
490 We'll have to read them within this trail anyway, so we might
491 as well do it once and up front. */
492 apr_array_header_t *reps = apr_array_make(pool, 30, sizeof(rep));
493 do
494 {
495 const rep_delta_chunk_t *const first_chunk
496 = APR_ARRAY_IDX(rep->contents.delta.chunks,
497 0, rep_delta_chunk_t*);
498 const rep_delta_chunk_t *const chunk
499 = APR_ARRAY_IDX(rep->contents.delta.chunks,
500 cur_chunk, rep_delta_chunk_t*);
501
502 /* Verify that this chunk is of the same version as the first. */
503 if (first_chunk->version != chunk->version)
504 return svn_error_createf
505 (SVN_ERR_FS_CORRUPT, NULL,
506 _("Diff version inconsistencies in representation '%s'"),
507 rep_key);
508
509 rep_key = chunk->rep_key;
510 APR_ARRAY_PUSH(reps, representation_t *) = rep;
511 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key,
512 trail, pool));
513 }
514 while (rep->kind == rep_kind_delta
515 && rep->contents.delta.chunks->nelts > cur_chunk);
516
517 /* Right. We've either just read the fulltext rep, or a rep that's
518 too short, in which case we'll undeltify without source data.*/
519 if (rep->kind != rep_kind_delta && rep->kind != rep_kind_fulltext)
520 return UNKNOWN_NODE_KIND(rep_key);
521
522 if (rep->kind == rep_kind_delta)
523 rep = NULL; /* Don't use source data */
524
525 err = rep_undeltify_range(fs, reps, rep, cur_chunk, buf,
526 chunk_offset, len, trail, pool);
527 if (err)
528 {
529 if (err->apr_err == SVN_ERR_FS_CORRUPT)
530 return svn_error_createf
531 (SVN_ERR_FS_CORRUPT, err,
532 _("Corruption detected whilst reading delta chain from "
533 "representation '%s' to '%s'"), first_rep_key, rep_key);
534 else
535 return svn_error_trace(err);
536 }
537 }
538 }
539 else /* unknown kind */
540 return UNKNOWN_NODE_KIND(rep_key);
541
542 return SVN_NO_ERROR;
543 }
544
545
546 svn_error_t *
svn_fs_base__get_mutable_rep(const char ** new_rep_key,const char * rep_key,svn_fs_t * fs,const char * txn_id,trail_t * trail,apr_pool_t * pool)547 svn_fs_base__get_mutable_rep(const char **new_rep_key,
548 const char *rep_key,
549 svn_fs_t *fs,
550 const char *txn_id,
551 trail_t *trail,
552 apr_pool_t *pool)
553 {
554 representation_t *rep = NULL;
555 const char *new_str = NULL;
556
557 /* We were passed an existing REP_KEY, so examine it. If it is
558 mutable already, then just return REP_KEY as the mutable result
559 key. */
560 if (rep_key && (rep_key[0] != '\0'))
561 {
562 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
563 if (rep_is_mutable(rep, txn_id))
564 {
565 *new_rep_key = rep_key;
566 return SVN_NO_ERROR;
567 }
568 }
569
570 /* Either we weren't provided a base key to examine, or the base key
571 we were provided was not mutable. So, let's make a new
572 representation and return its key to the caller. */
573 SVN_ERR(svn_fs_bdb__string_append(fs, &new_str, 0, NULL, trail, pool));
574 rep = make_fulltext_rep(new_str, txn_id,
575 svn_checksum_empty_checksum(svn_checksum_md5,
576 pool),
577 svn_checksum_empty_checksum(svn_checksum_sha1,
578 pool),
579 pool);
580 return svn_fs_bdb__write_new_rep(new_rep_key, fs, rep, trail, pool);
581 }
582
583
584 svn_error_t *
svn_fs_base__delete_rep_if_mutable(svn_fs_t * fs,const char * rep_key,const char * txn_id,trail_t * trail,apr_pool_t * pool)585 svn_fs_base__delete_rep_if_mutable(svn_fs_t *fs,
586 const char *rep_key,
587 const char *txn_id,
588 trail_t *trail,
589 apr_pool_t *pool)
590 {
591 representation_t *rep;
592
593 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
594 if (! rep_is_mutable(rep, txn_id))
595 return SVN_NO_ERROR;
596
597 if (rep->kind == rep_kind_fulltext)
598 {
599 SVN_ERR(svn_fs_bdb__string_delete(fs,
600 rep->contents.fulltext.string_key,
601 trail, pool));
602 }
603 else if (rep->kind == rep_kind_delta)
604 {
605 apr_array_header_t *keys;
606 SVN_ERR(delta_string_keys(&keys, rep, pool));
607 SVN_ERR(delete_strings(keys, fs, trail, pool));
608 }
609 else /* unknown kind */
610 return UNKNOWN_NODE_KIND(rep_key);
611
612 return svn_fs_bdb__delete_rep(fs, rep_key, trail, pool);
613 }
614
615
616
617 /*** Reading and writing data via representations. ***/
618
619 /** Reading. **/
620
/* Baton for reading a representation's contents piecewise; created by
   rep_read_get_baton() and consumed by rep_read_contents(). */
struct rep_read_baton
{
  /* The FS from which we're reading. */
  svn_fs_t *fs;

  /* The key of the representation whose contents we want to read.  If
     this is NULL, the rep has never had any contents, so all reads
     fetch 0 bytes.

     Formerly, we cached the entire rep skel here, not just the key.
     That way we didn't have to fetch the rep from the db every time
     we want to read a little bit more of the file.  Unfortunately,
     this has a problem: if, say, a file's representation changes
     while we're reading (changes from fulltext to delta, for
     example), we'll never know it.  So for correctness, we now
     refetch the representation skel every time we want to read
     another chunk. */
  const char *rep_key;

  /* How many bytes have been read already. */
  svn_filesize_t offset;

  /* If non-NULL, every read is done as part of this trail.  If NULL,
     rep_read_contents() runs each read in a transaction of its own,
     using `scratch_pool' below. */
  trail_t *trail;

  /* MD5 checksum context.  Initialized when the baton is created, updated as
     we read data, and finalized when the last byte has been read. */
  svn_checksum_ctx_t *md5_checksum_ctx;

  /* Final resting place of the checksum created by md5_checksum_ctx. */
  svn_checksum_t *md5_checksum;

  /* SHA1 checksum context.  Initialized when the baton is created, updated as
     we read data, and finalized when the last byte has been read. */
  svn_checksum_ctx_t *sha1_checksum_ctx;

  /* Final resting place of the checksum created by sha1_checksum_ctx. */
  svn_checksum_t *sha1_checksum;

  /* The length of the rep's contents (as fulltext, that is,
     independent of how the rep actually stores the data.)  This is
     retrieved when the baton is created, and used to determine when
     we have read the last byte, at which point we compare checksums.

     Getting this at baton creation time makes interleaved reads and
     writes on the same rep in the same trail impossible.  But we're
     not doing that, and probably no one ever should.  And anyway if
     they do, they should see problems immediately. */
  svn_filesize_t size;

  /* Set to FALSE when the baton is created, TRUE once the checksum
     contexts have been finalized into md5_checksum / sha1_checksum. */
  svn_boolean_t checksum_finalized;

  /* Used for temporary allocations.  This pool is cleared at the
     start of each invocation of the relevant stream read function --
     see rep_read_contents(). */
  apr_pool_t *scratch_pool;

};
682
683
684 static svn_error_t *
rep_read_get_baton(struct rep_read_baton ** rb_p,svn_fs_t * fs,const char * rep_key,svn_boolean_t use_trail_for_reads,trail_t * trail,apr_pool_t * pool)685 rep_read_get_baton(struct rep_read_baton **rb_p,
686 svn_fs_t *fs,
687 const char *rep_key,
688 svn_boolean_t use_trail_for_reads,
689 trail_t *trail,
690 apr_pool_t *pool)
691 {
692 struct rep_read_baton *b;
693
694 b = apr_pcalloc(pool, sizeof(*b));
695 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
696 b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
697
698 if (rep_key)
699 SVN_ERR(svn_fs_base__rep_contents_size(&(b->size), fs, rep_key,
700 trail, pool));
701 else
702 b->size = 0;
703
704 b->checksum_finalized = FALSE;
705 b->fs = fs;
706 b->trail = use_trail_for_reads ? trail : NULL;
707 b->scratch_pool = svn_pool_create(pool);
708 b->rep_key = rep_key;
709 b->offset = 0;
710
711 *rb_p = b;
712
713 return SVN_NO_ERROR;
714 }
715
716
717
718 /*** Retrieving data. ***/
719
720 svn_error_t *
svn_fs_base__rep_contents_size(svn_filesize_t * size_p,svn_fs_t * fs,const char * rep_key,trail_t * trail,apr_pool_t * pool)721 svn_fs_base__rep_contents_size(svn_filesize_t *size_p,
722 svn_fs_t *fs,
723 const char *rep_key,
724 trail_t *trail,
725 apr_pool_t *pool)
726 {
727 representation_t *rep;
728
729 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
730
731 if (rep->kind == rep_kind_fulltext)
732 {
733 /* Get the size by asking Berkeley for the string's length. */
734 SVN_ERR(svn_fs_bdb__string_size(size_p, fs,
735 rep->contents.fulltext.string_key,
736 trail, pool));
737 }
738 else if (rep->kind == rep_kind_delta)
739 {
740 /* Get the size by finding the last window pkg in the delta and
741 adding its offset to its size. This way, we won't even be
742 messed up by overlapping windows, as long as the window pkgs
743 are still ordered. */
744 apr_array_header_t *chunks = rep->contents.delta.chunks;
745 rep_delta_chunk_t *last_chunk;
746
747 SVN_ERR_ASSERT(chunks->nelts);
748
749 last_chunk = APR_ARRAY_IDX(chunks, chunks->nelts - 1,
750 rep_delta_chunk_t *);
751 *size_p = last_chunk->offset + last_chunk->size;
752 }
753 else /* unknown kind */
754 return UNKNOWN_NODE_KIND(rep_key);
755
756 return SVN_NO_ERROR;
757 }
758
759
760 svn_error_t *
svn_fs_base__rep_contents_checksums(svn_checksum_t ** md5_checksum,svn_checksum_t ** sha1_checksum,svn_fs_t * fs,const char * rep_key,trail_t * trail,apr_pool_t * pool)761 svn_fs_base__rep_contents_checksums(svn_checksum_t **md5_checksum,
762 svn_checksum_t **sha1_checksum,
763 svn_fs_t *fs,
764 const char *rep_key,
765 trail_t *trail,
766 apr_pool_t *pool)
767 {
768 representation_t *rep;
769
770 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
771 if (md5_checksum)
772 *md5_checksum = svn_checksum_dup(rep->md5_checksum, pool);
773 if (sha1_checksum)
774 *sha1_checksum = svn_checksum_dup(rep->sha1_checksum, pool);
775
776 return SVN_NO_ERROR;
777 }
778
779
780 svn_error_t *
svn_fs_base__rep_contents(svn_string_t * str,svn_fs_t * fs,const char * rep_key,trail_t * trail,apr_pool_t * pool)781 svn_fs_base__rep_contents(svn_string_t *str,
782 svn_fs_t *fs,
783 const char *rep_key,
784 trail_t *trail,
785 apr_pool_t *pool)
786 {
787 svn_filesize_t contents_size;
788 apr_size_t len;
789 char *data;
790
791 SVN_ERR(svn_fs_base__rep_contents_size(&contents_size, fs, rep_key,
792 trail, pool));
793
794 /* What if the contents are larger than we can handle? */
795 if (contents_size > SVN_MAX_OBJECT_SIZE)
796 return svn_error_createf
797 (SVN_ERR_FS_GENERAL, NULL,
798 _("Rep contents are too large: "
799 "got %s, limit is %s"),
800 apr_psprintf(pool, "%" SVN_FILESIZE_T_FMT, contents_size),
801 apr_psprintf(pool, "%" APR_SIZE_T_FMT, SVN_MAX_OBJECT_SIZE));
802 else
803 str->len = (apr_size_t) contents_size;
804
805 data = apr_palloc(pool, str->len);
806 str->data = data;
807 len = str->len;
808 SVN_ERR(rep_read_range(fs, rep_key, 0, data, &len, trail, pool));
809
810 /* Paranoia. */
811 if (len != str->len)
812 return svn_error_createf
813 (SVN_ERR_FS_CORRUPT, NULL,
814 _("Failure reading representation '%s'"), rep_key);
815
816 /* Just the standard paranoia. */
817 {
818 representation_t *rep;
819 svn_checksum_t *checksum, *rep_checksum;
820
821 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
822 rep_checksum = rep->sha1_checksum ? rep->sha1_checksum : rep->md5_checksum;
823 SVN_ERR(svn_checksum(&checksum, rep_checksum->kind, str->data, str->len,
824 pool));
825
826 if (! svn_checksum_match(checksum, rep_checksum))
827 return svn_error_create(SVN_ERR_FS_CORRUPT,
828 svn_checksum_mismatch_err(rep_checksum, checksum, pool,
829 _("Checksum mismatch on representation '%s'"),
830 rep_key),
831 NULL);
832 }
833
834 return SVN_NO_ERROR;
835 }
836
837
/* Arguments passed to txn_body_read_rep() through its void * baton. */
struct read_rep_args
{
  struct rep_read_baton *rb;   /* The data source. */
  char *buf;                   /* Where to put what we read. */
  apr_size_t *len;             /* In: how much to read.
                                  Out: how much was read. */
};
844
845
846 /* BATON is of type `read_rep_args':
847
848 Read into BATON->rb->buf the *(BATON->len) bytes starting at
849 BATON->rb->offset from the data represented at BATON->rb->rep_key
850 in BATON->rb->fs, as part of TRAIL.
851
852 Afterwards, *(BATON->len) is the number of bytes actually read, and
853 BATON->rb->offset is incremented by that amount.
854
855 If BATON->rb->rep_key is null, this is assumed to mean the file's
856 contents have no representation, i.e., the file has no contents.
857 In that case, if BATON->rb->offset > 0, return the error
858 SVN_ERR_FS_FILE_CONTENTS_CHANGED, else just set *(BATON->len) to
859 zero and return. */
860 static svn_error_t *
txn_body_read_rep(void * baton,trail_t * trail)861 txn_body_read_rep(void *baton, trail_t *trail)
862 {
863 struct read_rep_args *args = baton;
864
865 if (args->rb->rep_key)
866 {
867 SVN_ERR(rep_read_range(args->rb->fs,
868 args->rb->rep_key,
869 args->rb->offset,
870 args->buf,
871 args->len,
872 trail,
873 args->rb->scratch_pool));
874
875 args->rb->offset += *(args->len);
876
877 /* We calculate the checksum just once, the moment we see the
878 * last byte of data. But we can't assume there was a short
879 * read. The caller may have known the length of the data and
880 * requested exactly that amount, so there would never be a
881 * short read. (That's why the read baton has to know the
882 * length of the data in advance.)
883 *
884 * On the other hand, some callers invoke the stream reader in a
885 * loop whose termination condition is that the read returned
886 * zero bytes of data -- which usually results in the read
887 * function being called one more time *after* the call that got
888 * a short read (indicating end-of-stream).
889 *
890 * The conditions below ensure that we compare checksums even
891 * when there is no short read associated with the last byte of
892 * data, while also ensuring that it's harmless to repeatedly
893 * read 0 bytes from the stream.
894 */
895 if (! args->rb->checksum_finalized)
896 {
897 SVN_ERR(svn_checksum_update(args->rb->md5_checksum_ctx, args->buf,
898 *(args->len)));
899 SVN_ERR(svn_checksum_update(args->rb->sha1_checksum_ctx, args->buf,
900 *(args->len)));
901
902 if (args->rb->offset == args->rb->size)
903 {
904 representation_t *rep;
905
906 SVN_ERR(svn_checksum_final(&args->rb->md5_checksum,
907 args->rb->md5_checksum_ctx,
908 trail->pool));
909 SVN_ERR(svn_checksum_final(&args->rb->sha1_checksum,
910 args->rb->sha1_checksum_ctx,
911 trail->pool));
912 args->rb->checksum_finalized = TRUE;
913
914 SVN_ERR(svn_fs_bdb__read_rep(&rep, args->rb->fs,
915 args->rb->rep_key,
916 trail, trail->pool));
917
918 if (rep->md5_checksum
919 && (! svn_checksum_match(rep->md5_checksum,
920 args->rb->md5_checksum)))
921 return svn_error_create(SVN_ERR_FS_CORRUPT,
922 svn_checksum_mismatch_err(rep->md5_checksum,
923 args->rb->md5_checksum, trail->pool,
924 _("MD5 checksum mismatch on representation '%s'"),
925 args->rb->rep_key),
926 NULL);
927
928 if (rep->sha1_checksum
929 && (! svn_checksum_match(rep->sha1_checksum,
930 args->rb->sha1_checksum)))
931 return svn_error_createf(SVN_ERR_FS_CORRUPT,
932 svn_checksum_mismatch_err(rep->sha1_checksum,
933 args->rb->sha1_checksum, trail->pool,
934 _("SHA1 checksum mismatch on representation '%s'"),
935 args->rb->rep_key),
936 NULL);
937 }
938 }
939 }
940 else if (args->rb->offset > 0)
941 {
942 return
943 svn_error_create
944 (SVN_ERR_FS_REP_CHANGED, NULL,
945 _("Null rep, but offset past zero already"));
946 }
947 else
948 *(args->len) = 0;
949
950 return SVN_NO_ERROR;
951 }
952
953
954 static svn_error_t *
rep_read_contents(void * baton,char * buf,apr_size_t * len)955 rep_read_contents(void *baton, char *buf, apr_size_t *len)
956 {
957 struct rep_read_baton *rb = baton;
958 struct read_rep_args args;
959
960 /* Clear the scratch pool of the results of previous invocations. */
961 svn_pool_clear(rb->scratch_pool);
962
963 args.rb = rb;
964 args.buf = buf;
965 args.len = len;
966
967 /* If we got a trail, use it; else make one. */
968 if (rb->trail)
969 SVN_ERR(txn_body_read_rep(&args, rb->trail));
970 else
971 {
972 /* In the case of reading from the db, any returned data should
973 live in our pre-allocated buffer, so the whole operation can
974 happen within a single malloc/free cycle. This prevents us
975 from creating millions of unnecessary trail subpools when
976 reading a big file. */
977 SVN_ERR(svn_fs_base__retry_txn(rb->fs,
978 txn_body_read_rep,
979 &args,
980 TRUE,
981 rb->scratch_pool));
982 }
983 return SVN_NO_ERROR;
984 }
985
986
987 /** Writing. **/
988
989
990 struct rep_write_baton
991 {
992 /* The FS in which we're writing. */
993 svn_fs_t *fs;
994
995 /* The representation skel whose contents we want to write. */
996 const char *rep_key;
997
998 /* The transaction id under which this write action will take
999 place. */
1000 const char *txn_id;
1001
1002 /* If present, do the write as part of this trail, and use trail's
1003 pool. Otherwise, see `pool' below. */
1004 trail_t *trail;
1005
1006 /* SHA1 and MD5 checksums. Initialized when the baton is created,
1007 updated as we write data, and finalized and stored when the
1008 stream is closed. */
1009 svn_checksum_ctx_t *md5_checksum_ctx;
1010 svn_checksum_t *md5_checksum;
1011 svn_checksum_ctx_t *sha1_checksum_ctx;
1012 svn_checksum_t *sha1_checksum;
1013 svn_boolean_t finalized;
1014
1015 /* Used for temporary allocations, iff `trail' (above) is null. */
1016 apr_pool_t *pool;
1017
1018 };
1019
1020
1021 static struct rep_write_baton *
rep_write_get_baton(svn_fs_t * fs,const char * rep_key,const char * txn_id,trail_t * trail,apr_pool_t * pool)1022 rep_write_get_baton(svn_fs_t *fs,
1023 const char *rep_key,
1024 const char *txn_id,
1025 trail_t *trail,
1026 apr_pool_t *pool)
1027 {
1028 struct rep_write_baton *b;
1029
1030 b = apr_pcalloc(pool, sizeof(*b));
1031 b->md5_checksum_ctx = svn_checksum_ctx_create(svn_checksum_md5, pool);
1032 b->sha1_checksum_ctx = svn_checksum_ctx_create(svn_checksum_sha1, pool);
1033 b->fs = fs;
1034 b->trail = trail;
1035 b->pool = pool;
1036 b->rep_key = rep_key;
1037 b->txn_id = txn_id;
1038 return b;
1039 }
1040
1041
1042
1043 /* Write LEN bytes from BUF into the end of the string represented via
1044 REP_KEY in FS, as part of TRAIL. If the representation is not
1045 mutable, return the error SVN_FS_REP_NOT_MUTABLE. */
1046 static svn_error_t *
rep_write(svn_fs_t * fs,const char * rep_key,const char * buf,apr_size_t len,const char * txn_id,trail_t * trail,apr_pool_t * pool)1047 rep_write(svn_fs_t *fs,
1048 const char *rep_key,
1049 const char *buf,
1050 apr_size_t len,
1051 const char *txn_id,
1052 trail_t *trail,
1053 apr_pool_t *pool)
1054 {
1055 representation_t *rep;
1056
1057 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1058
1059 if (! rep_is_mutable(rep, txn_id))
1060 return svn_error_createf
1061 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1062 _("Rep '%s' is not mutable"), rep_key);
1063
1064 if (rep->kind == rep_kind_fulltext)
1065 {
1066 SVN_ERR(svn_fs_bdb__string_append
1067 (fs, &(rep->contents.fulltext.string_key), len, buf,
1068 trail, pool));
1069 }
1070 else if (rep->kind == rep_kind_delta)
1071 {
1072 /* There should never be a case when we have a mutable
1073 non-fulltext rep. The only code that creates mutable reps is
1074 in this file, and it creates them fulltext. */
1075 return svn_error_createf
1076 (SVN_ERR_FS_CORRUPT, NULL,
1077 _("Rep '%s' both mutable and non-fulltext"), rep_key);
1078 }
1079 else /* unknown kind */
1080 return UNKNOWN_NODE_KIND(rep_key);
1081
1082 return SVN_NO_ERROR;
1083 }
1084
1085
1086 struct write_rep_args
1087 {
1088 struct rep_write_baton *wb; /* Destination. */
1089 const char *buf; /* Data. */
1090 apr_size_t len; /* How much to write. */
1091 };
1092
1093
1094 /* BATON is of type `write_rep_args':
1095 Append onto BATON->wb->rep_key's contents BATON->len bytes of
1096 data from BATON->wb->buf, in BATON->rb->fs, as part of TRAIL.
1097
1098 If the representation is not mutable, return the error
1099 SVN_FS_REP_NOT_MUTABLE. */
1100 static svn_error_t *
txn_body_write_rep(void * baton,trail_t * trail)1101 txn_body_write_rep(void *baton, trail_t *trail)
1102 {
1103 struct write_rep_args *args = baton;
1104
1105 SVN_ERR(rep_write(args->wb->fs,
1106 args->wb->rep_key,
1107 args->buf,
1108 args->len,
1109 args->wb->txn_id,
1110 trail,
1111 trail->pool));
1112 SVN_ERR(svn_checksum_update(args->wb->md5_checksum_ctx,
1113 args->buf, args->len));
1114 SVN_ERR(svn_checksum_update(args->wb->sha1_checksum_ctx,
1115 args->buf, args->len));
1116 return SVN_NO_ERROR;
1117 }
1118
1119
1120 static svn_error_t *
rep_write_contents(void * baton,const char * buf,apr_size_t * len)1121 rep_write_contents(void *baton,
1122 const char *buf,
1123 apr_size_t *len)
1124 {
1125 struct rep_write_baton *wb = baton;
1126 struct write_rep_args args;
1127
1128 /* We toss LEN's indirectness because if not all the bytes are
1129 written, it's an error, so we wouldn't be reporting anything back
1130 through *LEN anyway. */
1131 args.wb = wb;
1132 args.buf = buf;
1133 args.len = *len;
1134
1135 /* If we got a trail, use it; else make one. */
1136 if (wb->trail)
1137 SVN_ERR(txn_body_write_rep(&args, wb->trail));
1138 else
1139 {
1140 /* In the case of simply writing the rep to the db, we're
1141 *certain* that there's no data coming back to us that needs
1142 to be preserved... so the whole operation can happen within a
1143 single malloc/free cycle. This prevents us from creating
1144 millions of unnecessary trail subpools when writing a big
1145 file. */
1146 SVN_ERR(svn_fs_base__retry_txn(wb->fs,
1147 txn_body_write_rep,
1148 &args,
1149 TRUE,
1150 wb->pool));
1151 }
1152
1153 return SVN_NO_ERROR;
1154 }
1155
1156
1157 /* Helper for rep_write_close_contents(); see that doc string for
1158 more. BATON is of type `struct rep_write_baton'. */
1159 static svn_error_t *
txn_body_write_close_rep(void * baton,trail_t * trail)1160 txn_body_write_close_rep(void *baton, trail_t *trail)
1161 {
1162 struct rep_write_baton *wb = baton;
1163 representation_t *rep;
1164
1165 SVN_ERR(svn_fs_bdb__read_rep(&rep, wb->fs, wb->rep_key,
1166 trail, trail->pool));
1167 rep->md5_checksum = svn_checksum_dup(wb->md5_checksum, trail->pool);
1168 rep->sha1_checksum = svn_checksum_dup(wb->sha1_checksum, trail->pool);
1169 return svn_fs_bdb__write_rep(wb->fs, wb->rep_key, rep,
1170 trail, trail->pool);
1171 }
1172
1173
1174 /* BATON is of type `struct rep_write_baton'.
1175 *
1176 * Finalize BATON->md5_context and store the resulting digest under
1177 * BATON->rep_key.
1178 */
1179 static svn_error_t *
rep_write_close_contents(void * baton)1180 rep_write_close_contents(void *baton)
1181 {
1182 struct rep_write_baton *wb = baton;
1183
1184 /* ### Thought: if we fixed apr-util MD5 contexts to allow repeated
1185 digestification, then we wouldn't need a stream close function at
1186 all -- instead, we could update the stored checksum each time a
1187 write occurred, which would have the added advantage of making
1188 interleaving reads and writes work. Currently, they'd fail with
1189 a checksum mismatch, it just happens that our code never tries to
1190 do that anyway. */
1191
1192 if (! wb->finalized)
1193 {
1194 SVN_ERR(svn_checksum_final(&wb->md5_checksum, wb->md5_checksum_ctx,
1195 wb->pool));
1196 SVN_ERR(svn_checksum_final(&wb->sha1_checksum, wb->sha1_checksum_ctx,
1197 wb->pool));
1198 wb->finalized = TRUE;
1199 }
1200
1201 /* If we got a trail, use it; else make one. */
1202 if (wb->trail)
1203 return txn_body_write_close_rep(wb, wb->trail);
1204 else
1205 /* We need to keep our trail pool around this time so the
1206 checksums we've calculated survive. */
1207 return svn_fs_base__retry_txn(wb->fs, txn_body_write_close_rep,
1208 wb, FALSE, wb->pool);
1209 }
1210
1211
1212 /** Public read and write stream constructors. **/
1213
1214 svn_error_t *
svn_fs_base__rep_contents_read_stream(svn_stream_t ** rs_p,svn_fs_t * fs,const char * rep_key,svn_boolean_t use_trail_for_reads,trail_t * trail,apr_pool_t * pool)1215 svn_fs_base__rep_contents_read_stream(svn_stream_t **rs_p,
1216 svn_fs_t *fs,
1217 const char *rep_key,
1218 svn_boolean_t use_trail_for_reads,
1219 trail_t *trail,
1220 apr_pool_t *pool)
1221 {
1222 struct rep_read_baton *rb;
1223
1224 SVN_ERR(rep_read_get_baton(&rb, fs, rep_key, use_trail_for_reads,
1225 trail, pool));
1226 *rs_p = svn_stream_create(rb, pool);
1227 svn_stream_set_read2(*rs_p, NULL /* only full read support */,
1228 rep_read_contents);
1229
1230 return SVN_NO_ERROR;
1231 }
1232
1233
1234 /* Clear the contents of REP_KEY, so that it represents the empty
1235 string, as part of TRAIL. TXN_ID is the id of the Subversion
1236 transaction under which this occurs. If REP_KEY is not mutable,
1237 return the error SVN_ERR_FS_REP_NOT_MUTABLE. */
1238 static svn_error_t *
rep_contents_clear(svn_fs_t * fs,const char * rep_key,const char * txn_id,trail_t * trail,apr_pool_t * pool)1239 rep_contents_clear(svn_fs_t *fs,
1240 const char *rep_key,
1241 const char *txn_id,
1242 trail_t *trail,
1243 apr_pool_t *pool)
1244 {
1245 representation_t *rep;
1246 const char *str_key;
1247
1248 SVN_ERR(svn_fs_bdb__read_rep(&rep, fs, rep_key, trail, pool));
1249
1250 /* Make sure it's mutable. */
1251 if (! rep_is_mutable(rep, txn_id))
1252 return svn_error_createf
1253 (SVN_ERR_FS_REP_NOT_MUTABLE, NULL,
1254 _("Rep '%s' is not mutable"), rep_key);
1255
1256 SVN_ERR_ASSERT(rep->kind == rep_kind_fulltext);
1257
1258 /* If rep has no string, just return success. Else, clear the
1259 underlying string. */
1260 str_key = rep->contents.fulltext.string_key;
1261 if (str_key && *str_key)
1262 {
1263 SVN_ERR(svn_fs_bdb__string_clear(fs, str_key, trail, pool));
1264 rep->md5_checksum = NULL;
1265 rep->sha1_checksum = NULL;
1266 SVN_ERR(svn_fs_bdb__write_rep(fs, rep_key, rep, trail, pool));
1267 }
1268 return SVN_NO_ERROR;
1269 }
1270
1271
1272 svn_error_t *
svn_fs_base__rep_contents_write_stream(svn_stream_t ** ws_p,svn_fs_t * fs,const char * rep_key,const char * txn_id,svn_boolean_t use_trail_for_writes,trail_t * trail,apr_pool_t * pool)1273 svn_fs_base__rep_contents_write_stream(svn_stream_t **ws_p,
1274 svn_fs_t *fs,
1275 const char *rep_key,
1276 const char *txn_id,
1277 svn_boolean_t use_trail_for_writes,
1278 trail_t *trail,
1279 apr_pool_t *pool)
1280 {
1281 struct rep_write_baton *wb;
1282
1283 /* Clear the current rep contents (free mutability check!). */
1284 SVN_ERR(rep_contents_clear(fs, rep_key, txn_id, trail, pool));
1285
1286 /* Now, generate the write baton and stream. */
1287 wb = rep_write_get_baton(fs, rep_key, txn_id,
1288 use_trail_for_writes ? trail : NULL, pool);
1289 *ws_p = svn_stream_create(wb, pool);
1290 svn_stream_set_write(*ws_p, rep_write_contents);
1291 svn_stream_set_close(*ws_p, rep_write_close_contents);
1292
1293 return SVN_NO_ERROR;
1294 }
1295
1296
1297
1298 /*** Deltified storage. ***/
1299
1300 /* Baton for svn_write_fn_t write_string_set(). */
1301 struct write_svndiff_strings_baton
1302 {
1303 /* The fs where lives the string we're writing. */
1304 svn_fs_t *fs;
1305
1306 /* The key of the string we're writing to. Typically this is
1307 initialized to NULL, so svn_fs_base__string_append() can fill in a
1308 value. */
1309 const char *key;
1310
1311 /* The amount of txdelta data written to the current
1312 string-in-progress. */
1313 apr_size_t size;
1314
1315 /* The amount of svndiff header information we've written thus far
1316 to the strings table. */
1317 apr_size_t header_read;
1318
1319 /* The version number of the svndiff data written. ### You'd better
1320 not count on this being populated after the first chunk is sent
1321 through the interface, since it lives at the 4th byte of the
1322 stream. */
1323 apr_byte_t version;
1324
1325 /* The trail we're writing in. */
1326 trail_t *trail;
1327
1328 };
1329
1330
1331 /* Function of type `svn_write_fn_t', for writing to a collection of
1332 strings; BATON is `struct write_svndiff_strings_baton *'.
1333
1334 On the first call, BATON->key is null. A new string key in
1335 BATON->fs is chosen and stored in BATON->key; each call appends
1336 *LEN bytes from DATA onto the string. *LEN is never changed; if
1337 the write fails to write all *LEN bytes, an error is returned.
1338 BATON->size is used to track the total amount of data written via
1339 this handler, and must be reset by the caller to 0 when appropriate. */
1340 static svn_error_t *
write_svndiff_strings(void * baton,const char * data,apr_size_t * len)1341 write_svndiff_strings(void *baton, const char *data, apr_size_t *len)
1342 {
1343 struct write_svndiff_strings_baton *wb = baton;
1344 const char *buf = data;
1345 apr_size_t nheader = 0;
1346
1347 /* If we haven't stripped all the header information from this
1348 stream yet, keep stripping. If someone sends a first window
1349 through here that's shorter than 4 bytes long, this will probably
1350 cause a nuclear reactor meltdown somewhere in the American
1351 midwest. */
1352 if (wb->header_read < 4)
1353 {
1354 nheader = 4 - wb->header_read;
1355 *len -= nheader;
1356 buf += nheader;
1357 wb->header_read += nheader;
1358
1359 /* If we have *now* read the full 4-byte header, check that
1360 least byte for the version number of the svndiff format. */
1361 if (wb->header_read == 4)
1362 wb->version = *(buf - 1);
1363 }
1364
1365 /* Append to the current string we're writing (or create a new one
1366 if WB->key is NULL). */
1367 SVN_ERR(svn_fs_bdb__string_append(wb->fs, &(wb->key), *len,
1368 buf, wb->trail, wb->trail->pool));
1369
1370 /* Make sure we (still) have a key. */
1371 if (wb->key == NULL)
1372 return svn_error_create(SVN_ERR_FS_GENERAL, NULL,
1373 _("Failed to get new string key"));
1374
1375 /* Restore *LEN to the value it *would* have been were it not for
1376 header stripping. */
1377 *len += nheader;
1378
1379 /* Increment our running total of bytes written to this string. */
1380 wb->size += *len;
1381
1382 return SVN_NO_ERROR;
1383 }
1384
1385
1386 typedef struct window_write_t
1387 {
1388 const char *key; /* string key for this window */
1389 apr_size_t svndiff_len; /* amount of svndiff data written to the string */
1390 svn_filesize_t text_off; /* offset of fulltext represented by this window */
1391 apr_size_t text_len; /* amount of fulltext data represented by this window */
1392
1393 } window_write_t;
1394
1395
1396 svn_error_t *
svn_fs_base__rep_deltify(svn_fs_t * fs,const char * target,const char * source,trail_t * trail,apr_pool_t * pool)1397 svn_fs_base__rep_deltify(svn_fs_t *fs,
1398 const char *target,
1399 const char *source,
1400 trail_t *trail,
1401 apr_pool_t *pool)
1402 {
1403 base_fs_data_t *bfd = fs->fsap_data;
1404 svn_stream_t *source_stream; /* stream to read the source */
1405 svn_stream_t *target_stream; /* stream to read the target */
1406 svn_txdelta_stream_t *txdelta_stream; /* stream to read delta windows */
1407
1408 /* window-y things, and an array to track them */
1409 window_write_t *ww;
1410 apr_array_header_t *windows;
1411
1412 /* stream to write new (deltified) target data and its baton */
1413 svn_stream_t *new_target_stream;
1414 struct write_svndiff_strings_baton new_target_baton;
1415
1416 /* window handler/baton for writing to above stream */
1417 svn_txdelta_window_handler_t new_target_handler;
1418 void *new_target_handler_baton;
1419
1420 /* yes, we do windows */
1421 svn_txdelta_window_t *window;
1422
1423 /* The current offset into the fulltext that our window is about to
1424 write. This doubles, after all windows are written, as the
1425 total size of the svndiff data for the deltification process. */
1426 svn_filesize_t tview_off = 0;
1427
1428 /* The total amount of diff data written while deltifying. */
1429 svn_filesize_t diffsize = 0;
1430
1431 /* TARGET's original string keys */
1432 apr_array_header_t *orig_str_keys;
1433
1434 /* The checksums for the representation's fulltext contents. */
1435 svn_checksum_t *rep_md5_checksum;
1436 svn_checksum_t *rep_sha1_checksum;
1437
1438 /* MD5 digest */
1439 const unsigned char *digest;
1440
1441 /* pool for holding the windows */
1442 apr_pool_t *wpool;
1443
1444 /* Paranoia: never allow a rep to be deltified against itself,
1445 because then there would be no fulltext reachable in the delta
1446 chain, and badness would ensue. */
1447 if (strcmp(target, source) == 0)
1448 return svn_error_createf
1449 (SVN_ERR_FS_CORRUPT, NULL,
1450 _("Attempt to deltify '%s' against itself"),
1451 target);
1452
1453 /* Set up a handler for the svndiff data, which will write each
1454 window to its own string in the `strings' table. */
1455 new_target_baton.fs = fs;
1456 new_target_baton.trail = trail;
1457 new_target_baton.header_read = FALSE;
1458 new_target_stream = svn_stream_create(&new_target_baton, pool);
1459 svn_stream_set_write(new_target_stream, write_svndiff_strings);
1460
1461 /* Get streams to our source and target text data. */
1462 SVN_ERR(svn_fs_base__rep_contents_read_stream(&source_stream, fs, source,
1463 TRUE, trail, pool));
1464 SVN_ERR(svn_fs_base__rep_contents_read_stream(&target_stream, fs, target,
1465 TRUE, trail, pool));
1466
1467 /* Setup a stream to convert the textdelta data into svndiff windows. */
1468 svn_txdelta2(&txdelta_stream, source_stream, target_stream, TRUE, pool);
1469
1470 if (bfd->format >= SVN_FS_BASE__MIN_SVNDIFF1_FORMAT)
1471 svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1472 new_target_stream, 1,
1473 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1474 else
1475 svn_txdelta_to_svndiff3(&new_target_handler, &new_target_handler_baton,
1476 new_target_stream, 0,
1477 SVN_DELTA_COMPRESSION_LEVEL_DEFAULT, pool);
1478
1479 /* subpool for the windows */
1480 wpool = svn_pool_create(pool);
1481
1482 /* Now, loop, manufacturing and dispatching windows of svndiff data. */
1483 windows = apr_array_make(pool, 1, sizeof(ww));
1484 do
1485 {
1486 /* Reset some baton variables. */
1487 new_target_baton.size = 0;
1488 new_target_baton.key = NULL;
1489
1490 /* Free the window. */
1491 svn_pool_clear(wpool);
1492
1493 /* Fetch the next window of txdelta data. */
1494 SVN_ERR(svn_txdelta_next_window(&window, txdelta_stream, wpool));
1495
1496 /* Send off this package to be written as svndiff data. */
1497 SVN_ERR(new_target_handler(window, new_target_handler_baton));
1498 if (window)
1499 {
1500 /* Add a new window description to our array. */
1501 ww = apr_pcalloc(pool, sizeof(*ww));
1502 ww->key = new_target_baton.key;
1503 ww->svndiff_len = new_target_baton.size;
1504 ww->text_off = tview_off;
1505 ww->text_len = window->tview_len;
1506 APR_ARRAY_PUSH(windows, window_write_t *) = ww;
1507
1508 /* Update our recordkeeping variables. */
1509 tview_off += window->tview_len;
1510 diffsize += ww->svndiff_len;
1511 }
1512
1513 } while (window);
1514
1515 svn_pool_destroy(wpool);
1516
1517 /* Having processed all the windows, we can query the MD5 digest
1518 from the stream. */
1519 digest = svn_txdelta_md5_digest(txdelta_stream);
1520 if (! digest)
1521 return svn_error_createf
1522 (SVN_ERR_DELTA_MD5_CHECKSUM_ABSENT, NULL,
1523 _("Failed to calculate MD5 digest for '%s'"),
1524 source);
1525
1526 /* Construct a list of the strings used by the old representation so
1527 that we can delete them later. While we are here, if the old
1528 representation was a fulltext, check to make sure the delta we're
1529 replacing it with is actually smaller. (Don't perform this check
1530 if we're replacing a delta; in that case, we're going for a time
1531 optimization, not a space optimization.) */
1532 {
1533 representation_t *old_rep;
1534 const char *str_key;
1535
1536 SVN_ERR(svn_fs_bdb__read_rep(&old_rep, fs, target, trail, pool));
1537 if (old_rep->kind == rep_kind_fulltext)
1538 {
1539 svn_filesize_t old_size = 0;
1540
1541 str_key = old_rep->contents.fulltext.string_key;
1542 SVN_ERR(svn_fs_bdb__string_size(&old_size, fs, str_key,
1543 trail, pool));
1544 orig_str_keys = apr_array_make(pool, 1, sizeof(str_key));
1545 APR_ARRAY_PUSH(orig_str_keys, const char *) = str_key;
1546
1547 /* If the new data is NOT an space optimization, destroy the
1548 string(s) we created, and get outta here. */
1549 if (diffsize >= old_size)
1550 {
1551 int i;
1552 for (i = 0; i < windows->nelts; i++)
1553 {
1554 ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1555 SVN_ERR(svn_fs_bdb__string_delete(fs, ww->key, trail, pool));
1556 }
1557 return SVN_NO_ERROR;
1558 }
1559 }
1560 else if (old_rep->kind == rep_kind_delta)
1561 SVN_ERR(delta_string_keys(&orig_str_keys, old_rep, pool));
1562 else /* unknown kind */
1563 return UNKNOWN_NODE_KIND(target);
1564
1565 /* Save the checksums, since the new rep needs them. */
1566 rep_md5_checksum = svn_checksum_dup(old_rep->md5_checksum, pool);
1567 rep_sha1_checksum = svn_checksum_dup(old_rep->sha1_checksum, pool);
1568 }
1569
1570 /* Hook the new strings we wrote into the rest of the filesystem by
1571 building a new representation to replace our old one. */
1572 {
1573 representation_t new_rep;
1574 rep_delta_chunk_t *chunk;
1575 apr_array_header_t *chunks;
1576 int i;
1577
1578 new_rep.kind = rep_kind_delta;
1579 new_rep.txn_id = NULL;
1580
1581 /* Migrate the old rep's checksums to the new rep. */
1582 new_rep.md5_checksum = svn_checksum_dup(rep_md5_checksum, pool);
1583 new_rep.sha1_checksum = svn_checksum_dup(rep_sha1_checksum, pool);
1584
1585 chunks = apr_array_make(pool, windows->nelts, sizeof(chunk));
1586
1587 /* Loop through the windows we wrote, creating and adding new
1588 chunks to the representation. */
1589 for (i = 0; i < windows->nelts; i++)
1590 {
1591 ww = APR_ARRAY_IDX(windows, i, window_write_t *);
1592
1593 /* Allocate a chunk and its window */
1594 chunk = apr_palloc(pool, sizeof(*chunk));
1595 chunk->offset = ww->text_off;
1596
1597 /* Populate the window */
1598 chunk->version = new_target_baton.version;
1599 chunk->string_key = ww->key;
1600 chunk->size = ww->text_len;
1601 chunk->rep_key = source;
1602
1603 /* Add this chunk to the array. */
1604 APR_ARRAY_PUSH(chunks, rep_delta_chunk_t *) = chunk;
1605 }
1606
1607 /* Put the chunks array into the representation. */
1608 new_rep.contents.delta.chunks = chunks;
1609
1610 /* Write out the new representation. */
1611 SVN_ERR(svn_fs_bdb__write_rep(fs, target, &new_rep, trail, pool));
1612
1613 /* Delete the original pre-deltified strings. */
1614 SVN_ERR(delete_strings(orig_str_keys, fs, trail, pool));
1615 }
1616
1617 return SVN_NO_ERROR;
1618 }
1619