1 /*
2 * diff_file.c : routines for doing diffs on files
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25 #include <apr.h>
26 #include <apr_pools.h>
27 #include <apr_general.h>
28 #include <apr_file_io.h>
29 #include <apr_file_info.h>
30 #include <apr_time.h>
31 #include <apr_mmap.h>
32 #include <apr_getopt.h>
33
34 #include <assert.h>
35
36 #include "svn_error.h"
37 #include "svn_diff.h"
38 #include "svn_types.h"
39 #include "svn_string.h"
40 #include "svn_subst.h"
41 #include "svn_io.h"
42 #include "svn_utf.h"
43 #include "svn_pools.h"
44 #include "diff.h"
45 #include "svn_private_config.h"
46 #include "svn_path.h"
47 #include "svn_ctype.h"
48
49 #include "private/svn_utf_private.h"
50 #include "private/svn_eol_private.h"
51 #include "private/svn_dep_compat.h"
52 #include "private/svn_adler32.h"
53 #include "private/svn_diff_private.h"
54
/* A token, i.e. a line read from a file.
 *
 * Tokens are filled in by datasource_get_next_token().  Tokens that are
 * no longer needed can be chained through NEXT onto the baton's list of
 * reusable tokens. */
typedef struct svn_diff__file_token_t
{
  /* Next token in free list. */
  struct svn_diff__file_token_t *next;
  /* The datasource this token was read from. */
  svn_diff_datasource_e datasource;
  /* Offset in the datasource. */
  apr_off_t offset;
  /* Offset of the normalized token (may skip leading whitespace) */
  apr_off_t norm_offset;
  /* Total length - before normalization. */
  apr_off_t raw_length;
  /* Total length - after normalization. */
  apr_off_t length;
} svn_diff__file_token_t;
70
71
/* The baton shared by all the file-diff callbacks in this file.  It holds
 * the per-datasource reading state (one slot per datasource, selected with
 * datasource_to_index()), the diff options, and the token free list. */
typedef struct svn_diff__file_baton_t
{
  /* Options handed to svn_diff__normalize_buffer() when tokens are read. */
  const svn_diff_file_options_t *options;

  struct file_info {
    const char *path;  /* path to this file, absolute or relative to CWD */

    /* All the following fields are active while this datasource is open */
    apr_file_t *file;  /* handle of this file */
    apr_off_t size;    /* total raw size in bytes of this file */

    /* The current chunk: CHUNK_SIZE bytes except for the last chunk. */
    int chunk;     /* the current chunk number, zero-based
                      (-1 is used to signal "before beginning of file") */
    char *buffer;  /* a buffer containing the current chunk */
    char *curp;    /* current position in the current chunk */
    char *endp;    /* next memory address after the current chunk */

    /* Normalization state carried from one buffer of a token to the next. */
    svn_diff__normalize_state_t normalize_state;

    /* Where the identical suffix starts in this datasource.
       SUFFIX_START_CHUNK is -1 when no suffix was determined. */
    int suffix_start_chunk;
    apr_off_t suffix_offset_in_chunk;
  } files[4];

  /* List of free tokens that may be reused. */
  svn_diff__file_token_t *tokens;

  /* Pool used for file handles, chunk buffers and newly allocated tokens. */
  apr_pool_t *pool;
} svn_diff__file_baton_t;
101
102 static int
datasource_to_index(svn_diff_datasource_e datasource)103 datasource_to_index(svn_diff_datasource_e datasource)
104 {
105 switch (datasource)
106 {
107 case svn_diff_datasource_original:
108 return 0;
109
110 case svn_diff_datasource_modified:
111 return 1;
112
113 case svn_diff_datasource_latest:
114 return 2;
115
116 case svn_diff_datasource_ancestor:
117 return 3;
118 }
119
120 return -1;
121 }
122
/* Files are read in chunks of 128k.  This number is arbitrary: there is
 * no measurement or argument backing it.  If someone comes up with a
 * better-justified value, let's use that.
 */
/* If you change this number, update test_norm_offset(),
 * test_identical_suffix() and test_token_compare() in diff-diff3-test.c.
 */
/* Chunk geometry: one chunk is 2^CHUNK_SHIFT bytes. */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* Conversions between chunk numbers and byte offsets.
 *
 * chunk_to_offset() widens its argument to apr_off_t before shifting:
 * chunk numbers are stored as int, and shifting an int left would overflow
 * (undefined behavior) as soon as the resulting offset reaches 2 GiB. */
#define chunk_to_offset(chunk) (((apr_off_t) (chunk)) << CHUNK_SHIFT)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))
136
137
138 /* Read a chunk from a FILE into BUFFER, starting from OFFSET, going for
139 * *LENGTH. The actual bytes read are stored in *LENGTH on return.
140 */
141 static APR_INLINE svn_error_t *
read_chunk(apr_file_t * file,char * buffer,apr_off_t length,apr_off_t offset,apr_pool_t * scratch_pool)142 read_chunk(apr_file_t *file,
143 char *buffer, apr_off_t length,
144 apr_off_t offset, apr_pool_t *scratch_pool)
145 {
146 /* XXX: The final offset may not be the one we asked for.
147 * XXX: Check.
148 */
149 SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, scratch_pool));
150 return svn_io_file_read_full2(file, buffer, (apr_size_t) length,
151 NULL, NULL, scratch_pool);
152 }
153
154
155 /* Map or read a file at PATH. *BUFFER will point to the file
156 * contents; if the file was mapped, *FILE and *MM will contain the
157 * mmap context; otherwise they will be NULL. SIZE will contain the
158 * file size. Allocate from POOL.
159 */
160 #if APR_HAS_MMAP
161 #define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
162 #define MMAP_T_ARG(NAME) &(NAME),
163 #else
164 #define MMAP_T_PARAM(NAME)
165 #define MMAP_T_ARG(NAME)
166 #endif
167
168 static svn_error_t *
map_or_read_file(apr_file_t ** file,MMAP_T_PARAM (mm)char ** buffer,apr_size_t * size_p,const char * path,apr_pool_t * pool)169 map_or_read_file(apr_file_t **file,
170 MMAP_T_PARAM(mm)
171 char **buffer, apr_size_t *size_p,
172 const char *path, apr_pool_t *pool)
173 {
174 apr_finfo_t finfo;
175 apr_status_t rv;
176 apr_size_t size;
177
178 *buffer = NULL;
179
180 SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
181 SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));
182
183 if (finfo.size > APR_SIZE_MAX)
184 {
185 return svn_error_createf(APR_ENOMEM, NULL,
186 _("File '%s' is too large to be read in "
187 "to memory"), path);
188 }
189
190 size = (apr_size_t) finfo.size;
191 #if APR_HAS_MMAP
192 if (size > APR_MMAP_THRESHOLD)
193 {
194 rv = apr_mmap_create(mm, *file, 0, size, APR_MMAP_READ, pool);
195 if (rv == APR_SUCCESS)
196 {
197 *buffer = (*mm)->mm;
198 }
199 else
200 {
201 /* Clear *MM because output parameters are undefined on error. */
202 *mm = NULL;
203 }
204
205 /* On failure we just fall through and try reading the file into
206 * memory instead.
207 */
208 }
209 #endif /* APR_HAS_MMAP */
210
211 if (*buffer == NULL && size > 0)
212 {
213 *buffer = apr_palloc(pool, size);
214
215 SVN_ERR(svn_io_file_read_full2(*file, *buffer, size, NULL, NULL, pool));
216
217 /* Since we have the entire contents of the file we can
218 * close it now.
219 */
220 SVN_ERR(svn_io_file_close(*file, pool));
221
222 *file = NULL;
223 }
224
225 *size_p = size;
226
227 return SVN_NO_ERROR;
228 }
229
230
/* For all files in the FILE array, increment the curp pointer.  If a file
 * points before the beginning of file, let it point at the first byte again.
 * If the end of the current chunk is reached, read the next chunk in the
 * buffer and point curp to the start of the chunk.  If EOF is reached, set
 * curp equal to endp to indicate EOF.
 *
 * ALL_FILES is an array of struct file_info with FILES_LEN elements; POOL
 * is passed on to increment_chunk().  The chunk handling is wrapped in
 * SVN_ERR(), so this macro is meant for use inside functions that return
 * svn_error_t * (presumably SVN_ERR returns from the enclosing function
 * on error -- see svn_error.h). */
#define INCREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i;                                                 \
                                                                             \
    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
    {                                                                        \
      if ((all_files)[svn_macro__i].curp < (all_files)[svn_macro__i].endp - 1)\
        (all_files)[svn_macro__i].curp++;                                    \
      else                                                                   \
        SVN_ERR(increment_chunk(&(all_files)[svn_macro__i], (pool)));        \
    }                                                                        \
  } while (0)
248
249
/* For all files in the FILE array, decrement the curp pointer.  If the
 * start of a chunk is reached, read the previous chunk in the buffer and
 * point curp to the last byte of the chunk.  If the beginning of a FILE is
 * reached, set chunk to -1 to indicate BOF.
 *
 * ALL_FILES is an array of struct file_info with FILES_LEN elements; POOL
 * is passed on to decrement_chunk().  The chunk handling is wrapped in
 * SVN_ERR(), so this macro is meant for use inside functions that return
 * svn_error_t * (presumably SVN_ERR returns from the enclosing function
 * on error -- see svn_error.h). */
#define DECREMENT_POINTERS(all_files, files_len, pool)                       \
  do {                                                                       \
    apr_size_t svn_macro__i;                                                 \
                                                                             \
    for (svn_macro__i = 0; svn_macro__i < (files_len); svn_macro__i++)       \
    {                                                                        \
      if ((all_files)[svn_macro__i].curp > (all_files)[svn_macro__i].buffer) \
        (all_files)[svn_macro__i].curp--;                                    \
      else                                                                   \
        SVN_ERR(decrement_chunk(&(all_files)[svn_macro__i], (pool)));        \
    }                                                                        \
  } while (0)
266
267
268 static svn_error_t *
increment_chunk(struct file_info * file,apr_pool_t * pool)269 increment_chunk(struct file_info *file, apr_pool_t *pool)
270 {
271 apr_off_t length;
272 apr_off_t last_chunk = offset_to_chunk(file->size);
273
274 if (file->chunk == -1)
275 {
276 /* We are at BOF (Beginning Of File). Point to first chunk/byte again. */
277 file->chunk = 0;
278 file->curp = file->buffer;
279 }
280 else if (file->chunk == last_chunk)
281 {
282 /* We are at the last chunk. Indicate EOF by setting curp == endp. */
283 file->curp = file->endp;
284 }
285 else
286 {
287 /* There are still chunks left. Read next chunk and reset pointers. */
288 file->chunk++;
289 length = file->chunk == last_chunk ?
290 offset_in_chunk(file->size) : CHUNK_SIZE;
291 SVN_ERR(read_chunk(file->file, file->buffer,
292 length, chunk_to_offset(file->chunk),
293 pool));
294 file->endp = file->buffer + length;
295 file->curp = file->buffer;
296 }
297
298 return SVN_NO_ERROR;
299 }
300
301
302 static svn_error_t *
decrement_chunk(struct file_info * file,apr_pool_t * pool)303 decrement_chunk(struct file_info *file, apr_pool_t *pool)
304 {
305 if (file->chunk == 0)
306 {
307 /* We are already at the first chunk. Indicate BOF (Beginning Of File)
308 by setting chunk = -1 and curp = endp - 1. Both conditions are
309 important. They help the increment step to catch the BOF situation
310 in an efficient way. */
311 file->chunk--;
312 file->curp = file->endp - 1;
313 }
314 else
315 {
316 /* Read previous chunk and reset pointers. */
317 file->chunk--;
318 SVN_ERR(read_chunk(file->file, file->buffer,
319 CHUNK_SIZE, chunk_to_offset(file->chunk),
320 pool));
321 file->endp = file->buffer + CHUNK_SIZE;
322 file->curp = file->endp - 1;
323 }
324
325 return SVN_NO_ERROR;
326 }
327
328
329 /* Check whether one of the FILEs has its pointers 'before' the beginning of
330 * the file (this can happen while scanning backwards). This is the case if
331 * one of them has chunk == -1. */
332 static svn_boolean_t
is_one_at_bof(struct file_info file[],apr_size_t file_len)333 is_one_at_bof(struct file_info file[], apr_size_t file_len)
334 {
335 apr_size_t i;
336
337 for (i = 0; i < file_len; i++)
338 if (file[i].chunk == -1)
339 return TRUE;
340
341 return FALSE;
342 }
343
344 /* Check whether one of the FILEs has its pointers at EOF (this is the case if
345 * one of them has curp == endp (this can only happen at the last chunk)) */
346 static svn_boolean_t
is_one_at_eof(struct file_info file[],apr_size_t file_len)347 is_one_at_eof(struct file_info file[], apr_size_t file_len)
348 {
349 apr_size_t i;
350
351 for (i = 0; i < file_len; i++)
352 if (file[i].curp == file[i].endp)
353 return TRUE;
354
355 return FALSE;
356 }
357
/* Quickly determine whether the machine word CHUNK contains an EOL
 * character (mainly copy-n-paste from eol.c#svn_eol__find_eol_start).
 *
 * Each test word is CHUNK xor-ed with a mask (presumably '\r' resp. '\n'
 * replicated into every byte -- see svn_eol_private.h), so a byte equal to
 * the EOL character becomes zero.  Adding SVN__LOWER_7BITS_SET to the low
 * seven bits and or-ing that into the word sets the top bit of every
 * non-zero byte.  If some top bit is still clear in the combination of
 * both tests, one of the bytes was an EOL character.
 */

#if SVN_UNALIGNED_ACCESS_IS_OK
static svn_boolean_t contains_eol(apr_uintptr_t chunk)
{
  apr_uintptr_t cr_bits = chunk ^ SVN__R_MASK;
  apr_uintptr_t lf_bits = chunk ^ SVN__N_MASK;
  apr_uintptr_t no_eol_bits;

  cr_bits = cr_bits | ((cr_bits & SVN__LOWER_7BITS_SET)
                       + SVN__LOWER_7BITS_SET);
  lf_bits = lf_bits | ((lf_bits & SVN__LOWER_7BITS_SET)
                       + SVN__LOWER_7BITS_SET);

  no_eol_bits = cr_bits & lf_bits & SVN__BIT_7_SET;

  return no_eol_bits != SVN__BIT_7_SET;
}
#endif
374
/* Find the prefix which is identical between all elements of the FILE array.
 * Return the number of prefix lines in PREFIX_LINES.  REACHED_ONE_EOF will be
 * set to TRUE if one of the FILEs reached its end while scanning prefix,
 * i.e. at least one file consisted entirely of prefix.  Otherwise,
 * REACHED_ONE_EOF is set to FALSE.
 *
 * FILE has FILE_LEN elements.  The first byte of every file is compared
 * right away, so every file must have at least one byte available at curp
 * (datasources_open() returns early when a file is empty).  POOL is used
 * for reading further chunks.
 *
 * After this function is finished, the buffers, chunks, curp's and endp's
 * of the FILEs are set to point at the first byte after the prefix. */
static svn_error_t *
find_identical_prefix(svn_boolean_t *reached_one_eof, apr_off_t *prefix_lines,
                      struct file_info file[], apr_size_t file_len,
                      apr_pool_t *pool)
{
  /* TRUE while the previously examined byte was a '\r'; lets a following
     '\n' be treated as part of the same EOL instead of a second line. */
  svn_boolean_t had_cr = FALSE;
  svn_boolean_t is_match;
  apr_off_t lines = 0;
  apr_size_t i;

  *reached_one_eof = FALSE;

  /* Compare the current byte of file[0] against all other files. */
  for (i = 1, is_match = TRUE; i < file_len; i++)
    is_match = is_match && *file[0].curp == *file[i].curp;
  while (is_match)
    {
#if SVN_UNALIGNED_ACCESS_IS_OK
      apr_ssize_t max_delta, delta;
#endif /* SVN_UNALIGNED_ACCESS_IS_OK */

      /* ### TODO: see if we can take advantage of
         diff options like ignore_eol_style or ignore_space. */
      /* check for eol, and count */
      if (*file[0].curp == '\r')
        {
          lines++;
          had_cr = TRUE;
        }
      else if (*file[0].curp == '\n' && !had_cr)
        {
          lines++;
        }
      else
        {
          had_cr = FALSE;
        }

      INCREMENT_POINTERS(file, file_len, pool);

#if SVN_UNALIGNED_ACCESS_IS_OK

      /* Try to advance as far as possible with machine-word granularity.
       * Determine how far we may advance with chunky ops without reaching
       * endp for any of the files.
       * Signedness is important here if curp gets close to endp.
       */
      max_delta = file[0].endp - file[0].curp - sizeof(apr_uintptr_t);
      for (i = 1; i < file_len; i++)
        {
          delta = file[i].endp - file[i].curp - sizeof(apr_uintptr_t);
          if (delta < max_delta)
            max_delta = delta;
        }

      /* Compare one machine word at a time; stop at the first word that
         contains an EOL (so line counting stays with the byte-wise code
         above) or that differs between the files. */
      is_match = TRUE;
      for (delta = 0; delta < max_delta; delta += sizeof(apr_uintptr_t))
        {
          apr_uintptr_t chunk = *(const apr_uintptr_t *)(file[0].curp + delta);
          if (contains_eol(chunk))
            break;

          for (i = 1; i < file_len; i++)
            if (chunk != *(const apr_uintptr_t *)(file[i].curp + delta))
              {
                is_match = FALSE;
                break;
              }

          if (! is_match)
            break;
        }

      if (delta /* > 0*/)
        {
          /* We either found a mismatch or an EOL at or shortly behind curp+delta
           * or we cannot proceed with chunky ops without exceeding endp.
           * In any way, everything up to curp + delta is equal and not an EOL.
           */
          for (i = 0; i < file_len; i++)
            file[i].curp += delta;

          /* Skipped data without EOL markers, so last char was not a CR. */
          had_cr = FALSE;
        }
#endif

      /* IS_MATCH is recomputed byte-wise below, unless a file ran out. */
      *reached_one_eof = is_one_at_eof(file, file_len);
      if (*reached_one_eof)
        break;
      else
        for (i = 1, is_match = TRUE; i < file_len; i++)
          is_match = is_match && *file[0].curp == *file[i].curp;
    }

  if (had_cr)
    {
      /* Check if we ended in the middle of a \r\n for one file, but \r for
         another. If so, back up one byte, so the next loop will back up
         the entire line. Also decrement lines, since we counted one
         too many for the \r. */
      svn_boolean_t ended_at_nonmatching_newline = FALSE;
      for (i = 0; i < file_len; i++)
        if (file[i].curp < file[i].endp)
          ended_at_nonmatching_newline = ended_at_nonmatching_newline
                                         || *file[i].curp == '\n';
      if (ended_at_nonmatching_newline)
        {
          lines--;
          DECREMENT_POINTERS(file, file_len, pool);
        }
    }

  /* Back up one byte, so we point at the last identical byte */
  DECREMENT_POINTERS(file, file_len, pool);

  /* Back up to the last eol sequence (\n, \r\n or \r) */
  while (!is_one_at_bof(file, file_len) &&
         *file[0].curp != '\n' && *file[0].curp != '\r')
    DECREMENT_POINTERS(file, file_len, pool);

  /* Slide one byte forward, to point past the eol sequence */
  INCREMENT_POINTERS(file, file_len, pool);

  *prefix_lines = lines;

  return SVN_NO_ERROR;
}
510
511
512 /* The number of identical suffix lines to keep with the middle section. These
513 * lines are not eliminated as suffix, and can be picked up by the token
514 * parsing and lcs steps. This is mainly for backward compatibility with
515 * the previous diff (and blame) output (if there are multiple diff solutions,
516 * our lcs algorithm prefers taking common lines from the start, rather than
517 * from the end. By giving it back some suffix lines, we give it some wiggle
518 * room to find the exact same diff as before).
519 *
520 * The number 50 is more or less arbitrary, based on some real-world tests
521 * with big files (and then doubling the required number to be on the safe
522 * side). This has a negligible effect on the power of the optimization. */
523 /* If you change this number, update test_identical_suffix() in diff-diff3-test.c */
/* The #ifndef guard allows the value to be predefined (e.g. on the
 * compiler command line) instead of using the default below. */
#ifndef SUFFIX_LINES_TO_KEEP
#define SUFFIX_LINES_TO_KEEP 50
#endif
527
528 /* Find the suffix which is identical between all elements of the FILE array.
529 * Return the number of suffix lines in SUFFIX_LINES.
530 *
531 * Before this function is called the FILEs' pointers and chunks should be
532 * positioned right after the identical prefix (which is the case after
533 * find_identical_prefix), so we can determine where suffix scanning should
534 * ultimately stop. */
535 static svn_error_t *
find_identical_suffix(apr_off_t * suffix_lines,struct file_info file[],apr_size_t file_len,apr_pool_t * pool)536 find_identical_suffix(apr_off_t *suffix_lines, struct file_info file[],
537 apr_size_t file_len, apr_pool_t *pool)
538 {
539 struct file_info file_for_suffix[4] = { { 0 } };
540 apr_off_t length[4];
541 apr_off_t suffix_min_chunk0;
542 apr_off_t suffix_min_offset0;
543 apr_off_t min_file_size;
544 int suffix_lines_to_keep = SUFFIX_LINES_TO_KEEP;
545 svn_boolean_t is_match;
546 apr_off_t lines = 0;
547 svn_boolean_t had_nl;
548 apr_size_t i;
549
550 /* Initialize file_for_suffix[].
551 Read last chunk, position curp at last byte. */
552 for (i = 0; i < file_len; i++)
553 {
554 file_for_suffix[i].path = file[i].path;
555 file_for_suffix[i].file = file[i].file;
556 file_for_suffix[i].size = file[i].size;
557 file_for_suffix[i].chunk =
558 (int) offset_to_chunk(file_for_suffix[i].size); /* last chunk */
559 length[i] = offset_in_chunk(file_for_suffix[i].size);
560 if (length[i] == 0)
561 {
562 /* last chunk is an empty chunk -> start at next-to-last chunk */
563 file_for_suffix[i].chunk = file_for_suffix[i].chunk - 1;
564 length[i] = CHUNK_SIZE;
565 }
566
567 if (file_for_suffix[i].chunk == file[i].chunk)
568 {
569 /* Prefix ended in last chunk, so we can reuse the prefix buffer */
570 file_for_suffix[i].buffer = file[i].buffer;
571 }
572 else
573 {
574 /* There is at least more than 1 chunk,
575 so allocate full chunk size buffer */
576 file_for_suffix[i].buffer = apr_palloc(pool, CHUNK_SIZE);
577 SVN_ERR(read_chunk(file_for_suffix[i].file,
578 file_for_suffix[i].buffer, length[i],
579 chunk_to_offset(file_for_suffix[i].chunk),
580 pool));
581 }
582 file_for_suffix[i].endp = file_for_suffix[i].buffer + length[i];
583 file_for_suffix[i].curp = file_for_suffix[i].endp - 1;
584 }
585
586 /* Get the chunk and pointer offset (for file[0]) at which we should stop
587 scanning backward for the identical suffix, i.e. when we reach prefix. */
588 suffix_min_chunk0 = file[0].chunk;
589 suffix_min_offset0 = file[0].curp - file[0].buffer;
590
591 /* Compensate if other files are smaller than file[0] */
592 for (i = 1, min_file_size = file[0].size; i < file_len; i++)
593 if (file[i].size < min_file_size)
594 min_file_size = file[i].size;
595 if (file[0].size > min_file_size)
596 {
597 suffix_min_chunk0 += (file[0].size - min_file_size) / CHUNK_SIZE;
598 suffix_min_offset0 += (file[0].size - min_file_size) % CHUNK_SIZE;
599 }
600
601 /* Scan backwards until mismatch or until we reach the prefix. */
602 for (i = 1, is_match = TRUE; i < file_len; i++)
603 is_match = is_match
604 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
605 if (is_match && *file_for_suffix[0].curp != '\r'
606 && *file_for_suffix[0].curp != '\n')
607 /* Count an extra line for the last line not ending in an eol. */
608 lines++;
609
610 had_nl = FALSE;
611 while (is_match)
612 {
613 svn_boolean_t reached_prefix;
614 #if SVN_UNALIGNED_ACCESS_IS_OK
615 /* Initialize the minimum pointer positions. */
616 const char *min_curp[4];
617 svn_boolean_t can_read_word;
618 #endif /* SVN_UNALIGNED_ACCESS_IS_OK */
619
620 /* ### TODO: see if we can take advantage of
621 diff options like ignore_eol_style or ignore_space. */
622 /* check for eol, and count */
623 if (*file_for_suffix[0].curp == '\n')
624 {
625 lines++;
626 had_nl = TRUE;
627 }
628 else if (*file_for_suffix[0].curp == '\r' && !had_nl)
629 {
630 lines++;
631 }
632 else
633 {
634 had_nl = FALSE;
635 }
636
637 DECREMENT_POINTERS(file_for_suffix, file_len, pool);
638
639 #if SVN_UNALIGNED_ACCESS_IS_OK
640 for (i = 0; i < file_len; i++)
641 min_curp[i] = file_for_suffix[i].buffer;
642
643 /* If we are in the same chunk that contains the last part of the common
644 prefix, use the min_curp[0] pointer to make sure we don't get a
645 suffix that overlaps the already determined common prefix. */
646 if (file_for_suffix[0].chunk == suffix_min_chunk0)
647 min_curp[0] += suffix_min_offset0;
648
649 /* Scan quickly by reading with machine-word granularity. */
650 for (i = 0, can_read_word = TRUE; can_read_word && i < file_len; i++)
651 can_read_word = ((file_for_suffix[i].curp + 1 - sizeof(apr_uintptr_t))
652 > min_curp[i]);
653
654 while (can_read_word)
655 {
656 apr_uintptr_t chunk;
657
658 /* For each file curp is positioned at the current byte, but we
659 want to examine the current byte and the ones before the current
660 location as one machine word. */
661
662 chunk = *(const apr_uintptr_t *)(file_for_suffix[0].curp + 1
663 - sizeof(apr_uintptr_t));
664 if (contains_eol(chunk))
665 break;
666
667 for (i = 1, is_match = TRUE; is_match && i < file_len; i++)
668 is_match = (chunk
669 == *(const apr_uintptr_t *)
670 (file_for_suffix[i].curp + 1
671 - sizeof(apr_uintptr_t)));
672
673 if (! is_match)
674 break;
675
676 for (i = 0; i < file_len; i++)
677 {
678 file_for_suffix[i].curp -= sizeof(apr_uintptr_t);
679 can_read_word = can_read_word
680 && ( (file_for_suffix[i].curp + 1
681 - sizeof(apr_uintptr_t))
682 > min_curp[i]);
683 }
684
685 /* We skipped some bytes, so there are no closing EOLs */
686 had_nl = FALSE;
687 }
688
689 /* The > min_curp[i] check leaves at least one final byte for checking
690 in the non block optimized case below. */
691 #endif
692
693 reached_prefix = file_for_suffix[0].chunk == suffix_min_chunk0
694 && (file_for_suffix[0].curp - file_for_suffix[0].buffer)
695 == suffix_min_offset0;
696 if (reached_prefix || is_one_at_bof(file_for_suffix, file_len))
697 break;
698
699 is_match = TRUE;
700 for (i = 1; i < file_len; i++)
701 is_match = is_match
702 && *file_for_suffix[0].curp == *file_for_suffix[i].curp;
703 }
704
705 /* Slide one byte forward, to point at the first byte of identical suffix */
706 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
707
708 /* Slide forward until we find an eol sequence to add the rest of the line
709 we're in. Then add SUFFIX_LINES_TO_KEEP more lines. Stop if at least
710 one file reaches its end. */
711 do
712 {
713 svn_boolean_t had_cr = FALSE;
714 while (!is_one_at_eof(file_for_suffix, file_len)
715 && *file_for_suffix[0].curp != '\n'
716 && *file_for_suffix[0].curp != '\r')
717 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
718
719 /* Slide one or two more bytes, to point past the eol. */
720 if (!is_one_at_eof(file_for_suffix, file_len)
721 && *file_for_suffix[0].curp == '\r')
722 {
723 lines--;
724 had_cr = TRUE;
725 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
726 }
727 if (!is_one_at_eof(file_for_suffix, file_len)
728 && *file_for_suffix[0].curp == '\n')
729 {
730 if (!had_cr)
731 lines--;
732 INCREMENT_POINTERS(file_for_suffix, file_len, pool);
733 }
734 }
735 while (!is_one_at_eof(file_for_suffix, file_len)
736 && suffix_lines_to_keep--);
737
738 if (is_one_at_eof(file_for_suffix, file_len))
739 lines = 0;
740
741 /* Save the final suffix information in the original file_info */
742 for (i = 0; i < file_len; i++)
743 {
744 file[i].suffix_start_chunk = file_for_suffix[i].chunk;
745 file[i].suffix_offset_in_chunk =
746 file_for_suffix[i].curp - file_for_suffix[i].buffer;
747 }
748
749 *suffix_lines = lines;
750
751 return SVN_NO_ERROR;
752 }
753
754
755 /* Let FILE stand for the array of file_info struct elements of BATON->files
756 * that are indexed by the elements of the DATASOURCE array.
757 * BATON's type is (svn_diff__file_baton_t *).
758 *
759 * For each file in the FILE array, open the file at FILE.path; initialize
760 * FILE.file, FILE.size, FILE.buffer, FILE.curp and FILE.endp; allocate a
761 * buffer and read the first chunk. Then find the prefix and suffix lines
762 * which are identical between all the files. Return the number of identical
763 * prefix lines in PREFIX_LINES, and the number of identical suffix lines in
764 * SUFFIX_LINES.
765 *
766 * Finding the identical prefix and suffix allows us to exclude those from the
767 * rest of the diff algorithm, which increases performance by reducing the
768 * problem space.
769 *
770 * Implements svn_diff_fns2_t::datasources_open. */
771 static svn_error_t *
datasources_open(void * baton,apr_off_t * prefix_lines,apr_off_t * suffix_lines,const svn_diff_datasource_e * datasources,apr_size_t datasources_len)772 datasources_open(void *baton,
773 apr_off_t *prefix_lines,
774 apr_off_t *suffix_lines,
775 const svn_diff_datasource_e *datasources,
776 apr_size_t datasources_len)
777 {
778 svn_diff__file_baton_t *file_baton = baton;
779 struct file_info files[4];
780 apr_off_t length[4];
781 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
782 svn_boolean_t reached_one_eof;
783 #endif
784 apr_size_t i;
785
786 /* Make sure prefix_lines and suffix_lines are set correctly, even if we
787 * exit early because one of the files is empty. */
788 *prefix_lines = 0;
789 *suffix_lines = 0;
790
791 /* Open datasources and read first chunk */
792 for (i = 0; i < datasources_len; i++)
793 {
794 svn_filesize_t filesize;
795 struct file_info *file
796 = &file_baton->files[datasource_to_index(datasources[i])];
797 SVN_ERR(svn_io_file_open(&file->file, file->path,
798 APR_READ, APR_OS_DEFAULT, file_baton->pool));
799 SVN_ERR(svn_io_file_size_get(&filesize, file->file, file_baton->pool));
800 file->size = filesize;
801 length[i] = filesize > CHUNK_SIZE ? CHUNK_SIZE : filesize;
802 file->buffer = apr_palloc(file_baton->pool, (apr_size_t) length[i]);
803 SVN_ERR(read_chunk(file->file, file->buffer,
804 length[i], 0, file_baton->pool));
805 file->endp = file->buffer + length[i];
806 file->curp = file->buffer;
807 /* Set suffix_start_chunk to a guard value, so if suffix scanning is
808 * skipped because one of the files is empty, or because of
809 * reached_one_eof, we can still easily check for the suffix during
810 * token reading (datasource_get_next_token). */
811 file->suffix_start_chunk = -1;
812
813 files[i] = *file;
814 }
815
816 for (i = 0; i < datasources_len; i++)
817 if (length[i] == 0)
818 /* There will not be any identical prefix/suffix, so we're done. */
819 return SVN_NO_ERROR;
820
821 #ifndef SVN_DISABLE_PREFIX_SUFFIX_SCANNING
822
823 SVN_ERR(find_identical_prefix(&reached_one_eof, prefix_lines,
824 files, datasources_len, file_baton->pool));
825
826 if (!reached_one_eof)
827 /* No file consisted totally of identical prefix,
828 * so there may be some identical suffix. */
829 SVN_ERR(find_identical_suffix(suffix_lines, files, datasources_len,
830 file_baton->pool));
831
832 #endif
833
834 /* Copy local results back to baton. */
835 for (i = 0; i < datasources_len; i++)
836 file_baton->files[datasource_to_index(datasources[i])] = files[i];
837
838 return SVN_NO_ERROR;
839 }
840
841
842 /* Implements svn_diff_fns2_t::datasource_close */
843 static svn_error_t *
datasource_close(void * baton,svn_diff_datasource_e datasource)844 datasource_close(void *baton, svn_diff_datasource_e datasource)
845 {
846 /* Do nothing. The compare_token function needs previous datasources
847 * to stay available until all datasources are processed.
848 */
849
850 return SVN_NO_ERROR;
851 }
852
853 /* Implements svn_diff_fns2_t::datasource_get_next_token */
854 static svn_error_t *
datasource_get_next_token(apr_uint32_t * hash,void ** token,void * baton,svn_diff_datasource_e datasource)855 datasource_get_next_token(apr_uint32_t *hash, void **token, void *baton,
856 svn_diff_datasource_e datasource)
857 {
858 svn_diff__file_baton_t *file_baton = baton;
859 svn_diff__file_token_t *file_token;
860 struct file_info *file = &file_baton->files[datasource_to_index(datasource)];
861 char *endp;
862 char *curp;
863 char *eol;
864 apr_off_t last_chunk;
865 apr_off_t length;
866 apr_uint32_t h = 0;
867 /* Did the last chunk end in a CR character? */
868 svn_boolean_t had_cr = FALSE;
869
870 *token = NULL;
871
872 curp = file->curp;
873 endp = file->endp;
874
875 last_chunk = offset_to_chunk(file->size);
876
877 /* Are we already at the end of a chunk? */
878 if (curp == endp)
879 {
880 /* Are we at EOF */
881 if (last_chunk == file->chunk)
882 return SVN_NO_ERROR; /* EOF */
883
884 /* Or right before an identical suffix in the next chunk? */
885 if (file->chunk + 1 == file->suffix_start_chunk
886 && file->suffix_offset_in_chunk == 0)
887 return SVN_NO_ERROR;
888 }
889
890 /* Stop when we encounter the identical suffix. If suffix scanning was not
891 * performed, suffix_start_chunk will be -1, so this condition will never
892 * be true. */
893 if (file->chunk == file->suffix_start_chunk
894 && (curp - file->buffer) == file->suffix_offset_in_chunk)
895 return SVN_NO_ERROR;
896
897 /* Allocate a new token, or fetch one from the "reusable tokens" list. */
898 file_token = file_baton->tokens;
899 if (file_token)
900 {
901 file_baton->tokens = file_token->next;
902 }
903 else
904 {
905 file_token = apr_palloc(file_baton->pool, sizeof(*file_token));
906 }
907
908 file_token->datasource = datasource;
909 file_token->offset = chunk_to_offset(file->chunk)
910 + (curp - file->buffer);
911 file_token->norm_offset = file_token->offset;
912 file_token->raw_length = 0;
913 file_token->length = 0;
914
915 while (1)
916 {
917 eol = svn_eol__find_eol_start(curp, endp - curp);
918 if (eol)
919 {
920 had_cr = (*eol == '\r');
921 eol++;
922 /* If we have the whole eol sequence in the chunk... */
923 if (!(had_cr && eol == endp))
924 {
925 /* Also skip past the '\n' in an '\r\n' sequence. */
926 if (had_cr && *eol == '\n')
927 eol++;
928 break;
929 }
930 }
931
932 if (file->chunk == last_chunk)
933 {
934 eol = endp;
935 break;
936 }
937
938 length = endp - curp;
939 file_token->raw_length += length;
940 {
941 char *c = curp;
942
943 svn_diff__normalize_buffer(&c, &length,
944 &file->normalize_state,
945 curp, file_baton->options);
946 if (file_token->length == 0)
947 {
948 /* When we are reading the first part of the token, move the
949 normalized offset past leading ignored characters, if any. */
950 file_token->norm_offset += (c - curp);
951 }
952 file_token->length += length;
953 h = svn__adler32(h, c, length);
954 }
955
956 curp = endp = file->buffer;
957 file->chunk++;
958 length = file->chunk == last_chunk ?
959 offset_in_chunk(file->size) : CHUNK_SIZE;
960 endp += length;
961 file->endp = endp;
962
963 /* Issue #4283: Normally we should have checked for reaching the skipped
964 suffix here, but because we assume that a suffix always starts on a
965 line and token boundary we rely on catching the suffix earlier in this
966 function.
967
968 When changing things here, make sure the whitespace settings are
969 applied, or we might not reach the exact suffix boundary as token
970 boundary. */
971 SVN_ERR(read_chunk(file->file,
972 curp, length,
973 chunk_to_offset(file->chunk),
974 file_baton->pool));
975
976 /* If the last chunk ended in a CR, we're done. */
977 if (had_cr)
978 {
979 eol = curp;
980 if (*curp == '\n')
981 ++eol;
982 break;
983 }
984 }
985
986 length = eol - curp;
987 file_token->raw_length += length;
988 file->curp = eol;
989
990 /* If the file length is exactly a multiple of CHUNK_SIZE, we will end up
991 * with a spurious empty token. Avoid returning it.
992 * Note that we use the unnormalized length; we don't want a line containing
993 * only spaces (and no trailing newline) to appear like a non-existent
994 * line. */
995 if (file_token->raw_length > 0)
996 {
997 char *c = curp;
998 svn_diff__normalize_buffer(&c, &length,
999 &file->normalize_state,
1000 curp, file_baton->options);
1001 if (file_token->length == 0)
1002 {
1003 /* When we are reading the first part of the token, move the
1004 normalized offset past leading ignored characters, if any. */
1005 file_token->norm_offset += (c - curp);
1006 }
1007
1008 file_token->length += length;
1009
1010 *hash = svn__adler32(h, c, length);
1011 *token = file_token;
1012 }
1013
1014 return SVN_NO_ERROR;
1015 }
1016
/* Size in bytes of each of the two on-stack scratch buffers that
   token_compare() reads token data into when that data is not available
   in the file's in-memory chunk. */
#define COMPARE_CHUNK_SIZE 4096

/* Implements svn_diff_fns2_t::token_compare
 *
 * BATON is the svn_diff__file_baton_t; TOKEN1 and TOKEN2 are
 * svn_diff__file_token_t objects.  Set *COMPARE to -1 or 1 when the
 * normalized lengths of the tokens differ (the shorter token sorts first).
 * Otherwise set it to the memcmp() result of the first piece of normalized
 * data that differs, or to 0 when all normalized data is equal.
 *
 * Return SVN_ERR_DIFF_DATASOURCE_MODIFIED if a token that has to be re-read
 * from disk runs out of raw bytes before the whole normalized length has
 * been compared.
 */
static svn_error_t *
token_compare(void *baton, void *token1, void *token2, int *compare)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token[2];
  char buffer[2][COMPARE_CHUNK_SIZE];
  char *bufp[2];
  apr_off_t offset[2];
  struct file_info *file[2];
  /* Normalized bytes currently available at bufp[i]. */
  apr_off_t length[2];
  /* Normalized bytes that still have to be compared. */
  apr_off_t total_length;
  /* How much is left to read of each token from the file. */
  apr_off_t raw_length[2];
  int i;
  svn_diff__normalize_state_t state[2];

  file_token[0] = token1;
  file_token[1] = token2;

  /* Tokens of different normalized length can never be equal, so no data
     needs to be looked at. */
  if (file_token[0]->length < file_token[1]->length)
    {
      *compare = -1;
      return SVN_NO_ERROR;
    }

  if (file_token[0]->length > file_token[1]->length)
    {
      *compare = 1;
      return SVN_NO_ERROR;
    }

  total_length = file_token[0]->length;
  if (total_length == 0)
    {
      *compare = 0;
      return SVN_NO_ERROR;
    }

  /* Decide, per token, whether its data can be used straight from the
     file's chunk buffer or has to be re-read from disk. */
  for (i = 0; i < 2; ++i)
    {
      int idx = datasource_to_index(file_token[i]->datasource);

      file[i] = &file_baton->files[idx];
      offset[i] = file_token[i]->norm_offset;
      state[i] = svn_diff__normalize_state_normal;

      if (offset_to_chunk(offset[i]) == file[i]->chunk)
        {
          /* If the start of the token is in memory, the entire token is
           * in memory.
           */
          bufp[i] = file[i]->buffer;
          bufp[i] += offset_in_chunk(offset[i]);

          length[i] = total_length;
          raw_length[i] = 0;
        }
      else
        {
          apr_off_t skipped;

          /* Nothing buffered yet; the loop below will read from disk. */
          length[i] = 0;

          /* When we skipped the first part of the token via the whitespace
             normalization we must reduce the raw length of the token */
          skipped = (file_token[i]->norm_offset - file_token[i]->offset);

          raw_length[i] = file_token[i]->raw_length - skipped;
        }
    }

  do
    {
      apr_off_t len;
      for (i = 0; i < 2; i++)
        {
          if (length[i] == 0)
            {
              /* Error if raw_length is 0, that's an unexpected change
               * of the file that can happen when ignoring whitespace
               * and that can lead to an infinite loop. */
              if (raw_length[i] == 0)
                return svn_error_createf(SVN_ERR_DIFF_DATASOURCE_MODIFIED,
                                         NULL,
                                         _("The file '%s' changed unexpectedly"
                                           " during diff"),
                                         file[i]->path);

              /* Read a chunk from disk into a buffer */
              bufp[i] = buffer[i];
              length[i] = raw_length[i] > COMPARE_CHUNK_SIZE ?
                COMPARE_CHUNK_SIZE : raw_length[i];

              SVN_ERR(read_chunk(file[i]->file,
                                 bufp[i], length[i], offset[i],
                                 file_baton->pool));
              offset[i] += length[i];
              raw_length[i] -= length[i];
              /* bufp[i] gets reset to buffer[i] before reading each chunk,
                 so, overwriting it isn't a problem */
              svn_diff__normalize_buffer(&bufp[i], &length[i], &state[i],
                                         bufp[i], file_baton->options);

              /* assert(length[i] == file_token[i]->length); */
            }
        }

      /* Only the part available on both sides can be compared now. */
      len = length[0] > length[1] ? length[1] : length[0];

      /* Compare two chunks (that could be entire tokens if they both reside
       * in memory).
       */
      *compare = memcmp(bufp[0], bufp[1], (size_t) len);
      if (*compare != 0)
        return SVN_NO_ERROR;

      total_length -= len;
      length[0] -= len;
      length[1] -= len;
      bufp[0] += len;
      bufp[1] += len;
    }
  while(total_length > 0);

  *compare = 0;
  return SVN_NO_ERROR;
}
1146
1147
1148 /* Implements svn_diff_fns2_t::token_discard */
1149 static void
token_discard(void * baton,void * token)1150 token_discard(void *baton, void *token)
1151 {
1152 svn_diff__file_baton_t *file_baton = baton;
1153 svn_diff__file_token_t *file_token = token;
1154
1155 /* Prepend FILE_TOKEN to FILE_BATON->TOKENS, for reuse. */
1156 file_token->next = file_baton->tokens;
1157 file_baton->tokens = file_token;
1158 }
1159
1160
1161 /* Implements svn_diff_fns2_t::token_discard_all */
1162 static void
token_discard_all(void * baton)1163 token_discard_all(void *baton)
1164 {
1165 svn_diff__file_baton_t *file_baton = baton;
1166
1167 /* Discard all memory in use by the tokens, and close all open files. */
1168 svn_pool_clear(file_baton->pool);
1169 }
1170
1171
/* The file-based implementation of the svn_diff_fns2_t callbacks.  It is
   handed to svn_diff_diff_2(), svn_diff_diff3_2() and svn_diff_diff4_2()
   by the svn_diff_file_diff*_2() entry points in this file, together with
   an svn_diff__file_baton_t. */
static const svn_diff_fns2_t svn_diff__file_vtable =
{
  datasources_open,
  datasource_close,
  datasource_get_next_token,
  token_compare,
  token_discard,
  token_discard_all
};
1181
/* Id for the --ignore-eol-style option, which doesn't have a short name. */
#define SVN_DIFF__OPT_IGNORE_EOL_STYLE 256

/* Options supported by svn_diff_file_options_parse().
   Each entry is { long name, option id, takes-argument flag, description };
   only --context / -U takes an argument.  The all-zero entry terminates
   the table. */
static const apr_getopt_option_t diff_options[] =
{
  { "ignore-space-change", 'b', 0, NULL },
  { "ignore-all-space", 'w', 0, NULL },
  { "ignore-eol-style", SVN_DIFF__OPT_IGNORE_EOL_STYLE, 0, NULL },
  { "show-c-function", 'p', 0, NULL },
  /* ### For compatibility; we don't support the argument to -u, because
   * ### we don't have optional argument support. */
  { "unified", 'u', 0, NULL },
  { "context", 'U', 1, NULL },
  { NULL, 0, 0, NULL }
};
1198
1199 svn_diff_file_options_t *
svn_diff_file_options_create(apr_pool_t * pool)1200 svn_diff_file_options_create(apr_pool_t *pool)
1201 {
1202 svn_diff_file_options_t * opts = apr_pcalloc(pool, sizeof(*opts));
1203
1204 opts->context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1205
1206 return opts;
1207 }
1208
/* A baton for use with opt_parsing_error_func(). */
struct opt_parsing_error_baton_t
{
  /* The error built by opt_parsing_error_func(); NULL until it is called. */
  svn_error_t *err;
  /* Pool in which opt_parsing_error_func() formats the message text. */
  apr_pool_t *pool;
};
1215
1216 /* Store an error message from apr_getopt_long(). Set BATON->err to a new
1217 * error with a message generated from FMT and the remaining arguments.
1218 * Implements apr_getopt_err_fn_t. */
1219 static void
opt_parsing_error_func(void * baton,const char * fmt,...)1220 opt_parsing_error_func(void *baton,
1221 const char *fmt, ...)
1222 {
1223 struct opt_parsing_error_baton_t *b = baton;
1224 const char *message;
1225 va_list ap;
1226
1227 va_start(ap, fmt);
1228 message = apr_pvsprintf(b->pool, fmt, ap);
1229 va_end(ap);
1230
1231 /* Skip leading ": " (if present, which it always is in known cases). */
1232 if (strncmp(message, ": ", 2) == 0)
1233 message += 2;
1234
1235 b->err = svn_error_create(SVN_ERR_INVALID_DIFF_OPTION, NULL, message);
1236 }
1237
1238 svn_error_t *
svn_diff_file_options_parse(svn_diff_file_options_t * options,const apr_array_header_t * args,apr_pool_t * pool)1239 svn_diff_file_options_parse(svn_diff_file_options_t *options,
1240 const apr_array_header_t *args,
1241 apr_pool_t *pool)
1242 {
1243 apr_getopt_t *os;
1244 struct opt_parsing_error_baton_t opt_parsing_error_baton;
1245 apr_array_header_t *argv;
1246
1247 opt_parsing_error_baton.err = NULL;
1248 opt_parsing_error_baton.pool = pool;
1249
1250 /* Make room for each option (starting at index 1) plus trailing NULL. */
1251 argv = apr_array_make(pool, args->nelts + 2, sizeof(char*));
1252 APR_ARRAY_PUSH(argv, const char *) = "";
1253 apr_array_cat(argv, args);
1254 APR_ARRAY_PUSH(argv, const char *) = NULL;
1255
1256 apr_getopt_init(&os, pool,
1257 argv->nelts - 1 /* Exclude trailing NULL */,
1258 (const char *const *) argv->elts);
1259
1260 /* Capture any error message from apr_getopt_long(). This will typically
1261 * say which option is wrong, which we would not otherwise know. */
1262 os->errfn = opt_parsing_error_func;
1263 os->errarg = &opt_parsing_error_baton;
1264
1265 while (1)
1266 {
1267 const char *opt_arg;
1268 int opt_id;
1269 apr_status_t err = apr_getopt_long(os, diff_options, &opt_id, &opt_arg);
1270
1271 if (APR_STATUS_IS_EOF(err))
1272 break;
1273 if (err)
1274 /* Wrap apr_getopt_long()'s error message. Its doc string implies
1275 * it always will produce one, but never mind if it doesn't. Avoid
1276 * using the message associated with the return code ERR, because
1277 * it refers to the "command line" which may be misleading here. */
1278 return svn_error_create(SVN_ERR_INVALID_DIFF_OPTION,
1279 opt_parsing_error_baton.err,
1280 _("Error in options to internal diff"));
1281
1282 switch (opt_id)
1283 {
1284 case 'b':
1285 /* -w takes precedence over -b. */
1286 if (! options->ignore_space)
1287 options->ignore_space = svn_diff_file_ignore_space_change;
1288 break;
1289 case 'w':
1290 options->ignore_space = svn_diff_file_ignore_space_all;
1291 break;
1292 case SVN_DIFF__OPT_IGNORE_EOL_STYLE:
1293 options->ignore_eol_style = TRUE;
1294 break;
1295 case 'p':
1296 options->show_c_function = TRUE;
1297 break;
1298 case 'U':
1299 SVN_ERR(svn_cstring_atoi(&options->context_size, opt_arg));
1300 break;
1301 default:
1302 break;
1303 }
1304 }
1305
1306 /* Check for spurious arguments. */
1307 if (os->ind < os->argc)
1308 return svn_error_createf(SVN_ERR_INVALID_DIFF_OPTION, NULL,
1309 _("Invalid argument '%s' in diff options"),
1310 os->argv[os->ind]);
1311
1312 return SVN_NO_ERROR;
1313 }
1314
1315 svn_error_t *
svn_diff_file_diff_2(svn_diff_t ** diff,const char * original,const char * modified,const svn_diff_file_options_t * options,apr_pool_t * pool)1316 svn_diff_file_diff_2(svn_diff_t **diff,
1317 const char *original,
1318 const char *modified,
1319 const svn_diff_file_options_t *options,
1320 apr_pool_t *pool)
1321 {
1322 svn_diff__file_baton_t baton = { 0 };
1323
1324 baton.options = options;
1325 baton.files[0].path = original;
1326 baton.files[1].path = modified;
1327 baton.pool = svn_pool_create(pool);
1328
1329 SVN_ERR(svn_diff_diff_2(diff, &baton, &svn_diff__file_vtable, pool));
1330
1331 svn_pool_destroy(baton.pool);
1332 return SVN_NO_ERROR;
1333 }
1334
1335 svn_error_t *
svn_diff_file_diff3_2(svn_diff_t ** diff,const char * original,const char * modified,const char * latest,const svn_diff_file_options_t * options,apr_pool_t * pool)1336 svn_diff_file_diff3_2(svn_diff_t **diff,
1337 const char *original,
1338 const char *modified,
1339 const char *latest,
1340 const svn_diff_file_options_t *options,
1341 apr_pool_t *pool)
1342 {
1343 svn_diff__file_baton_t baton = { 0 };
1344
1345 baton.options = options;
1346 baton.files[0].path = original;
1347 baton.files[1].path = modified;
1348 baton.files[2].path = latest;
1349 baton.pool = svn_pool_create(pool);
1350
1351 SVN_ERR(svn_diff_diff3_2(diff, &baton, &svn_diff__file_vtable, pool));
1352
1353 svn_pool_destroy(baton.pool);
1354 return SVN_NO_ERROR;
1355 }
1356
1357 svn_error_t *
svn_diff_file_diff4_2(svn_diff_t ** diff,const char * original,const char * modified,const char * latest,const char * ancestor,const svn_diff_file_options_t * options,apr_pool_t * pool)1358 svn_diff_file_diff4_2(svn_diff_t **diff,
1359 const char *original,
1360 const char *modified,
1361 const char *latest,
1362 const char *ancestor,
1363 const svn_diff_file_options_t *options,
1364 apr_pool_t *pool)
1365 {
1366 svn_diff__file_baton_t baton = { 0 };
1367
1368 baton.options = options;
1369 baton.files[0].path = original;
1370 baton.files[1].path = modified;
1371 baton.files[2].path = latest;
1372 baton.files[3].path = ancestor;
1373 baton.pool = svn_pool_create(pool);
1374
1375 SVN_ERR(svn_diff_diff4_2(diff, &baton, &svn_diff__file_vtable, pool));
1376
1377 svn_pool_destroy(baton.pool);
1378 return SVN_NO_ERROR;
1379 }
1380
1381
/** Display unified context diffs **/

/* Maximum length of the extra context to show when show_c_function is set.
 * GNU diff uses 40, let's be brave and use 50 instead. */
#define SVN_DIFF__EXTRA_CONTEXT_LENGTH 50

/* State shared by the unified diff output callbacks.  Arrays of two are
   indexed by file: 0 is the original file, 1 is the modified file. */
typedef struct svn_diff__file_output_baton_t
{
  /* Stream that hunk headers and hunk contents are written to. */
  svn_stream_t *output_stream;
  /* Encoding used for the headers and the line markers. */
  const char *header_encoding;

  /* Cached markers, in header_encoding. */
  const char *context_str;
  const char *delete_str;
  const char *insert_str;

  /* Paths of the two files, and the handles they are read through. */
  const char *path[2];
  apr_file_t *file[2];

  /* Number of lines consumed so far from each file. */
  apr_off_t current_line[2];

  /* Per-file read buffer, the count of unconsumed bytes left in it, and
     the position of the first unconsumed byte. */
  char buffer[2][4096];
  apr_size_t length[2];
  char *curp[2];

  /* Start line (zero based) and line count of the pending hunk in each
     file, and the accumulated text of that hunk. */
  apr_off_t hunk_start[2];
  apr_off_t hunk_length[2];
  svn_stringbuf_t *hunk;

  /* Should we emit C functions in the unified diff header */
  svn_boolean_t show_c_function;
  /* Extra strings to skip over if we match. */
  apr_array_header_t *extra_skip_match;
  /* "Context" to append to the @@ line when the show_c_function option
   * is set. */
  svn_stringbuf_t *extra_context;
  /* Extra context for the current hunk. */
  char hunk_extra_context[SVN_DIFF__EXTRA_CONTEXT_LENGTH + 1];

  /* Number of context lines to show around each changed range. */
  int context_size;

  /* Cancel handler */
  svn_cancel_func_t cancel_func;
  void *cancel_baton;

  /* Pool used for allocations made while producing output. */
  apr_pool_t *pool;
} svn_diff__file_output_baton_t;
1428
/* How output_unified_line() treats the line it reads. */
typedef enum svn_diff__file_output_unified_type_e
{
  /* Consume the line without adding it to the hunk. */
  svn_diff__file_output_unified_skip,
  /* Add the line with the context marker; counts in both hunk lengths. */
  svn_diff__file_output_unified_context,
  /* Add the line with the delete marker; counts in the original's length. */
  svn_diff__file_output_unified_delete,
  /* Add the line with the insert marker; counts in the modified's length. */
  svn_diff__file_output_unified_insert
} svn_diff__file_output_unified_type_e;
1436
1437
/* Consume the next line of file IDX (0 = original, 1 = modified) in BATON
 * and advance BATON->current_line[IDX] by one.
 *
 * Unless TYPE is svn_diff__file_output_unified_skip, append the marker for
 * TYPE followed by the raw bytes of the line (including its EOL sequence,
 * if any) to BATON->hunk, and bump the BATON->hunk_length counter(s) that
 * belong to TYPE.
 *
 * If BATON->show_c_function is set and a skipped or context line starts
 * with a letter, '$' or '_' and matches none of BATON->extra_skip_match,
 * replace BATON->extra_context with that line.
 *
 * The file is read in buffer-sized pieces, so a line and even a CR LF pair
 * may be split across two reads.  When the end of the file is reached in
 * the middle of an emitted line that has no EOL, the "no newline" message
 * is appended to the hunk.  Read errors other than EOF are returned.
 */
static svn_error_t *
output_unified_line(svn_diff__file_output_baton_t *baton,
                    svn_diff__file_output_unified_type_e type, int idx)
{
  char *curp;
  char *eol;
  apr_size_t length;
  svn_error_t *err;
  /* Set once at least part of the current line has been handled. */
  svn_boolean_t bytes_processed = FALSE;
  /* Set when the EOL found starts with a CR. */
  svn_boolean_t had_cr = FALSE;
  /* Are we collecting extra context? */
  svn_boolean_t collect_extra = FALSE;

  length = baton->length[idx];
  curp = baton->curp[idx];

  /* Lazily update the current line even if we're at EOF.
   * This way we fake output of context at EOF
   */
  baton->current_line[idx]++;

  if (length == 0 && apr_file_eof(baton->file[idx]))
    {
      return SVN_NO_ERROR;
    }

  do
    {
      if (length > 0)
        {
          /* First piece of this line: emit the marker and decide whether
             the line is a candidate for the extra context. */
          if (!bytes_processed)
            {
              switch (type)
                {
                case svn_diff__file_output_unified_context:
                  svn_stringbuf_appendcstr(baton->hunk, baton->context_str);
                  baton->hunk_length[0]++;
                  baton->hunk_length[1]++;
                  break;
                case svn_diff__file_output_unified_delete:
                  svn_stringbuf_appendcstr(baton->hunk, baton->delete_str);
                  baton->hunk_length[0]++;
                  break;
                case svn_diff__file_output_unified_insert:
                  svn_stringbuf_appendcstr(baton->hunk, baton->insert_str);
                  baton->hunk_length[1]++;
                  break;
                default:
                  break;
                }

              if (baton->show_c_function
                  && (type == svn_diff__file_output_unified_skip
                      || type == svn_diff__file_output_unified_context)
                  && (svn_ctype_isalpha(*curp) || *curp == '$' || *curp == '_')
                  && !svn_cstring_match_glob_list(curp,
                                                  baton->extra_skip_match))
                {
                  svn_stringbuf_setempty(baton->extra_context);
                  collect_extra = TRUE;
                }
            }

          eol = svn_eol__find_eol_start(curp, length);

          if (eol != NULL)
            {
              apr_size_t len;

              had_cr = (*eol == '\r');
              eol++;
              len = (apr_size_t)(eol - curp);

              /* A CR in the very last byte of the buffer might be the first
                 half of a CR LF pair; that case is finished after the next
                 read below. */
              if (! had_cr || len < length)
                {
                  if (had_cr && *eol == '\n')
                    {
                      ++eol;
                      ++len;
                    }

                  length -= len;

                  if (type != svn_diff__file_output_unified_skip)
                    {
                      svn_stringbuf_appendbytes(baton->hunk, curp, len);
                    }
                  if (collect_extra)
                    {
                      svn_stringbuf_appendbytes(baton->extra_context,
                                                curp, len);
                    }

                  /* Remember where the next line starts. */
                  baton->curp[idx] = eol;
                  baton->length[idx] = length;

                  err = SVN_NO_ERROR;

                  break;
                }
            }

          /* No complete EOL in the buffer: the rest of the buffer belongs
             to this line. */
          if (type != svn_diff__file_output_unified_skip)
            {
              svn_stringbuf_appendbytes(baton->hunk, curp, length);
            }

          if (collect_extra)
            {
              svn_stringbuf_appendbytes(baton->extra_context, curp, length);
            }

          bytes_processed = TRUE;
        }

      /* Refill the buffer from the file. */
      curp = baton->buffer[idx];
      length = sizeof(baton->buffer[idx]);

      err = svn_io_file_read(baton->file[idx], curp, &length, baton->pool);

      /* If the last chunk ended with a CR, we look for an LF at the start
         of this chunk. */
      if (had_cr)
        {
          if (! err && length > 0 && *curp == '\n')
            {
              if (type != svn_diff__file_output_unified_skip)
                {
                  svn_stringbuf_appendbyte(baton->hunk, *curp);
                }
              /* We don't append the LF to extra_context, since it would
               * just be stripped anyway. */
              ++curp;
              --length;
            }

          baton->curp[idx] = curp;
          baton->length[idx] = length;

          break;
        }
    }
  while (! err);

  if (err && ! APR_STATUS_IS_EOF(err->apr_err))
    return err;

  if (err && APR_STATUS_IS_EOF(err->apr_err))
    {
      svn_error_clear(err);
      /* Special case if we reach the end of file AND the last line is in the
         changed range AND the file doesn't end with a newline */
      if (bytes_processed && (type != svn_diff__file_output_unified_skip)
          && ! had_cr)
        {
          SVN_ERR(svn_diff__unified_append_no_newline_msg(
                    baton->hunk, baton->header_encoding, baton->pool));
        }

      baton->length[idx] = 0;
    }

  return SVN_NO_ERROR;
}
1602
1603 static APR_INLINE svn_error_t *
output_unified_diff_range(svn_diff__file_output_baton_t * output_baton,int source,svn_diff__file_output_unified_type_e type,apr_off_t until,svn_cancel_func_t cancel_func,void * cancel_baton)1604 output_unified_diff_range(svn_diff__file_output_baton_t *output_baton,
1605 int source,
1606 svn_diff__file_output_unified_type_e type,
1607 apr_off_t until,
1608 svn_cancel_func_t cancel_func,
1609 void *cancel_baton)
1610 {
1611 while (output_baton->current_line[source] < until)
1612 {
1613 if (cancel_func)
1614 SVN_ERR(cancel_func(cancel_baton));
1615
1616 SVN_ERR(output_unified_line(output_baton, type, source));
1617 }
1618 return SVN_NO_ERROR;
1619 }
1620
1621 static svn_error_t *
output_unified_flush_hunk(svn_diff__file_output_baton_t * baton)1622 output_unified_flush_hunk(svn_diff__file_output_baton_t *baton)
1623 {
1624 apr_off_t target_line;
1625 apr_size_t hunk_len;
1626 apr_off_t old_start;
1627 apr_off_t new_start;
1628
1629 if (svn_stringbuf_isempty(baton->hunk))
1630 {
1631 /* Nothing to flush */
1632 return SVN_NO_ERROR;
1633 }
1634
1635 target_line = baton->hunk_start[0] + baton->hunk_length[0]
1636 + baton->context_size;
1637
1638 /* Add trailing context to the hunk */
1639 SVN_ERR(output_unified_diff_range(baton, 0 /* original */,
1640 svn_diff__file_output_unified_context,
1641 target_line,
1642 baton->cancel_func, baton->cancel_baton));
1643
1644 old_start = baton->hunk_start[0];
1645 new_start = baton->hunk_start[1];
1646
1647 /* If the file is non-empty, convert the line indexes from
1648 zero based to one based */
1649 if (baton->hunk_length[0])
1650 old_start++;
1651 if (baton->hunk_length[1])
1652 new_start++;
1653
1654 /* Write the hunk header */
1655 SVN_ERR(svn_diff__unified_write_hunk_header(
1656 baton->output_stream, baton->header_encoding, "@@",
1657 old_start, baton->hunk_length[0],
1658 new_start, baton->hunk_length[1],
1659 baton->hunk_extra_context,
1660 baton->pool));
1661
1662 /* Output the hunk content */
1663 hunk_len = baton->hunk->len;
1664 SVN_ERR(svn_stream_write(baton->output_stream, baton->hunk->data,
1665 &hunk_len));
1666
1667 /* Prepare for the next hunk */
1668 baton->hunk_length[0] = 0;
1669 baton->hunk_length[1] = 0;
1670 baton->hunk_start[0] = 0;
1671 baton->hunk_start[1] = 0;
1672 svn_stringbuf_setempty(baton->hunk);
1673
1674 return SVN_NO_ERROR;
1675 }
1676
1677 static svn_error_t *
output_unified_diff_modified(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)1678 output_unified_diff_modified(void *baton,
1679 apr_off_t original_start, apr_off_t original_length,
1680 apr_off_t modified_start, apr_off_t modified_length,
1681 apr_off_t latest_start, apr_off_t latest_length)
1682 {
1683 svn_diff__file_output_baton_t *output_baton = baton;
1684 apr_off_t context_prefix_length;
1685 apr_off_t prev_context_end;
1686 svn_boolean_t init_hunk = FALSE;
1687
1688 if (original_start > output_baton->context_size)
1689 context_prefix_length = output_baton->context_size;
1690 else
1691 context_prefix_length = original_start;
1692
1693 /* Calculate where the previous hunk will end if we would write it now
1694 (including the necessary context at the end) */
1695 if (output_baton->hunk_length[0] > 0 || output_baton->hunk_length[1] > 0)
1696 {
1697 prev_context_end = output_baton->hunk_start[0]
1698 + output_baton->hunk_length[0]
1699 + output_baton->context_size;
1700 }
1701 else
1702 {
1703 prev_context_end = -1;
1704
1705 if (output_baton->hunk_start[0] == 0
1706 && (original_length > 0 || modified_length > 0))
1707 init_hunk = TRUE;
1708 }
1709
1710 /* If the changed range is far enough from the previous range, flush the current
1711 hunk. */
1712 {
1713 apr_off_t new_hunk_start = (original_start - context_prefix_length);
1714
1715 if (output_baton->current_line[0] < new_hunk_start
1716 && prev_context_end <= new_hunk_start)
1717 {
1718 SVN_ERR(output_unified_flush_hunk(output_baton));
1719 init_hunk = TRUE;
1720 }
1721 else if (output_baton->hunk_length[0] > 0
1722 || output_baton->hunk_length[1] > 0)
1723 {
1724 /* We extend the current hunk */
1725
1726
1727 /* Original: Output the context preceding the changed range */
1728 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1729 svn_diff__file_output_unified_context,
1730 original_start,
1731 output_baton->cancel_func,
1732 output_baton->cancel_baton));
1733 }
1734 }
1735
1736 /* Original: Skip lines until we are at the beginning of the context we want
1737 to display */
1738 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1739 svn_diff__file_output_unified_skip,
1740 original_start - context_prefix_length,
1741 output_baton->cancel_func,
1742 output_baton->cancel_baton));
1743
1744 /* Note that the above skip stores data for the show_c_function support below */
1745
1746 if (init_hunk)
1747 {
1748 SVN_ERR_ASSERT(output_baton->hunk_length[0] == 0
1749 && output_baton->hunk_length[1] == 0);
1750
1751 output_baton->hunk_start[0] = original_start - context_prefix_length;
1752 output_baton->hunk_start[1] = modified_start - context_prefix_length;
1753 }
1754
1755 if (init_hunk && output_baton->show_c_function)
1756 {
1757 apr_size_t p;
1758 const char *invalid_character;
1759
1760 /* Save the extra context for later use.
1761 * Note that the last byte of the hunk_extra_context array is never
1762 * touched after it is zero-initialized, so the array is always
1763 * 0-terminated. */
1764 strncpy(output_baton->hunk_extra_context,
1765 output_baton->extra_context->data,
1766 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1767 /* Trim whitespace at the end, most notably to get rid of any
1768 * newline characters. */
1769 p = strlen(output_baton->hunk_extra_context);
1770 while (p > 0
1771 && svn_ctype_isspace(output_baton->hunk_extra_context[p - 1]))
1772 {
1773 output_baton->hunk_extra_context[--p] = '\0';
1774 }
1775 invalid_character =
1776 svn_utf__last_valid(output_baton->hunk_extra_context,
1777 SVN_DIFF__EXTRA_CONTEXT_LENGTH);
1778 for (p = invalid_character - output_baton->hunk_extra_context;
1779 p < SVN_DIFF__EXTRA_CONTEXT_LENGTH; p++)
1780 {
1781 output_baton->hunk_extra_context[p] = '\0';
1782 }
1783 }
1784
1785 /* Modified: Skip lines until we are at the start of the changed range */
1786 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1787 svn_diff__file_output_unified_skip,
1788 modified_start,
1789 output_baton->cancel_func,
1790 output_baton->cancel_baton));
1791
1792 /* Original: Output the context preceding the changed range */
1793 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1794 svn_diff__file_output_unified_context,
1795 original_start,
1796 output_baton->cancel_func,
1797 output_baton->cancel_baton));
1798
1799 /* Both: Output the changed range */
1800 SVN_ERR(output_unified_diff_range(output_baton, 0 /* original */,
1801 svn_diff__file_output_unified_delete,
1802 original_start + original_length,
1803 output_baton->cancel_func,
1804 output_baton->cancel_baton));
1805 SVN_ERR(output_unified_diff_range(output_baton, 1 /* modified */,
1806 svn_diff__file_output_unified_insert,
1807 modified_start + modified_length,
1808 output_baton->cancel_func,
1809 output_baton->cancel_baton));
1810
1811 return SVN_NO_ERROR;
1812 }
1813
1814 /* Set *HEADER to a new string consisting of PATH, a tab, and PATH's mtime. */
1815 static svn_error_t *
output_unified_default_hdr(const char ** header,const char * path,apr_pool_t * pool)1816 output_unified_default_hdr(const char **header, const char *path,
1817 apr_pool_t *pool)
1818 {
1819 apr_finfo_t file_info;
1820 apr_time_exp_t exploded_time;
1821 char time_buffer[64];
1822 apr_size_t time_len;
1823 const char *utf8_timestr;
1824
1825 SVN_ERR(svn_io_stat(&file_info, path, APR_FINFO_MTIME, pool));
1826 apr_time_exp_lt(&exploded_time, file_info.mtime);
1827
1828 apr_strftime(time_buffer, &time_len, sizeof(time_buffer) - 1,
1829 /* Order of date components can be different in different languages */
1830 _("%a %b %e %H:%M:%S %Y"), &exploded_time);
1831
1832 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_timestr, time_buffer, pool));
1833
1834 *header = apr_psprintf(pool, "%s\t%s", path, utf8_timestr);
1835
1836 return SVN_NO_ERROR;
1837 }
1838
1839 static const svn_diff_output_fns_t svn_diff__file_output_unified_vtable =
1840 {
1841 NULL, /* output_common */
1842 output_unified_diff_modified,
1843 NULL, /* output_diff_latest */
1844 NULL, /* output_diff_common */
1845 NULL /* output_conflict */
1846 };
1847
1848 svn_error_t *
svn_diff_file_output_unified4(svn_stream_t * output_stream,svn_diff_t * diff,const char * original_path,const char * modified_path,const char * original_header,const char * modified_header,const char * header_encoding,const char * relative_to_dir,svn_boolean_t show_c_function,int context_size,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * pool)1849 svn_diff_file_output_unified4(svn_stream_t *output_stream,
1850 svn_diff_t *diff,
1851 const char *original_path,
1852 const char *modified_path,
1853 const char *original_header,
1854 const char *modified_header,
1855 const char *header_encoding,
1856 const char *relative_to_dir,
1857 svn_boolean_t show_c_function,
1858 int context_size,
1859 svn_cancel_func_t cancel_func,
1860 void *cancel_baton,
1861 apr_pool_t *pool)
1862 {
1863 if (svn_diff_contains_diffs(diff))
1864 {
1865 svn_diff__file_output_baton_t baton;
1866 int i;
1867
1868 memset(&baton, 0, sizeof(baton));
1869 baton.output_stream = output_stream;
1870 baton.cancel_func = cancel_func;
1871 baton.cancel_baton = cancel_baton;
1872 baton.pool = pool;
1873 baton.header_encoding = header_encoding;
1874 baton.path[0] = original_path;
1875 baton.path[1] = modified_path;
1876 baton.hunk = svn_stringbuf_create_empty(pool);
1877 baton.show_c_function = show_c_function;
1878 baton.extra_context = svn_stringbuf_create_empty(pool);
1879 baton.context_size = (context_size >= 0) ? context_size
1880 : SVN_DIFF__UNIFIED_CONTEXT_SIZE;
1881
1882 if (show_c_function)
1883 {
1884 baton.extra_skip_match = apr_array_make(pool, 3, sizeof(char **));
1885
1886 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "public:*";
1887 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "private:*";
1888 APR_ARRAY_PUSH(baton.extra_skip_match, const char *) = "protected:*";
1889 }
1890
1891 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.context_str, " ",
1892 header_encoding, pool));
1893 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.delete_str, "-",
1894 header_encoding, pool));
1895 SVN_ERR(svn_utf_cstring_from_utf8_ex2(&baton.insert_str, "+",
1896 header_encoding, pool));
1897
1898 if (relative_to_dir)
1899 {
1900 /* Possibly adjust the "original" and "modified" paths shown in
1901 the output (see issue #2723). */
1902 const char *child_path;
1903
1904 if (! original_header)
1905 {
1906 child_path = svn_dirent_is_child(relative_to_dir,
1907 original_path, pool);
1908 if (child_path)
1909 original_path = child_path;
1910 else
1911 return svn_error_createf(
1912 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1913 _("Path '%s' must be inside "
1914 "the directory '%s'"),
1915 svn_dirent_local_style(original_path, pool),
1916 svn_dirent_local_style(relative_to_dir,
1917 pool));
1918 }
1919
1920 if (! modified_header)
1921 {
1922 child_path = svn_dirent_is_child(relative_to_dir,
1923 modified_path, pool);
1924 if (child_path)
1925 modified_path = child_path;
1926 else
1927 return svn_error_createf(
1928 SVN_ERR_BAD_RELATIVE_PATH, NULL,
1929 _("Path '%s' must be inside "
1930 "the directory '%s'"),
1931 svn_dirent_local_style(modified_path, pool),
1932 svn_dirent_local_style(relative_to_dir,
1933 pool));
1934 }
1935 }
1936
1937 for (i = 0; i < 2; i++)
1938 {
1939 SVN_ERR(svn_io_file_open(&baton.file[i], baton.path[i],
1940 APR_READ, APR_OS_DEFAULT, pool));
1941 }
1942
1943 if (original_header == NULL)
1944 {
1945 SVN_ERR(output_unified_default_hdr(&original_header, original_path,
1946 pool));
1947 }
1948
1949 if (modified_header == NULL)
1950 {
1951 SVN_ERR(output_unified_default_hdr(&modified_header, modified_path,
1952 pool));
1953 }
1954
1955 SVN_ERR(svn_diff__unidiff_write_header(output_stream, header_encoding,
1956 original_header, modified_header,
1957 pool));
1958
1959 SVN_ERR(svn_diff_output2(diff, &baton,
1960 &svn_diff__file_output_unified_vtable,
1961 cancel_func, cancel_baton));
1962 SVN_ERR(output_unified_flush_hunk(&baton));
1963
1964 for (i = 0; i < 2; i++)
1965 {
1966 SVN_ERR(svn_io_file_close(baton.file[i], pool));
1967 }
1968 }
1969
1970 return SVN_NO_ERROR;
1971 }
1972
1973
/** Display diff3 **/

/* A stream to remember *leading* context.  Note that this stream does
   *not* copy the data that it is remembering; it just saves
   *pointers! */
typedef struct context_saver_t {
  svn_stream_t *stream;
  /* Number of slots in DATA and LEN. */
  int context_size;
  const char **data; /* const char *data[context_size] */
  apr_size_t *len;   /* apr_size_t len[context_size] */
  /* Index of the slot the next write goes to; wraps around at
     CONTEXT_SIZE, so the slots are reused in a circular fashion. */
  apr_size_t next_slot;
  /* Number of writes recorded so far. */
  apr_ssize_t total_writes;
} context_saver_t;
1987
1988
1989 static svn_error_t *
context_saver_stream_write(void * baton,const char * data,apr_size_t * len)1990 context_saver_stream_write(void *baton,
1991 const char *data,
1992 apr_size_t *len)
1993 {
1994 context_saver_t *cs = baton;
1995
1996 if (cs->context_size > 0)
1997 {
1998 cs->data[cs->next_slot] = data;
1999 cs->len[cs->next_slot] = *len;
2000 cs->next_slot = (cs->next_slot + 1) % cs->context_size;
2001 cs->total_writes++;
2002 }
2003 return SVN_NO_ERROR;
2004 }
2005
2006 typedef struct svn_diff3__file_output_baton_t
2007 {
2008 svn_stream_t *output_stream;
2009
2010 const char *path[3];
2011
2012 apr_off_t current_line[3];
2013
2014 char *buffer[3];
2015 char *endp[3];
2016 char *curp[3];
2017
2018 /* The following four members are in the encoding used for the output. */
2019 const char *conflict_modified;
2020 const char *conflict_original;
2021 const char *conflict_separator;
2022 const char *conflict_latest;
2023
2024 const char *marker_eol;
2025
2026 svn_diff_conflict_display_style_t conflict_style;
2027 int context_size;
2028
2029 /* cancel support */
2030 svn_cancel_func_t cancel_func;
2031 void *cancel_baton;
2032
2033 /* The rest of the fields are for
2034 svn_diff_conflict_display_only_conflicts only. Note that for
2035 these batons, OUTPUT_STREAM is either CONTEXT_SAVER->STREAM or
2036 (soon after a conflict) a "trailing context stream", never the
2037 actual output stream.*/
2038 /* The actual output stream. */
2039 svn_stream_t *real_output_stream;
2040 context_saver_t *context_saver;
2041 /* Used to allocate context_saver and trailing context streams, and
2042 for some printfs. */
2043 apr_pool_t *pool;
2044 } svn_diff3__file_output_baton_t;
2045
2046 static svn_error_t *
flush_context_saver(context_saver_t * cs,svn_stream_t * output_stream)2047 flush_context_saver(context_saver_t *cs,
2048 svn_stream_t *output_stream)
2049 {
2050 int i;
2051 for (i = 0; i < cs->context_size; i++)
2052 {
2053 apr_size_t slot = (i + cs->next_slot) % cs->context_size;
2054 if (cs->data[slot])
2055 {
2056 apr_size_t len = cs->len[slot];
2057 SVN_ERR(svn_stream_write(output_stream, cs->data[slot], &len));
2058 }
2059 }
2060 return SVN_NO_ERROR;
2061 }
2062
2063 static void
make_context_saver(svn_diff3__file_output_baton_t * fob)2064 make_context_saver(svn_diff3__file_output_baton_t *fob)
2065 {
2066 context_saver_t *cs;
2067
2068 assert(fob->context_size > 0); /* Or nothing to save */
2069
2070 svn_pool_clear(fob->pool);
2071 cs = apr_pcalloc(fob->pool, sizeof(*cs));
2072 cs->stream = svn_stream_empty(fob->pool);
2073 svn_stream_set_baton(cs->stream, cs);
2074 svn_stream_set_write(cs->stream, context_saver_stream_write);
2075 fob->context_saver = cs;
2076 fob->output_stream = cs->stream;
2077 cs->context_size = fob->context_size;
2078 cs->data = apr_pcalloc(fob->pool, sizeof(*cs->data) * cs->context_size);
2079 cs->len = apr_pcalloc(fob->pool, sizeof(*cs->len) * cs->context_size);
2080 }
2081
2082
2083 /* A stream which prints LINES_TO_PRINT (based on context size) lines to
2084 BATON->REAL_OUTPUT_STREAM, and then changes BATON->OUTPUT_STREAM to
2085 a context_saver; used for *trailing* context. */
2086
2087 struct trailing_context_printer {
2088 apr_size_t lines_to_print;
2089 svn_diff3__file_output_baton_t *fob;
2090 };
2091
2092
2093
2094 static svn_error_t *
trailing_context_printer_write(void * baton,const char * data,apr_size_t * len)2095 trailing_context_printer_write(void *baton,
2096 const char *data,
2097 apr_size_t *len)
2098 {
2099 struct trailing_context_printer *tcp = baton;
2100 SVN_ERR_ASSERT(tcp->lines_to_print > 0);
2101 SVN_ERR(svn_stream_write(tcp->fob->real_output_stream, data, len));
2102 tcp->lines_to_print--;
2103 if (tcp->lines_to_print == 0)
2104 make_context_saver(tcp->fob);
2105 return SVN_NO_ERROR;
2106 }
2107
2108
2109 static void
make_trailing_context_printer(svn_diff3__file_output_baton_t * btn)2110 make_trailing_context_printer(svn_diff3__file_output_baton_t *btn)
2111 {
2112 struct trailing_context_printer *tcp;
2113 svn_stream_t *s;
2114
2115 svn_pool_clear(btn->pool);
2116
2117 tcp = apr_pcalloc(btn->pool, sizeof(*tcp));
2118 tcp->lines_to_print = btn->context_size;
2119 tcp->fob = btn;
2120 s = svn_stream_empty(btn->pool);
2121 svn_stream_set_baton(s, tcp);
2122 svn_stream_set_write(s, trailing_context_printer_write);
2123 btn->output_stream = s;
2124 }
2125
2126
2127
/* What output_line() should do with the line it consumes. */
typedef enum svn_diff3__file_output_type_e
{
  /* Advance past the line without writing it. */
  svn_diff3__file_output_skip,

  /* Write the line to the current output stream. */
  svn_diff3__file_output_normal
} svn_diff3__file_output_type_e;
2133
2134
2135 static svn_error_t *
output_line(svn_diff3__file_output_baton_t * baton,svn_diff3__file_output_type_e type,int idx)2136 output_line(svn_diff3__file_output_baton_t *baton,
2137 svn_diff3__file_output_type_e type, int idx)
2138 {
2139 char *curp;
2140 char *endp;
2141 char *eol;
2142 apr_size_t len;
2143
2144 curp = baton->curp[idx];
2145 endp = baton->endp[idx];
2146
2147 /* Lazily update the current line even if we're at EOF.
2148 */
2149 baton->current_line[idx]++;
2150
2151 if (curp == endp)
2152 return SVN_NO_ERROR;
2153
2154 eol = svn_eol__find_eol_start(curp, endp - curp);
2155 if (!eol)
2156 eol = endp;
2157 else
2158 {
2159 svn_boolean_t had_cr = (*eol == '\r');
2160 eol++;
2161 if (had_cr && eol != endp && *eol == '\n')
2162 eol++;
2163 }
2164
2165 if (type != svn_diff3__file_output_skip)
2166 {
2167 len = eol - curp;
2168 /* Note that the trailing context printer assumes that
2169 svn_stream_write is called exactly once per line. */
2170 SVN_ERR(svn_stream_write(baton->output_stream, curp, &len));
2171 }
2172
2173 baton->curp[idx] = eol;
2174
2175 return SVN_NO_ERROR;
2176 }
2177
2178 static svn_error_t *
output_marker_eol(svn_diff3__file_output_baton_t * btn)2179 output_marker_eol(svn_diff3__file_output_baton_t *btn)
2180 {
2181 return svn_stream_puts(btn->output_stream, btn->marker_eol);
2182 }
2183
2184 static svn_error_t *
output_hunk(void * baton,int idx,apr_off_t target_line,apr_off_t target_length)2185 output_hunk(void *baton, int idx, apr_off_t target_line,
2186 apr_off_t target_length)
2187 {
2188 svn_diff3__file_output_baton_t *output_baton = baton;
2189
2190 /* Skip lines until we are at the start of the changed range */
2191 while (output_baton->current_line[idx] < target_line)
2192 {
2193 SVN_ERR(output_line(output_baton, svn_diff3__file_output_skip, idx));
2194 }
2195
2196 target_line += target_length;
2197
2198 while (output_baton->current_line[idx] < target_line)
2199 {
2200 SVN_ERR(output_line(output_baton, svn_diff3__file_output_normal, idx));
2201 }
2202
2203 return SVN_NO_ERROR;
2204 }
2205
2206 static svn_error_t *
output_common(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2207 output_common(void *baton, apr_off_t original_start, apr_off_t original_length,
2208 apr_off_t modified_start, apr_off_t modified_length,
2209 apr_off_t latest_start, apr_off_t latest_length)
2210 {
2211 return output_hunk(baton, 1, modified_start, modified_length);
2212 }
2213
2214 static svn_error_t *
output_diff_modified(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2215 output_diff_modified(void *baton,
2216 apr_off_t original_start, apr_off_t original_length,
2217 apr_off_t modified_start, apr_off_t modified_length,
2218 apr_off_t latest_start, apr_off_t latest_length)
2219 {
2220 return output_hunk(baton, 1, modified_start, modified_length);
2221 }
2222
2223 static svn_error_t *
output_diff_latest(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2224 output_diff_latest(void *baton,
2225 apr_off_t original_start, apr_off_t original_length,
2226 apr_off_t modified_start, apr_off_t modified_length,
2227 apr_off_t latest_start, apr_off_t latest_length)
2228 {
2229 return output_hunk(baton, 2, latest_start, latest_length);
2230 }
2231
2232 static svn_error_t *
2233 output_conflict(void *baton,
2234 apr_off_t original_start, apr_off_t original_length,
2235 apr_off_t modified_start, apr_off_t modified_length,
2236 apr_off_t latest_start, apr_off_t latest_length,
2237 svn_diff_t *diff);
2238
2239 static const svn_diff_output_fns_t svn_diff3__file_output_vtable =
2240 {
2241 output_common,
2242 output_diff_modified,
2243 output_diff_latest,
2244 output_diff_modified, /* output_diff_common */
2245 output_conflict
2246 };
2247
2248 static svn_error_t *
output_conflict_with_context_marker(svn_diff3__file_output_baton_t * btn,const char * label,apr_off_t start,apr_off_t length)2249 output_conflict_with_context_marker(svn_diff3__file_output_baton_t *btn,
2250 const char *label,
2251 apr_off_t start,
2252 apr_off_t length)
2253 {
2254 if (length == 1)
2255 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2256 "%s (%" APR_OFF_T_FMT ")",
2257 label, start + 1));
2258 else
2259 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2260 "%s (%" APR_OFF_T_FMT ",%" APR_OFF_T_FMT ")",
2261 label, start + 1, length));
2262
2263 SVN_ERR(output_marker_eol(btn));
2264
2265 return SVN_NO_ERROR;
2266 }
2267
2268 static svn_error_t *
output_conflict_with_context(svn_diff3__file_output_baton_t * btn,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length)2269 output_conflict_with_context(svn_diff3__file_output_baton_t *btn,
2270 apr_off_t original_start,
2271 apr_off_t original_length,
2272 apr_off_t modified_start,
2273 apr_off_t modified_length,
2274 apr_off_t latest_start,
2275 apr_off_t latest_length)
2276 {
2277 /* Are we currently saving starting context (as opposed to printing
2278 trailing context)? If so, flush it. */
2279 if (btn->output_stream == btn->context_saver->stream)
2280 {
2281 if (btn->context_saver->total_writes > btn->context_size)
2282 SVN_ERR(svn_stream_puts(btn->real_output_stream, "@@\n"));
2283 SVN_ERR(flush_context_saver(btn->context_saver, btn->real_output_stream));
2284 }
2285
2286 /* Print to the real output stream. */
2287 btn->output_stream = btn->real_output_stream;
2288
2289 /* Output the conflict itself. */
2290 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_modified,
2291 modified_start, modified_length));
2292 SVN_ERR(output_hunk(btn, 1/*modified*/, modified_start, modified_length));
2293
2294 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_original,
2295 original_start, original_length));
2296 SVN_ERR(output_hunk(btn, 0/*original*/, original_start, original_length));
2297
2298 SVN_ERR(svn_stream_printf(btn->output_stream, btn->pool,
2299 "%s%s", btn->conflict_separator, btn->marker_eol));
2300 SVN_ERR(output_hunk(btn, 2/*latest*/, latest_start, latest_length));
2301 SVN_ERR(output_conflict_with_context_marker(btn, btn->conflict_latest,
2302 latest_start, latest_length));
2303
2304 /* Go into print-trailing-context mode instead. */
2305 make_trailing_context_printer(btn);
2306
2307 return SVN_NO_ERROR;
2308 }
2309
2310
2311 static svn_error_t *
output_conflict(void * baton,apr_off_t original_start,apr_off_t original_length,apr_off_t modified_start,apr_off_t modified_length,apr_off_t latest_start,apr_off_t latest_length,svn_diff_t * diff)2312 output_conflict(void *baton,
2313 apr_off_t original_start, apr_off_t original_length,
2314 apr_off_t modified_start, apr_off_t modified_length,
2315 apr_off_t latest_start, apr_off_t latest_length,
2316 svn_diff_t *diff)
2317 {
2318 svn_diff3__file_output_baton_t *file_baton = baton;
2319
2320 svn_diff_conflict_display_style_t style = file_baton->conflict_style;
2321
2322 if (style == svn_diff_conflict_display_only_conflicts)
2323 return output_conflict_with_context(file_baton,
2324 original_start, original_length,
2325 modified_start, modified_length,
2326 latest_start, latest_length);
2327
2328 if (style == svn_diff_conflict_display_resolved_modified_latest)
2329 {
2330 if (diff)
2331 return svn_diff_output2(diff, baton,
2332 &svn_diff3__file_output_vtable,
2333 file_baton->cancel_func,
2334 file_baton->cancel_baton);
2335 else
2336 style = svn_diff_conflict_display_modified_latest;
2337 }
2338
2339 if (style == svn_diff_conflict_display_modified_latest ||
2340 style == svn_diff_conflict_display_modified_original_latest)
2341 {
2342 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2343 file_baton->conflict_modified));
2344 SVN_ERR(output_marker_eol(file_baton));
2345
2346 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2347
2348 if (style == svn_diff_conflict_display_modified_original_latest)
2349 {
2350 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2351 file_baton->conflict_original));
2352 SVN_ERR(output_marker_eol(file_baton));
2353 SVN_ERR(output_hunk(baton, 0, original_start, original_length));
2354 }
2355
2356 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2357 file_baton->conflict_separator));
2358 SVN_ERR(output_marker_eol(file_baton));
2359
2360 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2361
2362 SVN_ERR(svn_stream_puts(file_baton->output_stream,
2363 file_baton->conflict_latest));
2364 SVN_ERR(output_marker_eol(file_baton));
2365 }
2366 else if (style == svn_diff_conflict_display_modified)
2367 SVN_ERR(output_hunk(baton, 1, modified_start, modified_length));
2368 else if (style == svn_diff_conflict_display_latest)
2369 SVN_ERR(output_hunk(baton, 2, latest_start, latest_length));
2370 else /* unknown style */
2371 SVN_ERR_MALFUNCTION();
2372
2373 return SVN_NO_ERROR;
2374 }
2375
2376 svn_error_t *
svn_diff_file_output_merge3(svn_stream_t * output_stream,svn_diff_t * diff,const char * original_path,const char * modified_path,const char * latest_path,const char * conflict_original,const char * conflict_modified,const char * conflict_latest,const char * conflict_separator,svn_diff_conflict_display_style_t style,svn_cancel_func_t cancel_func,void * cancel_baton,apr_pool_t * scratch_pool)2377 svn_diff_file_output_merge3(svn_stream_t *output_stream,
2378 svn_diff_t *diff,
2379 const char *original_path,
2380 const char *modified_path,
2381 const char *latest_path,
2382 const char *conflict_original,
2383 const char *conflict_modified,
2384 const char *conflict_latest,
2385 const char *conflict_separator,
2386 svn_diff_conflict_display_style_t style,
2387 svn_cancel_func_t cancel_func,
2388 void *cancel_baton,
2389 apr_pool_t *scratch_pool)
2390 {
2391 svn_diff3__file_output_baton_t baton;
2392 apr_file_t *file[3];
2393 int idx;
2394 #if APR_HAS_MMAP
2395 apr_mmap_t *mm[3] = { 0 };
2396 #endif /* APR_HAS_MMAP */
2397 const char *eol;
2398 svn_boolean_t conflicts_only =
2399 (style == svn_diff_conflict_display_only_conflicts);
2400
2401 memset(&baton, 0, sizeof(baton));
2402 baton.context_size = SVN_DIFF__UNIFIED_CONTEXT_SIZE;
2403 if (conflicts_only)
2404 {
2405 baton.pool = svn_pool_create(scratch_pool);
2406 make_context_saver(&baton);
2407 baton.real_output_stream = output_stream;
2408 }
2409 else
2410 baton.output_stream = output_stream;
2411 baton.path[0] = original_path;
2412 baton.path[1] = modified_path;
2413 baton.path[2] = latest_path;
2414 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_modified,
2415 conflict_modified ? conflict_modified
2416 : apr_psprintf(scratch_pool, "<<<<<<< %s",
2417 modified_path),
2418 scratch_pool));
2419 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_original,
2420 conflict_original ? conflict_original
2421 : apr_psprintf(scratch_pool, "||||||| %s",
2422 original_path),
2423 scratch_pool));
2424 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_separator,
2425 conflict_separator ? conflict_separator
2426 : "=======", scratch_pool));
2427 SVN_ERR(svn_utf_cstring_from_utf8(&baton.conflict_latest,
2428 conflict_latest ? conflict_latest
2429 : apr_psprintf(scratch_pool, ">>>>>>> %s",
2430 latest_path),
2431 scratch_pool));
2432
2433 baton.conflict_style = style;
2434
2435 for (idx = 0; idx < 3; idx++)
2436 {
2437 apr_size_t size;
2438
2439 SVN_ERR(map_or_read_file(&file[idx],
2440 MMAP_T_ARG(mm[idx])
2441 &baton.buffer[idx], &size,
2442 baton.path[idx], scratch_pool));
2443
2444 baton.curp[idx] = baton.buffer[idx];
2445 baton.endp[idx] = baton.buffer[idx];
2446
2447 if (baton.endp[idx])
2448 baton.endp[idx] += size;
2449 }
2450
2451 /* Check what eol marker we should use for conflict markers.
2452 We use the eol marker of the modified file and fall back on the
2453 platform's eol marker if that file doesn't contain any newlines. */
2454 eol = svn_eol__detect_eol(baton.buffer[1], baton.endp[1] - baton.buffer[1],
2455 NULL);
2456 if (! eol)
2457 eol = APR_EOL_STR;
2458 baton.marker_eol = eol;
2459
2460 baton.cancel_func = cancel_func;
2461 baton.cancel_baton = cancel_baton;
2462
2463 SVN_ERR(svn_diff_output2(diff, &baton,
2464 &svn_diff3__file_output_vtable,
2465 cancel_func, cancel_baton));
2466
2467 for (idx = 0; idx < 3; idx++)
2468 {
2469 #if APR_HAS_MMAP
2470 if (mm[idx])
2471 {
2472 apr_status_t rv = apr_mmap_delete(mm[idx]);
2473 if (rv != APR_SUCCESS)
2474 {
2475 return svn_error_wrap_apr(rv, _("Failed to delete mmap '%s'"),
2476 baton.path[idx]);
2477 }
2478 }
2479 #endif /* APR_HAS_MMAP */
2480
2481 if (file[idx])
2482 {
2483 SVN_ERR(svn_io_file_close(file[idx], scratch_pool));
2484 }
2485 }
2486
2487 if (conflicts_only)
2488 svn_pool_destroy(baton.pool);
2489
2490 return SVN_NO_ERROR;
2491 }
2492
2493