1 /*
2 * parse-diff.c: functions for parsing diff files
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24 #include <stdlib.h>
25 #include <stddef.h>
26 #include <string.h>
27
28 #include "svn_hash.h"
29 #include "svn_types.h"
30 #include "svn_error.h"
31 #include "svn_io.h"
32 #include "svn_pools.h"
33 #include "svn_props.h"
34 #include "svn_string.h"
35 #include "svn_utf.h"
36 #include "svn_dirent_uri.h"
37 #include "svn_diff.h"
38 #include "svn_ctype.h"
39 #include "svn_mergeinfo.h"
40
41 #include "private/svn_eol_private.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_diff_private.h"
44 #include "private/svn_sorts_private.h"
45
46 #include "diff.h"
47
48 #include "svn_private_config.h"
49
/* Evaluate to non-zero iff the C string STR begins with the C string START.
 * START appears twice in the expansion, so it must be free of side effects. */
#define starts_with(str, start)  \
  (0 == strncmp((str), (start), strlen(start)))
53
/* Length of the string literal STR, not counting its terminating NUL.
 * Computed with sizeof, so STR must be a literal (or array), not a pointer. */
#define STRLEN_LITERAL(str) (sizeof(str) - 1)
56
57 /* This struct describes a range within a file, as well as the
58 * current cursor position within the range. All numbers are in bytes. */
59 struct svn_diff__hunk_range {
60 apr_off_t start;
61 apr_off_t end;
62 apr_off_t current;
63 };
64
65 struct svn_diff_hunk_t {
66 /* The patch this hunk belongs to. */
67 const svn_patch_t *patch;
68
69 /* APR file handle to the patch file this hunk came from. */
70 apr_file_t *apr_file;
71
72 /* Ranges used to keep track of this hunk's texts positions within
73 * the patch file. */
74 struct svn_diff__hunk_range diff_text_range;
75 struct svn_diff__hunk_range original_text_range;
76 struct svn_diff__hunk_range modified_text_range;
77
78 /* Hunk ranges as they appeared in the patch file.
79 * All numbers are lines, not bytes. */
80 svn_linenum_t original_start;
81 svn_linenum_t original_length;
82 svn_linenum_t modified_start;
83 svn_linenum_t modified_length;
84
85 /* Number of lines of leading and trailing hunk context. */
86 svn_linenum_t leading_context;
87 svn_linenum_t trailing_context;
88
89 /* Did we see a 'file does not end with eol' marker in this hunk? */
90 svn_boolean_t original_no_final_eol;
91 svn_boolean_t modified_no_final_eol;
92
93 /* Fuzz penalty, triggered by bad patch targets */
94 svn_linenum_t original_fuzz;
95 svn_linenum_t modified_fuzz;
96 };
97
98 struct svn_diff_binary_patch_t {
99 /* The patch this hunk belongs to. */
100 const svn_patch_t *patch;
101
102 /* APR file handle to the patch file this hunk came from. */
103 apr_file_t *apr_file;
104
105 /* Offsets inside APR_FILE representing the location of the patch */
106 apr_off_t src_start;
107 apr_off_t src_end;
108 svn_filesize_t src_filesize; /* Expanded/final size */
109
110 /* Offsets inside APR_FILE representing the location of the patch */
111 apr_off_t dst_start;
112 apr_off_t dst_end;
113 svn_filesize_t dst_filesize; /* Expanded/final size */
114 };
115
116 /* Common guts of svn_diff_hunk__create_adds_single_line() and
117 * svn_diff_hunk__create_deletes_single_line().
118 *
119 * ADD is TRUE if adding and FALSE if deleting.
120 */
121 static svn_error_t *
add_or_delete_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,svn_boolean_t add,apr_pool_t * result_pool,apr_pool_t * scratch_pool)122 add_or_delete_single_line(svn_diff_hunk_t **hunk_out,
123 const char *line,
124 const svn_patch_t *patch,
125 svn_boolean_t add,
126 apr_pool_t *result_pool,
127 apr_pool_t *scratch_pool)
128 {
129 svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk));
130 static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" };
131 const apr_size_t header_len = strlen(hunk_header[add]);
132 const apr_size_t len = strlen(line);
133 const apr_size_t end = header_len + (1 + len); /* The +1 is for the \n. */
134 svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool);
135
136 hunk->patch = patch;
137
138 /* hunk->apr_file is created below. */
139
140 hunk->diff_text_range.start = header_len;
141 hunk->diff_text_range.current = header_len;
142
143 if (add)
144 {
145 hunk->original_text_range.start = 0; /* There's no "original" text. */
146 hunk->original_text_range.current = 0;
147 hunk->original_text_range.end = 0;
148 hunk->original_no_final_eol = FALSE;
149
150 hunk->modified_text_range.start = header_len;
151 hunk->modified_text_range.current = header_len;
152 hunk->modified_text_range.end = end;
153 hunk->modified_no_final_eol = TRUE;
154
155 hunk->original_start = 0;
156 hunk->original_length = 0;
157
158 hunk->modified_start = 1;
159 hunk->modified_length = 1;
160 }
161 else /* delete */
162 {
163 hunk->original_text_range.start = header_len;
164 hunk->original_text_range.current = header_len;
165 hunk->original_text_range.end = end;
166 hunk->original_no_final_eol = TRUE;
167
168 hunk->modified_text_range.start = 0; /* There's no "original" text. */
169 hunk->modified_text_range.current = 0;
170 hunk->modified_text_range.end = 0;
171 hunk->modified_no_final_eol = FALSE;
172
173 hunk->original_start = 1;
174 hunk->original_length = 1;
175
176 hunk->modified_start = 0;
177 hunk->modified_length = 0; /* setting to '1' works too */
178 }
179
180 hunk->leading_context = 0;
181 hunk->trailing_context = 0;
182
183 /* Create APR_FILE and put just a hunk in it (without a diff header).
184 * Save the offset of the last byte of the diff line. */
185 svn_stringbuf_appendbytes(buf, hunk_header[add], header_len);
186 svn_stringbuf_appendbyte(buf, add ? '+' : '-');
187 svn_stringbuf_appendbytes(buf, line, len);
188 svn_stringbuf_appendbyte(buf, '\n');
189 svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n");
190
191 hunk->diff_text_range.end = buf->len;
192
193 SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */,
194 NULL /* system tempdir */,
195 svn_io_file_del_on_pool_cleanup,
196 result_pool, scratch_pool));
197 SVN_ERR(svn_io_file_write_full(hunk->apr_file,
198 buf->data, buf->len,
199 NULL, scratch_pool));
200 /* No need to seek. */
201
202 *hunk_out = hunk;
203 return SVN_NO_ERROR;
204 }
205
206 svn_error_t *
svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)207 svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out,
208 const char *line,
209 const svn_patch_t *patch,
210 apr_pool_t *result_pool,
211 apr_pool_t *scratch_pool)
212 {
213 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
214 (!patch->reverse),
215 result_pool, scratch_pool));
216 return SVN_NO_ERROR;
217 }
218
219 svn_error_t *
svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)220 svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out,
221 const char *line,
222 const svn_patch_t *patch,
223 apr_pool_t *result_pool,
224 apr_pool_t *scratch_pool)
225 {
226 SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
227 patch->reverse,
228 result_pool, scratch_pool));
229 return SVN_NO_ERROR;
230 }
231
232 void
svn_diff_hunk_reset_diff_text(svn_diff_hunk_t * hunk)233 svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
234 {
235 hunk->diff_text_range.current = hunk->diff_text_range.start;
236 }
237
238 void
svn_diff_hunk_reset_original_text(svn_diff_hunk_t * hunk)239 svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
240 {
241 if (hunk->patch->reverse)
242 hunk->modified_text_range.current = hunk->modified_text_range.start;
243 else
244 hunk->original_text_range.current = hunk->original_text_range.start;
245 }
246
247 void
svn_diff_hunk_reset_modified_text(svn_diff_hunk_t * hunk)248 svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
249 {
250 if (hunk->patch->reverse)
251 hunk->original_text_range.current = hunk->original_text_range.start;
252 else
253 hunk->modified_text_range.current = hunk->modified_text_range.start;
254 }
255
256 svn_linenum_t
svn_diff_hunk_get_original_start(const svn_diff_hunk_t * hunk)257 svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
258 {
259 return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
260 }
261
262 svn_linenum_t
svn_diff_hunk_get_original_length(const svn_diff_hunk_t * hunk)263 svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
264 {
265 return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
266 }
267
268 svn_linenum_t
svn_diff_hunk_get_modified_start(const svn_diff_hunk_t * hunk)269 svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
270 {
271 return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
272 }
273
274 svn_linenum_t
svn_diff_hunk_get_modified_length(const svn_diff_hunk_t * hunk)275 svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
276 {
277 return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
278 }
279
280 svn_linenum_t
svn_diff_hunk_get_leading_context(const svn_diff_hunk_t * hunk)281 svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
282 {
283 return hunk->leading_context;
284 }
285
286 svn_linenum_t
svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t * hunk)287 svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
288 {
289 return hunk->trailing_context;
290 }
291
292 svn_linenum_t
svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t * hunk)293 svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk)
294 {
295 return hunk->patch->reverse ? hunk->original_fuzz : hunk->modified_fuzz;
296 }
297
298 /* Baton for the base85 stream implementation */
299 struct base85_baton_t
300 {
301 apr_file_t *file;
302 apr_pool_t *iterpool;
303 char buffer[52]; /* Bytes on current line */
304 apr_off_t next_pos; /* Start position of next line */
305 apr_off_t end_pos; /* Position after last line */
306 apr_size_t buf_size; /* Bytes available (52 unless at eof) */
307 apr_size_t buf_pos; /* Bytes in linebuffer */
308 svn_boolean_t done; /* At eof? */
309 };
310
311 /* Implements svn_read_fn_t for the base85 read stream */
312 static svn_error_t *
read_handler_base85(void * baton,char * buffer,apr_size_t * len)313 read_handler_base85(void *baton, char *buffer, apr_size_t *len)
314 {
315 struct base85_baton_t *b85b = baton;
316 apr_pool_t *iterpool = b85b->iterpool;
317 apr_size_t remaining = *len;
318 char *dest = buffer;
319
320 svn_pool_clear(iterpool);
321
322 if (b85b->done)
323 {
324 *len = 0;
325 return SVN_NO_ERROR;
326 }
327
328 while (remaining && (b85b->buf_size > b85b->buf_pos
329 || b85b->next_pos < b85b->end_pos))
330 {
331 svn_stringbuf_t *line;
332 svn_boolean_t at_eof;
333
334 apr_size_t available = b85b->buf_size - b85b->buf_pos;
335 if (available)
336 {
337 apr_size_t n = (remaining < available) ? remaining : available;
338
339 memcpy(dest, b85b->buffer + b85b->buf_pos, n);
340 dest += n;
341 remaining -= n;
342 b85b->buf_pos += n;
343
344 if (!remaining)
345 return SVN_NO_ERROR; /* *len = OK */
346 }
347
348 if (b85b->next_pos >= b85b->end_pos)
349 break; /* At EOF */
350 SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos,
351 iterpool));
352 SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof,
353 APR_SIZE_MAX, iterpool, iterpool));
354 if (at_eof)
355 b85b->next_pos = b85b->end_pos;
356 else
357 {
358 SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file,
359 iterpool));
360 }
361
362 if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z')
363 b85b->buf_size = line->data[0] - 'A' + 1;
364 else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z')
365 b85b->buf_size = line->data[0] - 'a' + 26 + 1;
366 else
367 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
368 _("Unexpected data in base85 section"));
369
370 if (b85b->buf_size < 52)
371 b85b->next_pos = b85b->end_pos; /* Handle as EOF */
372
373 SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size,
374 line->data + 1, line->len - 1,
375 iterpool));
376 b85b->buf_pos = 0;
377 }
378
379 *len -= remaining;
380 b85b->done = TRUE;
381
382 return SVN_NO_ERROR;
383 }
384
385 /* Implements svn_close_fn_t for the base85 read stream */
386 static svn_error_t *
close_handler_base85(void * baton)387 close_handler_base85(void *baton)
388 {
389 struct base85_baton_t *b85b = baton;
390
391 svn_pool_destroy(b85b->iterpool);
392
393 return SVN_NO_ERROR;
394 }
395
396 /* Gets a stream that reads decoded base85 data from a segment of a file.
397 The current implementation might assume that both start_pos and end_pos
398 are located at line boundaries. */
399 static svn_stream_t *
get_base85_data_stream(apr_file_t * file,apr_off_t start_pos,apr_off_t end_pos,apr_pool_t * result_pool)400 get_base85_data_stream(apr_file_t *file,
401 apr_off_t start_pos,
402 apr_off_t end_pos,
403 apr_pool_t *result_pool)
404 {
405 struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b));
406 svn_stream_t *base85s = svn_stream_create(b85b, result_pool);
407
408 b85b->file = file;
409 b85b->iterpool = svn_pool_create(result_pool);
410 b85b->next_pos = start_pos;
411 b85b->end_pos = end_pos;
412
413 svn_stream_set_read2(base85s, NULL /* only full read support */,
414 read_handler_base85);
415 svn_stream_set_close(base85s, close_handler_base85);
416 return base85s;
417 }
418
419 /* Baton for the length verification stream functions */
420 struct length_verify_baton_t
421 {
422 svn_stream_t *inner;
423 svn_filesize_t remaining;
424 };
425
426 /* Implements svn_read_fn_t for the length verification stream */
427 static svn_error_t *
read_handler_length_verify(void * baton,char * buffer,apr_size_t * len)428 read_handler_length_verify(void *baton, char *buffer, apr_size_t *len)
429 {
430 struct length_verify_baton_t *lvb = baton;
431 apr_size_t requested_len = *len;
432
433 SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len));
434
435 if (*len > lvb->remaining)
436 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
437 _("Base85 data expands to longer than declared "
438 "filesize"));
439 else if (requested_len > *len && *len != lvb->remaining)
440 return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
441 _("Base85 data expands to smaller than declared "
442 "filesize"));
443
444 lvb->remaining -= *len;
445
446 return SVN_NO_ERROR;
447 }
448
449 /* Implements svn_close_fn_t for the length verification stream */
450 static svn_error_t *
close_handler_length_verify(void * baton)451 close_handler_length_verify(void *baton)
452 {
453 struct length_verify_baton_t *lvb = baton;
454
455 return svn_error_trace(svn_stream_close(lvb->inner));
456 }
457
458 /* Gets a stream that verifies on reads that the inner stream is exactly
459 of the specified length */
460 static svn_stream_t *
get_verify_length_stream(svn_stream_t * inner,svn_filesize_t expected_size,apr_pool_t * result_pool)461 get_verify_length_stream(svn_stream_t *inner,
462 svn_filesize_t expected_size,
463 apr_pool_t *result_pool)
464 {
465 struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb));
466 svn_stream_t *len_stream = svn_stream_create(lvb, result_pool);
467
468 lvb->inner = inner;
469 lvb->remaining = expected_size;
470
471 svn_stream_set_read2(len_stream, NULL /* only full read support */,
472 read_handler_length_verify);
473 svn_stream_set_close(len_stream, close_handler_length_verify);
474
475 return len_stream;
476 }
477
478 svn_stream_t *
svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)479 svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
480 apr_pool_t *result_pool)
481 {
482 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start,
483 bpatch->src_end, result_pool);
484
485 s = svn_stream_compressed(s, result_pool);
486
487 /* ### If we (ever) want to support the DELTA format, then we should hook the
488 undelta handling here */
489
490 return get_verify_length_stream(s, bpatch->src_filesize, result_pool);
491 }
492
493 svn_stream_t *
svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)494 svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
495 apr_pool_t *result_pool)
496 {
497 svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start,
498 bpatch->dst_end, result_pool);
499
500 s = svn_stream_compressed(s, result_pool);
501
502 /* ### If we (ever) want to support the DELTA format, then we should hook the
503 undelta handling here */
504
505 return get_verify_length_stream(s, bpatch->dst_filesize, result_pool);
506 }
507
508 /* Try to parse a positive number from a decimal number encoded
509 * in the string NUMBER. Return parsed number in OFFSET, and return
510 * TRUE if parsing was successful. */
511 static svn_boolean_t
parse_offset(svn_linenum_t * offset,const char * number)512 parse_offset(svn_linenum_t *offset, const char *number)
513 {
514 svn_error_t *err;
515 apr_uint64_t val;
516
517 err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
518 if (err)
519 {
520 svn_error_clear(err);
521 return FALSE;
522 }
523
524 *offset = (svn_linenum_t)val;
525
526 return TRUE;
527 }
528
529 /* Try to parse a hunk range specification from the string RANGE.
530 * Return parsed information in *START and *LENGTH, and return TRUE
531 * if the range parsed correctly. Note: This function may modify the
532 * input value RANGE. */
533 static svn_boolean_t
parse_range(svn_linenum_t * start,svn_linenum_t * length,char * range)534 parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
535 {
536 char *comma;
537
538 if (*range == 0)
539 return FALSE;
540
541 comma = strstr(range, ",");
542 if (comma)
543 {
544 if (strlen(comma + 1) > 0)
545 {
546 /* Try to parse the length. */
547 if (! parse_offset(length, comma + 1))
548 return FALSE;
549
550 /* Snip off the end of the string,
551 * so we can comfortably parse the line
552 * number the hunk starts at. */
553 *comma = '\0';
554 }
555 else
556 /* A comma but no length? */
557 return FALSE;
558 }
559 else
560 {
561 *length = 1;
562 }
563
564 /* Try to parse the line number the hunk starts at. */
565 return parse_offset(start, range);
566 }
567
568 /* Try to parse a hunk header in string HEADER, putting parsed information
569 * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
570 * character string used to delimit the hunk header.
571 * Do all allocations in POOL. */
572 static svn_boolean_t
parse_hunk_header(const char * header,svn_diff_hunk_t * hunk,const char * atat,apr_pool_t * pool)573 parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
574 const char *atat, apr_pool_t *pool)
575 {
576 const char *p;
577 const char *start;
578 svn_stringbuf_t *range;
579
580 p = header + strlen(atat);
581 if (*p != ' ')
582 /* No. */
583 return FALSE;
584 p++;
585 if (*p != '-')
586 /* Nah... */
587 return FALSE;
588 /* OK, this may be worth allocating some memory for... */
589 range = svn_stringbuf_create_ensure(31, pool);
590 start = ++p;
591 while (*p && *p != ' ')
592 {
593 p++;
594 }
595
596 if (*p != ' ')
597 /* No no no... */
598 return FALSE;
599
600 svn_stringbuf_appendbytes(range, start, p - start);
601
602 /* Try to parse the first range. */
603 if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
604 return FALSE;
605
606 /* Clear the stringbuf so we can reuse it for the second range. */
607 svn_stringbuf_setempty(range);
608 p++;
609 if (*p != '+')
610 /* Eeek! */
611 return FALSE;
612 /* OK, this may be worth copying... */
613 start = ++p;
614 while (*p && *p != ' ')
615 {
616 p++;
617 }
618 if (*p != ' ')
619 /* No no no... */
620 return FALSE;
621
622 svn_stringbuf_appendbytes(range, start, p - start);
623
624 /* Check for trailing @@ */
625 p++;
626 if (! starts_with(p, atat))
627 return FALSE;
628
629 /* There may be stuff like C-function names after the trailing @@,
630 * but we ignore that. */
631
632 /* Try to parse the second range. */
633 if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
634 return FALSE;
635
636 /* Hunk header is good. */
637 return TRUE;
638 }
639
640 /* Read a line of original or modified hunk text from the specified
641 * RANGE within FILE. FILE is expected to contain unidiff text.
642 * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
643 * Any lines commencing with the VERBOTEN character are discarded.
644 * VERBOTEN should be '+' or '-', depending on which form of hunk text
645 * is being read. NO_FINAL_EOL declares if the hunk contains a no final
646 * EOL marker.
647 *
648 * All other parameters are as in svn_diff_hunk_readline_original_text()
649 * and svn_diff_hunk_readline_modified_text().
650 */
651 static svn_error_t *
hunk_readline_original_or_modified(apr_file_t * file,struct svn_diff__hunk_range * range,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,char verboten,svn_boolean_t no_final_eol,apr_pool_t * result_pool,apr_pool_t * scratch_pool)652 hunk_readline_original_or_modified(apr_file_t *file,
653 struct svn_diff__hunk_range *range,
654 svn_stringbuf_t **stringbuf,
655 const char **eol,
656 svn_boolean_t *eof,
657 char verboten,
658 svn_boolean_t no_final_eol,
659 apr_pool_t *result_pool,
660 apr_pool_t *scratch_pool)
661 {
662 apr_size_t max_len;
663 svn_boolean_t filtered;
664 apr_off_t pos;
665 svn_stringbuf_t *str;
666 const char *eol_p;
667 apr_pool_t *last_pool;
668
669 if (!eol)
670 eol = &eol_p;
671
672 if (range->current >= range->end)
673 {
674 /* We're past the range. Indicate that no bytes can be read. */
675 *eof = TRUE;
676 *eol = NULL;
677 *stringbuf = svn_stringbuf_create_empty(result_pool);
678 return SVN_NO_ERROR;
679 }
680
681 SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool));
682 SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
683
684 /* It's not ITERPOOL because we use data allocated in LAST_POOL out
685 of the loop. */
686 last_pool = svn_pool_create(scratch_pool);
687 do
688 {
689 svn_pool_clear(last_pool);
690
691 max_len = range->end - range->current;
692 SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
693 last_pool, last_pool));
694 SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool));
695 filtered = (str->data[0] == verboten || str->data[0] == '\\');
696 }
697 while (filtered && ! *eof);
698
699 if (filtered)
700 {
701 /* EOF, return an empty string. */
702 *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
703 *eol = NULL;
704 }
705 else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
706 {
707 /* Shave off leading unidiff symbols. */
708 *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
709 }
710 else
711 {
712 /* Return the line as-is. Handle as a chopped leading spaces */
713 *stringbuf = svn_stringbuf_dup(str, result_pool);
714 }
715
716 if (!filtered && *eof && !*eol && *str->data)
717 {
718 /* Ok, we miss a final EOL in the patch file, but didn't see a
719 no eol marker line.
720
721 We should report that we had an EOL or the patch code will
722 misbehave (and it knows nothing about no eol markers) */
723
724 if (!no_final_eol && eol != &eol_p)
725 {
726 apr_off_t start = 0;
727
728 SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
729
730 SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX,
731 scratch_pool, scratch_pool));
732
733 /* Every patch file that has hunks has at least one EOL*/
734 SVN_ERR_ASSERT(*eol != NULL);
735 }
736
737 *eof = FALSE;
738 /* Fall through to seek back to the right location */
739 }
740 SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
741
742 svn_pool_destroy(last_pool);
743 return SVN_NO_ERROR;
744 }
745
746 svn_error_t *
svn_diff_hunk_readline_original_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)747 svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
748 svn_stringbuf_t **stringbuf,
749 const char **eol,
750 svn_boolean_t *eof,
751 apr_pool_t *result_pool,
752 apr_pool_t *scratch_pool)
753 {
754 return svn_error_trace(
755 hunk_readline_original_or_modified(hunk->apr_file,
756 hunk->patch->reverse ?
757 &hunk->modified_text_range :
758 &hunk->original_text_range,
759 stringbuf, eol, eof,
760 hunk->patch->reverse ? '-' : '+',
761 hunk->patch->reverse
762 ? hunk->modified_no_final_eol
763 : hunk->original_no_final_eol,
764 result_pool, scratch_pool));
765 }
766
767 svn_error_t *
svn_diff_hunk_readline_modified_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)768 svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
769 svn_stringbuf_t **stringbuf,
770 const char **eol,
771 svn_boolean_t *eof,
772 apr_pool_t *result_pool,
773 apr_pool_t *scratch_pool)
774 {
775 return svn_error_trace(
776 hunk_readline_original_or_modified(hunk->apr_file,
777 hunk->patch->reverse ?
778 &hunk->original_text_range :
779 &hunk->modified_text_range,
780 stringbuf, eol, eof,
781 hunk->patch->reverse ? '+' : '-',
782 hunk->patch->reverse
783 ? hunk->original_no_final_eol
784 : hunk->modified_no_final_eol,
785 result_pool, scratch_pool));
786 }
787
788 svn_error_t *
svn_diff_hunk_readline_diff_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)789 svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
790 svn_stringbuf_t **stringbuf,
791 const char **eol,
792 svn_boolean_t *eof,
793 apr_pool_t *result_pool,
794 apr_pool_t *scratch_pool)
795 {
796 svn_stringbuf_t *line;
797 apr_size_t max_len;
798 apr_off_t pos;
799 const char *eol_p;
800
801 if (!eol)
802 eol = &eol_p;
803
804 if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
805 {
806 /* We're past the range. Indicate that no bytes can be read. */
807 *eof = TRUE;
808 *eol = NULL;
809 *stringbuf = svn_stringbuf_create_empty(result_pool);
810 return SVN_NO_ERROR;
811 }
812
813 SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool));
814 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
815 &hunk->diff_text_range.current, scratch_pool));
816 max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
817 SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
818 result_pool,
819 scratch_pool));
820 SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current,
821 hunk->apr_file, scratch_pool));
822
823 if (*eof && !*eol && *line->data)
824 {
825 /* Ok, we miss a final EOL in the patch file, but didn't see a
826 no eol marker line.
827
828 We should report that we had an EOL or the patch code will
829 misbehave (and it knows nothing about no eol markers) */
830
831 if (eol != &eol_p)
832 {
833 /* Lets pick the first eol we find in our patch file */
834 apr_off_t start = 0;
835 svn_stringbuf_t *str;
836
837 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start,
838 scratch_pool));
839
840 SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL,
841 APR_SIZE_MAX,
842 scratch_pool, scratch_pool));
843
844 /* Every patch file that has hunks has at least one EOL*/
845 SVN_ERR_ASSERT(*eol != NULL);
846 }
847
848 *eof = FALSE;
849
850 /* Fall through to seek back to the right location */
851 }
852
853 SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
854
855 if (hunk->patch->reverse)
856 {
857 if (line->data[0] == '+')
858 line->data[0] = '-';
859 else if (line->data[0] == '-')
860 line->data[0] = '+';
861 }
862
863 *stringbuf = line;
864
865 return SVN_NO_ERROR;
866 }
867
868 /* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
869 * Allocate *PROP_NAME in RESULT_POOL.
870 * Set *PROP_NAME to NULL if no valid property name was found. */
871 static svn_error_t *
parse_prop_name(const char ** prop_name,const char * header,const char * indicator,apr_pool_t * result_pool)872 parse_prop_name(const char **prop_name, const char *header,
873 const char *indicator, apr_pool_t *result_pool)
874 {
875 SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
876 header + strlen(indicator),
877 result_pool));
878 if (**prop_name == '\0')
879 *prop_name = NULL;
880 else if (! svn_prop_name_is_valid(*prop_name))
881 {
882 svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
883 svn_stringbuf_strip_whitespace(buf);
884 *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
885 }
886
887 return SVN_NO_ERROR;
888 }
889
890
891 /* A helper function to parse svn:mergeinfo diffs.
892 *
893 * These diffs use a special pretty-print format, for instance:
894 *
895 * Added: svn:mergeinfo
896 * ## -0,0 +0,1 ##
897 * Merged /trunk:r2-3
898 *
899 * The hunk header has the following format:
900 * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
901 *
902 * At this point, the number of reverse merges has already been
903 * parsed into HUNK->ORIGINAL_LENGTH, and the number of forward
904 * merges has been parsed into HUNK->MODIFIED_LENGTH.
905 *
906 * The header is followed by a list of mergeinfo, one path per line.
907 * This function parses such lines. Lines describing reverse merges
908 * appear first, and then all lines describing forward merges appear.
909 *
910 * Parts of the line are affected by i18n. The words 'Merged'
911 * and 'Reverse-merged' can appear in any language and at any
912 * position within the line. We can only assume that a leading
913 * '/' starts the merge source path, the path is followed by
914 * ":r", which in turn is followed by a mergeinfo revision range,
915 * which is terminated by whitespace or end-of-string.
916 *
917 * If the current line meets the above criteria and we're able
918 * to parse valid mergeinfo from it, the resulting mergeinfo
919 * is added to patch->mergeinfo or patch->reverse_mergeinfo,
920 * and we proceed to the next line.
921 */
922 static svn_error_t *
parse_mergeinfo(svn_boolean_t * found_mergeinfo,svn_stringbuf_t * line,svn_diff_hunk_t * hunk,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)923 parse_mergeinfo(svn_boolean_t *found_mergeinfo,
924 svn_stringbuf_t *line,
925 svn_diff_hunk_t *hunk,
926 svn_patch_t *patch,
927 apr_pool_t *result_pool,
928 apr_pool_t *scratch_pool)
929 {
930 char *slash = strchr(line->data, '/');
931 char *colon = strrchr(line->data, ':');
932
933 *found_mergeinfo = FALSE;
934
935 if (slash && colon && colon[1] == 'r' && slash < colon)
936 {
937 svn_stringbuf_t *input;
938 svn_mergeinfo_t mergeinfo = NULL;
939 char *s;
940 svn_error_t *err;
941
942 input = svn_stringbuf_create_ensure(line->len, scratch_pool);
943
944 /* Copy the merge source path + colon */
945 s = slash;
946 while (s <= colon)
947 {
948 svn_stringbuf_appendbyte(input, *s);
949 s++;
950 }
951
952 /* skip 'r' after colon */
953 s++;
954
955 /* Copy the revision range. */
956 while (s < line->data + line->len)
957 {
958 if (svn_ctype_isspace(*s))
959 break;
960 svn_stringbuf_appendbyte(input, *s);
961 s++;
962 }
963
964 err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
965 if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
966 {
967 svn_error_clear(err);
968 mergeinfo = NULL;
969 }
970 else
971 SVN_ERR(err);
972
973 if (mergeinfo)
974 {
975 if (hunk->original_length > 0) /* reverse merges */
976 {
977 if (patch->reverse)
978 {
979 if (patch->mergeinfo == NULL)
980 patch->mergeinfo = mergeinfo;
981 else
982 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
983 mergeinfo,
984 result_pool,
985 scratch_pool));
986 }
987 else
988 {
989 if (patch->reverse_mergeinfo == NULL)
990 patch->reverse_mergeinfo = mergeinfo;
991 else
992 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
993 mergeinfo,
994 result_pool,
995 scratch_pool));
996 }
997 hunk->original_length--;
998 }
999 else if (hunk->modified_length > 0) /* forward merges */
1000 {
1001 if (patch->reverse)
1002 {
1003 if (patch->reverse_mergeinfo == NULL)
1004 patch->reverse_mergeinfo = mergeinfo;
1005 else
1006 SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1007 mergeinfo,
1008 result_pool,
1009 scratch_pool));
1010 }
1011 else
1012 {
1013 if (patch->mergeinfo == NULL)
1014 patch->mergeinfo = mergeinfo;
1015 else
1016 SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
1017 mergeinfo,
1018 result_pool,
1019 scratch_pool));
1020 }
1021 hunk->modified_length--;
1022 }
1023
1024 *found_mergeinfo = TRUE;
1025 }
1026 }
1027
1028 return SVN_NO_ERROR;
1029 }
1030
1031 /* Return the next *HUNK from a PATCH in APR_FILE.
1032 * If no hunk can be found, set *HUNK to NULL.
1033 * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
1034 * is the first belonging to a certain property, then PROP_NAME and
1035 * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
1036 * NULL. If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
1037 * treated as context lines. Allocate results in RESULT_POOL.
1038 * Use SCRATCH_POOL for all other allocations. */
1039 static svn_error_t *
parse_next_hunk(svn_diff_hunk_t ** hunk,svn_boolean_t * is_property,const char ** prop_name,svn_diff_operation_kind_t * prop_operation,svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1040 parse_next_hunk(svn_diff_hunk_t **hunk,
1041 svn_boolean_t *is_property,
1042 const char **prop_name,
1043 svn_diff_operation_kind_t *prop_operation,
1044 svn_patch_t *patch,
1045 apr_file_t *apr_file,
1046 svn_boolean_t ignore_whitespace,
1047 apr_pool_t *result_pool,
1048 apr_pool_t *scratch_pool)
1049 {
1050 static const char * const minus = "--- ";
1051 static const char * const text_atat = "@@";
1052 static const char * const prop_atat = "##";
1053 svn_stringbuf_t *line;
1054 svn_boolean_t eof, in_hunk, hunk_seen;
1055 apr_off_t pos, last_line;
1056 apr_off_t start, end;
1057 apr_off_t original_end;
1058 apr_off_t modified_end;
1059 svn_boolean_t original_no_final_eol = FALSE;
1060 svn_boolean_t modified_no_final_eol = FALSE;
1061 svn_linenum_t original_lines;
1062 svn_linenum_t modified_lines;
1063 svn_linenum_t leading_context;
1064 svn_linenum_t trailing_context;
1065 svn_boolean_t changed_line_seen;
1066 enum {
1067 noise_line,
1068 original_line,
1069 modified_line,
1070 context_line
1071 } last_line_type;
1072 apr_pool_t *iterpool;
1073
1074 *prop_operation = svn_diff_op_unchanged;
1075
1076 /* We only set this if we have a property hunk header. */
1077 *prop_name = NULL;
1078 *is_property = FALSE;
1079
1080 if (apr_file_eof(apr_file) == APR_EOF)
1081 {
1082 /* No more hunks here. */
1083 *hunk = NULL;
1084 return SVN_NO_ERROR;
1085 }
1086
1087 in_hunk = FALSE;
1088 hunk_seen = FALSE;
1089 leading_context = 0;
1090 trailing_context = 0;
1091 changed_line_seen = FALSE;
1092 original_end = 0;
1093 modified_end = 0;
1094 *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
1095
1096 /* Get current seek position. */
1097 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
1098
1099 /* Start out assuming noise. */
1100 last_line_type = noise_line;
1101
1102 iterpool = svn_pool_create(scratch_pool);
1103 do
1104 {
1105
1106 svn_pool_clear(iterpool);
1107
1108 /* Remember the current line's offset, and read the line. */
1109 last_line = pos;
1110 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
1111 iterpool, iterpool));
1112
1113 /* Update line offset for next iteration. */
1114 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
1115
1116 /* Lines starting with a backslash indicate a missing EOL:
1117 * "\ No newline at end of file" or "end of property". */
1118 if (line->data[0] == '\\')
1119 {
1120 if (in_hunk)
1121 {
1122 char eolbuf[2];
1123 apr_size_t len;
1124 apr_off_t off;
1125 apr_off_t hunk_text_end;
1126
1127 /* Comment terminates the hunk text and says the hunk text
1128 * has no trailing EOL. Snip off trailing EOL which is part
1129 * of the patch file but not part of the hunk text. */
1130 off = last_line - 2;
1131 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
1132 len = sizeof(eolbuf);
1133 SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
1134 &eof, iterpool));
1135 if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
1136 hunk_text_end = last_line - 2;
1137 else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
1138 hunk_text_end = last_line - 1;
1139 else
1140 hunk_text_end = last_line;
1141
1142 if (last_line_type == original_line && original_end == 0)
1143 original_end = hunk_text_end;
1144 else if (last_line_type == modified_line && modified_end == 0)
1145 modified_end = hunk_text_end;
1146 else if (last_line_type == context_line)
1147 {
1148 if (original_end == 0)
1149 original_end = hunk_text_end;
1150 if (modified_end == 0)
1151 modified_end = hunk_text_end;
1152 }
1153
1154 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
1155 /* Set for the type and context by using != the other type */
1156 if (last_line_type != modified_line)
1157 original_no_final_eol = TRUE;
1158 if (last_line_type != original_line)
1159 modified_no_final_eol = TRUE;
1160 }
1161
1162 continue;
1163 }
1164
1165 if (in_hunk && *is_property && *prop_name &&
1166 strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
1167 {
1168 svn_boolean_t found_mergeinfo;
1169
1170 SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch,
1171 result_pool, iterpool));
1172 if (found_mergeinfo)
1173 continue; /* Proceed to the next line in the svn:mergeinfo hunk. */
1174 else
1175 {
1176 /* Perhaps we can also use original_lines/modified_lines here */
1177
1178 in_hunk = FALSE; /* On to next property */
1179 }
1180 }
1181
1182 if (in_hunk)
1183 {
1184 char c;
1185 static const char add = '+';
1186 static const char del = '-';
1187
1188 if (! hunk_seen)
1189 {
1190 /* We're reading the first line of the hunk, so the start
1191 * of the line just read is the hunk text's byte offset. */
1192 start = last_line;
1193 }
1194
1195 c = line->data[0];
1196 if (c == ' '
1197 || ((original_lines > 0 && modified_lines > 0)
1198 && (
1199 /* Tolerate chopped leading spaces on empty lines. */
1200 (! eof && line->len == 0)
1201 /* Maybe tolerate chopped leading spaces on non-empty lines. */
1202 || (ignore_whitespace && c != del && c != add))))
1203 {
1204 /* It's a "context" line in the hunk. */
1205 hunk_seen = TRUE;
1206 if (original_lines > 0)
1207 original_lines--;
1208 else
1209 {
1210 (*hunk)->original_length++;
1211 (*hunk)->original_fuzz++;
1212 }
1213 if (modified_lines > 0)
1214 modified_lines--;
1215 else
1216 {
1217 (*hunk)->modified_length++;
1218 (*hunk)->modified_fuzz++;
1219 }
1220 if (changed_line_seen)
1221 trailing_context++;
1222 else
1223 leading_context++;
1224 last_line_type = context_line;
1225 }
1226 else if (c == del
1227 && (original_lines > 0 || line->data[1] != del))
1228 {
1229 /* It's a "deleted" line in the hunk. */
1230 hunk_seen = TRUE;
1231 changed_line_seen = TRUE;
1232
1233 /* A hunk may have context in the middle. We only want
1234 trailing lines of context. */
1235 if (trailing_context > 0)
1236 trailing_context = 0;
1237
1238 if (original_lines > 0)
1239 original_lines--;
1240 else
1241 {
1242 (*hunk)->original_length++;
1243 (*hunk)->original_fuzz++;
1244 }
1245 last_line_type = original_line;
1246 }
1247 else if (c == add
1248 && (modified_lines > 0 || line->data[1] != add))
1249 {
1250 /* It's an "added" line in the hunk. */
1251 hunk_seen = TRUE;
1252 changed_line_seen = TRUE;
1253
1254 /* A hunk may have context in the middle. We only want
1255 trailing lines of context. */
1256 if (trailing_context > 0)
1257 trailing_context = 0;
1258
1259 if (modified_lines > 0)
1260 modified_lines--;
1261 else
1262 {
1263 (*hunk)->modified_length++;
1264 (*hunk)->modified_fuzz++;
1265 }
1266 last_line_type = modified_line;
1267 }
1268 else
1269 {
1270 if (eof)
1271 {
1272 /* The hunk ends at EOF. */
1273 end = pos;
1274 }
1275 else
1276 {
1277 /* The start of the current line marks the first byte
1278 * after the hunk text. */
1279 end = last_line;
1280 }
1281 if (original_end == 0)
1282 original_end = end;
1283 if (modified_end == 0)
1284 modified_end = end;
1285 break; /* Hunk was empty or has been read. */
1286 }
1287 }
1288 else
1289 {
1290 if (starts_with(line->data, text_atat))
1291 {
1292 /* Looks like we have a hunk header, try to rip it apart. */
1293 in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
1294 iterpool);
1295 if (in_hunk)
1296 {
1297 original_lines = (*hunk)->original_length;
1298 modified_lines = (*hunk)->modified_length;
1299 *is_property = FALSE;
1300 }
1301 }
1302 else if (starts_with(line->data, prop_atat))
1303 {
1304 /* Looks like we have a property hunk header, try to rip it
1305 * apart. */
1306 in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
1307 iterpool);
1308 if (in_hunk)
1309 {
1310 original_lines = (*hunk)->original_length;
1311 modified_lines = (*hunk)->modified_length;
1312 *is_property = TRUE;
1313 }
1314 }
1315 else if (starts_with(line->data, "Added: "))
1316 {
1317 SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
1318 result_pool));
1319 if (*prop_name)
1320 *prop_operation = (patch->reverse ? svn_diff_op_deleted
1321 : svn_diff_op_added);
1322 }
1323 else if (starts_with(line->data, "Deleted: "))
1324 {
1325 SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
1326 result_pool));
1327 if (*prop_name)
1328 *prop_operation = (patch->reverse ? svn_diff_op_added
1329 : svn_diff_op_deleted);
1330 }
1331 else if (starts_with(line->data, "Modified: "))
1332 {
1333 SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
1334 result_pool));
1335 if (*prop_name)
1336 *prop_operation = svn_diff_op_modified;
1337 }
1338 else if (starts_with(line->data, minus)
1339 || starts_with(line->data, "diff --git "))
1340 /* This could be a header of another patch. Bail out. */
1341 break;
1342 }
1343 }
1344 /* Check for the line length since a file may not have a newline at the
1345 * end and we depend upon the last line to be an empty one. */
1346 while (! eof || line->len > 0);
1347 svn_pool_destroy(iterpool);
1348
1349 if (! eof)
1350 /* Rewind to the start of the line just read, so subsequent calls
1351 * to this function or svn_diff_parse_next_patch() don't end
1352 * up skipping the line -- it may contain a patch or hunk header. */
1353 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
1354
1355 if (hunk_seen && start < end)
1356 {
1357 /* Did we get the number of context lines announced in the header?
1358
1359 If not... let's limit the number from the header to what we
1360 actually have, and apply a fuzz penalty */
1361 if (original_lines)
1362 {
1363 (*hunk)->original_length -= original_lines;
1364 (*hunk)->original_fuzz += original_lines;
1365 }
1366 if (modified_lines)
1367 {
1368 (*hunk)->modified_length -= modified_lines;
1369 (*hunk)->modified_fuzz += modified_lines;
1370 }
1371
1372 (*hunk)->patch = patch;
1373 (*hunk)->apr_file = apr_file;
1374 (*hunk)->leading_context = leading_context;
1375 (*hunk)->trailing_context = trailing_context;
1376 (*hunk)->diff_text_range.start = start;
1377 (*hunk)->diff_text_range.current = start;
1378 (*hunk)->diff_text_range.end = end;
1379 (*hunk)->original_text_range.start = start;
1380 (*hunk)->original_text_range.current = start;
1381 (*hunk)->original_text_range.end = original_end;
1382 (*hunk)->modified_text_range.start = start;
1383 (*hunk)->modified_text_range.current = start;
1384 (*hunk)->modified_text_range.end = modified_end;
1385 (*hunk)->original_no_final_eol = original_no_final_eol;
1386 (*hunk)->modified_no_final_eol = modified_no_final_eol;
1387 }
1388 else
1389 /* Something went wrong, just discard the result. */
1390 *hunk = NULL;
1391
1392 return SVN_NO_ERROR;
1393 }
1394
1395 /* Compare function for sorting hunks after parsing.
1396 * We sort hunks by their original line offset. */
1397 static int
compare_hunks(const void * a,const void * b)1398 compare_hunks(const void *a, const void *b)
1399 {
1400 const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
1401 const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
1402
1403 if (ha->original_start < hb->original_start)
1404 return -1;
1405 if (ha->original_start > hb->original_start)
1406 return 1;
1407 return 0;
1408 }
1409
/* Possible states of the diff header parser.
 * The trailing comment on each state shows the kind of header line that
 * leads to it. */
enum parse_state
{
   state_start,             /* initial */
   state_git_diff_seen,     /* diff --git */
   state_git_tree_seen,     /* a tree operation, rather than content change */
   state_git_minus_seen,    /* --- /dev/null; or --- a/ */
   state_git_plus_seen,     /* +++ /dev/null; or +++ b/ */
   state_old_mode_seen,     /* old mode 100644 */
   state_git_mode_seen,     /* new mode 100644 */
   state_move_from_seen,    /* rename from foo.c */
   state_copy_from_seen,    /* copy from foo.c */
   state_minus_seen,        /* --- foo.c */
   state_unidiff_found,     /* valid start of a regular unidiff header */
   state_git_header_found,  /* valid start of a --git diff header */
   state_binary_patch_found /* valid start of binary patch */
};
1427
/* Data type describing a valid state transition of the parser. */
struct transition
{
  /* Text the input line is matched against.
   * NOTE(review): presumably a line prefix such as "--- " or
   * "diff --git "; the matching code is not in this part of the file --
   * confirm. */
  const char *expected_input;

  /* State associated with this transition.
   * NOTE(review): presumably the state the parser has to be in for the
   * transition to apply -- confirm against the code that walks the
   * transition table. */
  enum parse_state required_state;

  /* A callback called upon each parser state transition.
   * It receives the INPUT line and the PATCH being filled in, and may
   * store the follow-up state in *NEW_STATE (git_index(), for one, leaves
   * *NEW_STATE untouched). */
  svn_error_t *(*fn)(enum parse_state *new_state, char *input,
                     svn_patch_t *patch, apr_pool_t *result_pool,
                     apr_pool_t *scratch_pool);
};
1439
1440 /* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
1441 static svn_error_t *
grab_filename(const char ** file_name,const char * line,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1442 grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
1443 apr_pool_t *scratch_pool)
1444 {
1445 const char *utf8_path;
1446 const char *canon_path;
1447
1448 /* Grab the filename and encode it in UTF-8. */
1449 /* TODO: Allow specifying the patch file's encoding.
1450 * For now, we assume its encoding is native. */
1451 /* ### This can fail if the filename cannot be represented in the current
1452 * ### locale's encoding. */
1453 SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
1454 line,
1455 scratch_pool));
1456
1457 /* Canonicalize the path name. */
1458 canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
1459
1460 *file_name = apr_pstrdup(result_pool, canon_path);
1461
1462 return SVN_NO_ERROR;
1463 }
1464
1465 /* Parse the '--- ' line of a regular unidiff. */
1466 static svn_error_t *
diff_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1467 diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1468 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1469 {
1470 /* If we can find a tab, it separates the filename from
1471 * the rest of the line which we can discard. */
1472 char *tab = strchr(line, '\t');
1473 if (tab)
1474 *tab = '\0';
1475
1476 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
1477 result_pool, scratch_pool));
1478
1479 *new_state = state_minus_seen;
1480
1481 return SVN_NO_ERROR;
1482 }
1483
1484 /* Parse the '+++ ' line of a regular unidiff. */
1485 static svn_error_t *
diff_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1486 diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1487 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1488 {
1489 /* If we can find a tab, it separates the filename from
1490 * the rest of the line which we can discard. */
1491 char *tab = strchr(line, '\t');
1492 if (tab)
1493 *tab = '\0';
1494
1495 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1496 result_pool, scratch_pool));
1497
1498 *new_state = state_unidiff_found;
1499
1500 return SVN_NO_ERROR;
1501 }
1502
1503 /* Parse the first line of a git extended unidiff. */
1504 static svn_error_t *
git_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1505 git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1506 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1507 {
1508 const char *old_path_start;
1509 char *old_path_end;
1510 const char *new_path_start;
1511 const char *new_path_end;
1512 char *new_path_marker;
1513 const char *old_path_marker;
1514
1515 /* ### Add handling of escaped paths
1516 * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1517 *
1518 * TAB, LF, double quote and backslash characters in pathnames are
1519 * represented as \t, \n, \" and \\, respectively. If there is need for
1520 * such substitution then the whole pathname is put in double quotes.
1521 */
1522
1523 /* Our line should look like this: 'diff --git a/path b/path'.
1524 *
1525 * If we find any deviations from that format, we return with state reset
1526 * to start.
1527 */
1528 old_path_marker = strstr(line, " a/");
1529
1530 if (! old_path_marker)
1531 {
1532 *new_state = state_start;
1533 return SVN_NO_ERROR;
1534 }
1535
1536 if (! *(old_path_marker + 3))
1537 {
1538 *new_state = state_start;
1539 return SVN_NO_ERROR;
1540 }
1541
1542 new_path_marker = strstr(old_path_marker, " b/");
1543
1544 if (! new_path_marker)
1545 {
1546 *new_state = state_start;
1547 return SVN_NO_ERROR;
1548 }
1549
1550 if (! *(new_path_marker + 3))
1551 {
1552 *new_state = state_start;
1553 return SVN_NO_ERROR;
1554 }
1555
1556 /* By now, we know that we have a line on the form '--git diff a/.+ b/.+'
1557 * We only need the filenames when we have deleted or added empty
1558 * files. In those cases the old_path and new_path is identical on the
1559 * 'diff --git' line. For all other cases we fetch the filenames from
1560 * other header lines. */
1561 old_path_start = line + STRLEN_LITERAL("diff --git a/");
1562 new_path_end = line + strlen(line);
1563 new_path_start = old_path_start;
1564
1565 while (TRUE)
1566 {
1567 ptrdiff_t len_old;
1568 ptrdiff_t len_new;
1569
1570 new_path_marker = strstr(new_path_start, " b/");
1571
1572 /* No new path marker, bail out. */
1573 if (! new_path_marker)
1574 break;
1575
1576 old_path_end = new_path_marker;
1577 new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1578
1579 /* No path after the marker. */
1580 if (! *new_path_start)
1581 break;
1582
1583 len_old = old_path_end - old_path_start;
1584 len_new = new_path_end - new_path_start;
1585
1586 /* Are the paths before and after the " b/" marker the same? */
1587 if (len_old == len_new
1588 && ! strncmp(old_path_start, new_path_start, len_old))
1589 {
1590 *old_path_end = '\0';
1591 SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1592 result_pool, scratch_pool));
1593
1594 SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1595 result_pool, scratch_pool));
1596 break;
1597 }
1598 }
1599
1600 /* We assume that the path is only modified until we've found a 'tree'
1601 * header */
1602 patch->operation = svn_diff_op_modified;
1603
1604 *new_state = state_git_diff_seen;
1605 return SVN_NO_ERROR;
1606 }
1607
1608 /* Parse the '--- ' line of a git extended unidiff. */
1609 static svn_error_t *
git_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1610 git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1611 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1612 {
1613 /* If we can find a tab, it separates the filename from
1614 * the rest of the line which we can discard. */
1615 char *tab = strchr(line, '\t');
1616 if (tab)
1617 *tab = '\0';
1618
1619 if (starts_with(line, "--- /dev/null"))
1620 SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1621 result_pool, scratch_pool));
1622 else
1623 SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1624 result_pool, scratch_pool));
1625
1626 *new_state = state_git_minus_seen;
1627 return SVN_NO_ERROR;
1628 }
1629
1630 /* Parse the '+++ ' line of a git extended unidiff. */
1631 static svn_error_t *
git_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1632 git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1633 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1634 {
1635 /* If we can find a tab, it separates the filename from
1636 * the rest of the line which we can discard. */
1637 char *tab = strchr(line, '\t');
1638 if (tab)
1639 *tab = '\0';
1640
1641 if (starts_with(line, "+++ /dev/null"))
1642 SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1643 result_pool, scratch_pool));
1644 else
1645 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1646 result_pool, scratch_pool));
1647
1648 *new_state = state_git_header_found;
1649 return SVN_NO_ERROR;
1650 }
1651
1652 /* Helper for git_old_mode() and git_new_mode(). Translate the git
1653 * file mode MODE_STR into a binary "executable?" and "symlink?" state. */
1654 static svn_error_t *
parse_git_mode_bits(svn_tristate_t * executable_p,svn_tristate_t * symlink_p,const char * mode_str)1655 parse_git_mode_bits(svn_tristate_t *executable_p,
1656 svn_tristate_t *symlink_p,
1657 const char *mode_str)
1658 {
1659 apr_uint64_t mode;
1660 SVN_ERR(svn_cstring_strtoui64(&mode, mode_str,
1661 0 /* min */,
1662 0777777 /* max: six octal digits */,
1663 010 /* radix (octal) */));
1664
1665 /* Note: 0644 and 0755 are the only modes that can occur for plain files.
1666 * We deliberately choose to parse only those values: we are strict in what
1667 * we accept _and_ in what we produce.
1668 *
1669 * (Having said that, though, we could consider relaxing the parser to also
1670 * map
1671 * (mode & 0111) == 0000 -> svn_tristate_false
1672 * (mode & 0111) == 0111 -> svn_tristate_true
1673 * [anything else] -> svn_tristate_unknown
1674 * .)
1675 */
1676
1677 switch (mode & 0777)
1678 {
1679 case 0644:
1680 *executable_p = svn_tristate_false;
1681 break;
1682
1683 case 0755:
1684 *executable_p = svn_tristate_true;
1685 break;
1686
1687 default:
1688 /* Ignore unknown values. */
1689 *executable_p = svn_tristate_unknown;
1690 break;
1691 }
1692
1693 switch (mode & 0170000 /* S_IFMT */)
1694 {
1695 case 0120000: /* S_IFLNK */
1696 *symlink_p = svn_tristate_true;
1697 break;
1698
1699 case 0100000: /* S_IFREG */
1700 case 0040000: /* S_IFDIR */
1701 *symlink_p = svn_tristate_false;
1702 break;
1703
1704 default:
1705 /* Ignore unknown values.
1706 (Including those generated by Subversion <= 1.9) */
1707 *symlink_p = svn_tristate_unknown;
1708 break;
1709 }
1710
1711 return SVN_NO_ERROR;
1712 }
1713
1714 /* Parse the 'old mode ' line of a git extended unidiff. */
1715 static svn_error_t *
git_old_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1716 git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1717 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1718 {
1719 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1720 &patch->old_symlink_bit,
1721 line + STRLEN_LITERAL("old mode ")));
1722
1723 #ifdef SVN_DEBUG
1724 /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1725 SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown);
1726 #endif
1727
1728 *new_state = state_old_mode_seen;
1729 return SVN_NO_ERROR;
1730 }
1731
1732 /* Parse the 'new mode ' line of a git extended unidiff. */
1733 static svn_error_t *
git_new_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1734 git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1735 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1736 {
1737 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1738 &patch->new_symlink_bit,
1739 line + STRLEN_LITERAL("new mode ")));
1740
1741 #ifdef SVN_DEBUG
1742 /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1743 SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown);
1744 #endif
1745
1746 /* Don't touch patch->operation. */
1747
1748 *new_state = state_git_mode_seen;
1749 return SVN_NO_ERROR;
1750 }
1751
1752 static svn_error_t *
git_index(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1753 git_index(enum parse_state *new_state, char *line, svn_patch_t *patch,
1754 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1755 {
1756 /* We either have something like "index 33e5b38..0000000" (which we just
1757 ignore as we are not interested in git specific shas) or something like
1758 "index 33e5b38..0000000 120000" which tells us the mode, that isn't
1759 changed by applying this patch.
1760
1761 If the mode would have changed then we would see 'old mode' and 'new mode'
1762 lines.
1763 */
1764 line = strchr(line + STRLEN_LITERAL("index "), ' ');
1765
1766 if (line && patch->new_executable_bit == svn_tristate_unknown
1767 && patch->new_symlink_bit == svn_tristate_unknown
1768 && patch->operation != svn_diff_op_added
1769 && patch->operation != svn_diff_op_deleted)
1770 {
1771 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1772 &patch->new_symlink_bit,
1773 line + 1));
1774
1775 /* There is no change.. so set the old values to the new values */
1776 patch->old_executable_bit = patch->new_executable_bit;
1777 patch->old_symlink_bit = patch->new_symlink_bit;
1778 }
1779
1780 /* This function doesn't change the state! */
1781 /* *new_state = *new_state */
1782 return SVN_NO_ERROR;
1783 }
1784
1785 /* Parse the 'rename from ' line of a git extended unidiff. */
1786 static svn_error_t *
git_move_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1787 git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1788 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1789 {
1790 SVN_ERR(grab_filename(&patch->old_filename,
1791 line + STRLEN_LITERAL("rename from "),
1792 result_pool, scratch_pool));
1793
1794 *new_state = state_move_from_seen;
1795 return SVN_NO_ERROR;
1796 }
1797
1798 /* Parse the 'rename to ' line of a git extended unidiff. */
1799 static svn_error_t *
git_move_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1800 git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1801 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1802 {
1803 SVN_ERR(grab_filename(&patch->new_filename,
1804 line + STRLEN_LITERAL("rename to "),
1805 result_pool, scratch_pool));
1806
1807 patch->operation = svn_diff_op_moved;
1808
1809 *new_state = state_git_tree_seen;
1810 return SVN_NO_ERROR;
1811 }
1812
1813 /* Parse the 'copy from ' line of a git extended unidiff. */
1814 static svn_error_t *
git_copy_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1815 git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1816 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1817 {
1818 SVN_ERR(grab_filename(&patch->old_filename,
1819 line + STRLEN_LITERAL("copy from "),
1820 result_pool, scratch_pool));
1821
1822 *new_state = state_copy_from_seen;
1823 return SVN_NO_ERROR;
1824 }
1825
1826 /* Parse the 'copy to ' line of a git extended unidiff. */
1827 static svn_error_t *
git_copy_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1828 git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1829 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1830 {
1831 SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1832 result_pool, scratch_pool));
1833
1834 patch->operation = svn_diff_op_copied;
1835
1836 *new_state = state_git_tree_seen;
1837 return SVN_NO_ERROR;
1838 }
1839
1840 /* Parse the 'new file ' line of a git extended unidiff. */
1841 static svn_error_t *
git_new_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1842 git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1843 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1844 {
1845 SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1846 &patch->new_symlink_bit,
1847 line + STRLEN_LITERAL("new file mode ")));
1848
1849 patch->operation = svn_diff_op_added;
1850
1851 /* Filename already retrieved from diff --git header. */
1852
1853 *new_state = state_git_tree_seen;
1854 return SVN_NO_ERROR;
1855 }
1856
1857 /* Parse the 'deleted file ' line of a git extended unidiff. */
1858 static svn_error_t *
git_deleted_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1859 git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1860 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1861 {
1862 SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1863 &patch->old_symlink_bit,
1864 line + STRLEN_LITERAL("deleted file mode ")));
1865
1866 patch->operation = svn_diff_op_deleted;
1867
1868 /* Filename already retrieved from diff --git header. */
1869
1870 *new_state = state_git_tree_seen;
1871 return SVN_NO_ERROR;
1872 }
1873
1874 /* Parse the 'GIT binary patch' header */
1875 static svn_error_t *
binary_patch_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1876 binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1877 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1878 {
1879 *new_state = state_binary_patch_found;
1880 return SVN_NO_ERROR;
1881 }
1882
1883
1884 /* Add a HUNK associated with the property PROP_NAME to PATCH. */
1885 static svn_error_t *
add_property_hunk(svn_patch_t * patch,const char * prop_name,svn_diff_hunk_t * hunk,svn_diff_operation_kind_t operation,apr_pool_t * result_pool)1886 add_property_hunk(svn_patch_t *patch, const char *prop_name,
1887 svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1888 apr_pool_t *result_pool)
1889 {
1890 svn_prop_patch_t *prop_patch;
1891
1892 prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1893
1894 if (! prop_patch)
1895 {
1896 prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1897 prop_patch->name = prop_name;
1898 prop_patch->operation = operation;
1899 prop_patch->hunks = apr_array_make(result_pool, 1,
1900 sizeof(svn_diff_hunk_t *));
1901
1902 svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1903 }
1904
1905 APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1906
1907 return SVN_NO_ERROR;
1908 }
1909
/* State kept for a patch file opened with svn_diff_open_patch_file(). */
struct svn_patch_file_t
{
  /* The APR file handle to the patch file. */
  apr_file_t *apr_file;

  /* The file offset at which the next patch is expected.
   * svn_diff_open_patch_file() starts it out at 0. */
  apr_off_t next_patch_offset;
};
1918
1919 svn_error_t *
svn_diff_open_patch_file(svn_patch_file_t ** patch_file,const char * local_abspath,apr_pool_t * result_pool)1920 svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1921 const char *local_abspath,
1922 apr_pool_t *result_pool)
1923 {
1924 svn_patch_file_t *p;
1925
1926 p = apr_palloc(result_pool, sizeof(*p));
1927 SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1928 APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1929 result_pool));
1930 p->next_patch_offset = 0;
1931 *patch_file = p;
1932
1933 return SVN_NO_ERROR;
1934 }
1935
1936 /* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1937 * Parsing stops if no valid next hunk can be found.
1938 * If IGNORE_WHITESPACE is TRUE, lines without
1939 * leading spaces will be treated as context lines.
1940 * Allocate results in RESULT_POOL.
1941 * Use SCRATCH_POOL for temporary allocations. */
1942 static svn_error_t *
parse_hunks(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1943 parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1944 svn_boolean_t ignore_whitespace,
1945 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1946 {
1947 svn_diff_hunk_t *hunk;
1948 svn_boolean_t is_property;
1949 const char *last_prop_name;
1950 const char *prop_name;
1951 svn_diff_operation_kind_t prop_operation;
1952 apr_pool_t *iterpool;
1953
1954 last_prop_name = NULL;
1955
1956 patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1957 patch->prop_patches = apr_hash_make(result_pool);
1958 iterpool = svn_pool_create(scratch_pool);
1959 do
1960 {
1961 svn_pool_clear(iterpool);
1962
1963 SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
1964 patch, apr_file, ignore_whitespace, result_pool,
1965 iterpool));
1966
1967 if (hunk && is_property)
1968 {
1969 if (! prop_name)
1970 prop_name = last_prop_name;
1971 else
1972 last_prop_name = prop_name;
1973
1974 /* Skip svn:mergeinfo properties.
1975 * Mergeinfo data cannot be represented as a hunk and
1976 * is therefore stored in PATCH itself. */
1977 if (strcmp(prop_name, SVN_PROP_MERGEINFO) == 0)
1978 continue;
1979
1980 SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
1981 result_pool));
1982 }
1983 else if (hunk)
1984 {
1985 APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
1986 last_prop_name = NULL;
1987 }
1988
1989 }
1990 while (hunk);
1991 svn_pool_destroy(iterpool);
1992
1993 return SVN_NO_ERROR;
1994 }
1995
1996 static svn_error_t *
parse_binary_patch(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t reverse,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1997 parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file,
1998 svn_boolean_t reverse,
1999 apr_pool_t *result_pool, apr_pool_t *scratch_pool)
2000 {
2001 apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2002 apr_off_t pos, last_line;
2003 svn_stringbuf_t *line;
2004 svn_boolean_t eof = FALSE;
2005 svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch));
2006 svn_boolean_t in_blob = FALSE;
2007 svn_boolean_t in_src = FALSE;
2008
2009 bpatch->apr_file = apr_file;
2010
2011 patch->prop_patches = apr_hash_make(result_pool);
2012
2013 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
2014
2015 while (!eof)
2016 {
2017 last_line = pos;
2018 SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
2019 iterpool, iterpool));
2020
2021 /* Update line offset for next iteration. */
2022 SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
2023
2024 if (in_blob)
2025 {
2026 char c = line->data[0];
2027
2028 /* 66 = len byte + (52/4*5) chars */
2029 if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
2030 && line->len <= 66
2031 && !strchr(line->data, ':')
2032 && !strchr(line->data, ' '))
2033 {
2034 /* One more blop line */
2035 if (in_src)
2036 bpatch->src_end = pos;
2037 else
2038 bpatch->dst_end = pos;
2039 }
2040 else if (svn_stringbuf_first_non_whitespace(line) < line->len
2041 && !(in_src && bpatch->src_start < last_line))
2042 {
2043 break; /* Bad patch */
2044 }
2045 else if (in_src)
2046 {
2047 patch->binary_patch = bpatch; /* SUCCESS! */
2048 break;
2049 }
2050 else
2051 {
2052 in_blob = FALSE;
2053 in_src = TRUE;
2054 }
2055 }
2056 else if (starts_with(line->data, "literal "))
2057 {
2058 apr_uint64_t expanded_size;
2059 svn_error_t *err = svn_cstring_strtoui64(&expanded_size,
2060 &line->data[8],
2061 0, APR_UINT64_MAX, 10);
2062
2063 if (err)
2064 {
2065 svn_error_clear(err);
2066 break;
2067 }
2068
2069 if (in_src)
2070 {
2071 bpatch->src_start = pos;
2072 bpatch->src_filesize = expanded_size;
2073 }
2074 else
2075 {
2076 bpatch->dst_start = pos;
2077 bpatch->dst_filesize = expanded_size;
2078 }
2079 in_blob = TRUE;
2080 }
2081 else
2082 break; /* We don't support GIT deltas (yet) */
2083 }
2084 svn_pool_destroy(iterpool);
2085
2086 if (!eof)
2087 /* Rewind to the start of the line just read, so subsequent calls
2088 * don't end up skipping the line. It may contain a patch or hunk header.*/
2089 SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
2090 else if (in_src
2091 && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize))
2092 {
2093 patch->binary_patch = bpatch; /* SUCCESS */
2094 }
2095
2096 /* Reverse patch if requested */
2097 if (reverse && patch->binary_patch)
2098 {
2099 apr_off_t tmp_start = bpatch->src_start;
2100 apr_off_t tmp_end = bpatch->src_end;
2101 svn_filesize_t tmp_filesize = bpatch->src_filesize;
2102
2103 bpatch->src_start = bpatch->dst_start;
2104 bpatch->src_end = bpatch->dst_end;
2105 bpatch->src_filesize = bpatch->dst_filesize;
2106
2107 bpatch->dst_start = tmp_start;
2108 bpatch->dst_end = tmp_end;
2109 bpatch->dst_filesize = tmp_filesize;
2110 }
2111
2112 return SVN_NO_ERROR;
2113 }
2114
2115 /* State machine for the diff header parser.
2116 * Expected Input Required state Function to call */
2117 static struct transition transitions[] =
2118 {
2119 {"--- ", state_start, diff_minus},
2120 {"+++ ", state_minus_seen, diff_plus},
2121
2122 {"diff --git", state_start, git_start},
2123 {"--- a/", state_git_diff_seen, git_minus},
2124 {"--- a/", state_git_mode_seen, git_minus},
2125 {"--- a/", state_git_tree_seen, git_minus},
2126 {"--- /dev/null", state_git_mode_seen, git_minus},
2127 {"--- /dev/null", state_git_tree_seen, git_minus},
2128 {"+++ b/", state_git_minus_seen, git_plus},
2129 {"+++ /dev/null", state_git_minus_seen, git_plus},
2130
2131 {"old mode ", state_git_diff_seen, git_old_mode},
2132 {"new mode ", state_old_mode_seen, git_new_mode},
2133
2134 {"rename from ", state_git_diff_seen, git_move_from},
2135 {"rename from ", state_git_mode_seen, git_move_from},
2136 {"rename to ", state_move_from_seen, git_move_to},
2137
2138 {"copy from ", state_git_diff_seen, git_copy_from},
2139 {"copy from ", state_git_mode_seen, git_copy_from},
2140 {"copy to ", state_copy_from_seen, git_copy_to},
2141
2142 {"new file ", state_git_diff_seen, git_new_file},
2143
2144 {"deleted file ", state_git_diff_seen, git_deleted_file},
2145
2146 {"index ", state_git_diff_seen, git_index},
2147 {"index ", state_git_tree_seen, git_index},
2148 {"index ", state_git_mode_seen, git_index},
2149
2150 {"GIT binary patch", state_git_diff_seen, binary_patch_start},
2151 {"GIT binary patch", state_git_tree_seen, binary_patch_start},
2152 {"GIT binary patch", state_git_mode_seen, binary_patch_start},
2153 };
2154
2155 svn_error_t *
svn_diff_parse_next_patch(svn_patch_t ** patch_p,svn_patch_file_t * patch_file,svn_boolean_t reverse,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)2156 svn_diff_parse_next_patch(svn_patch_t **patch_p,
2157 svn_patch_file_t *patch_file,
2158 svn_boolean_t reverse,
2159 svn_boolean_t ignore_whitespace,
2160 apr_pool_t *result_pool,
2161 apr_pool_t *scratch_pool)
2162 {
2163 apr_off_t pos, last_line;
2164 svn_boolean_t eof;
2165 svn_boolean_t line_after_tree_header_read = FALSE;
2166 apr_pool_t *iterpool;
2167 svn_patch_t *patch;
2168 enum parse_state state = state_start;
2169
2170 if (apr_file_eof(patch_file->apr_file) == APR_EOF)
2171 {
2172 /* No more patches here. */
2173 *patch_p = NULL;
2174 return SVN_NO_ERROR;
2175 }
2176
2177 patch = apr_pcalloc(result_pool, sizeof(*patch));
2178 patch->old_executable_bit = svn_tristate_unknown;
2179 patch->new_executable_bit = svn_tristate_unknown;
2180 patch->old_symlink_bit = svn_tristate_unknown;
2181 patch->new_symlink_bit = svn_tristate_unknown;
2182
2183 pos = patch_file->next_patch_offset;
2184 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));
2185
2186 iterpool = svn_pool_create(scratch_pool);
2187 do
2188 {
2189 svn_stringbuf_t *line;
2190 svn_boolean_t valid_header_line = FALSE;
2191 int i;
2192
2193 svn_pool_clear(iterpool);
2194
2195 /* Remember the current line's offset, and read the line. */
2196 last_line = pos;
2197 SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
2198 APR_SIZE_MAX, iterpool, iterpool));
2199
2200 if (! eof)
2201 {
2202 /* Update line offset for next iteration. */
2203 SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file,
2204 iterpool));
2205 }
2206
2207 /* Run the state machine. */
2208 for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
2209 {
2210 if (starts_with(line->data, transitions[i].expected_input)
2211 && state == transitions[i].required_state)
2212 {
2213 SVN_ERR(transitions[i].fn(&state, line->data, patch,
2214 result_pool, iterpool));
2215 valid_header_line = TRUE;
2216 break;
2217 }
2218 }
2219
2220 if (state == state_unidiff_found
2221 || state == state_git_header_found
2222 || state == state_binary_patch_found)
2223 {
2224 /* We have a valid diff header, yay! */
2225 break;
2226 }
2227 else if ((state == state_git_tree_seen || state == state_git_mode_seen)
2228 && line_after_tree_header_read
2229 && !valid_header_line)
2230 {
2231 /* We have a valid diff header for a patch with only tree changes.
2232 * Rewind to the start of the line just read, so subsequent calls
2233 * to this function don't end up skipping the line -- it may
2234 * contain a patch. */
2235 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2236 scratch_pool));
2237 break;
2238 }
2239 else if (state == state_git_tree_seen
2240 || state == state_git_mode_seen)
2241 {
2242 line_after_tree_header_read = TRUE;
2243 }
2244 else if (! valid_header_line && state != state_start
2245 && state != state_git_diff_seen)
2246 {
2247 /* We've encountered an invalid diff header.
2248 *
2249 * Rewind to the start of the line just read - it may be a new
2250 * header that begins there. */
2251 SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
2252 scratch_pool));
2253 state = state_start;
2254 }
2255
2256 }
2257 while (! eof);
2258
2259 patch->reverse = reverse;
2260 if (reverse)
2261 {
2262 const char *temp;
2263 svn_tristate_t ts_tmp;
2264
2265 temp = patch->old_filename;
2266 patch->old_filename = patch->new_filename;
2267 patch->new_filename = temp;
2268
2269 switch (patch->operation)
2270 {
2271 case svn_diff_op_added:
2272 patch->operation = svn_diff_op_deleted;
2273 break;
2274 case svn_diff_op_deleted:
2275 patch->operation = svn_diff_op_added;
2276 break;
2277
2278 case svn_diff_op_modified:
2279 break; /* Stays modified. */
2280
2281 case svn_diff_op_copied:
2282 case svn_diff_op_moved:
2283 break; /* Stays copied or moved, just in the other direction. */
2284 case svn_diff_op_unchanged:
2285 break; /* Stays unchanged, of course. */
2286 }
2287
2288 ts_tmp = patch->old_executable_bit;
2289 patch->old_executable_bit = patch->new_executable_bit;
2290 patch->new_executable_bit = ts_tmp;
2291
2292 ts_tmp = patch->old_symlink_bit;
2293 patch->old_symlink_bit = patch->new_symlink_bit;
2294 patch->new_symlink_bit = ts_tmp;
2295 }
2296
2297 if (patch->old_filename == NULL || patch->new_filename == NULL)
2298 {
2299 /* Something went wrong, just discard the result. */
2300 patch = NULL;
2301 }
2302 else
2303 {
2304 if (state == state_binary_patch_found)
2305 {
2306 SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse,
2307 result_pool, iterpool));
2308 /* And fall through in property parsing */
2309 }
2310
2311 SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
2312 result_pool, iterpool));
2313 }
2314
2315 svn_pool_destroy(iterpool);
2316
2317 SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset,
2318 patch_file->apr_file, scratch_pool));
2319
2320 if (patch && patch->hunks)
2321 {
2322 /* Usually, hunks appear in the patch sorted by their original line
2323 * offset. But just in case they weren't parsed in this order for
2324 * some reason, we sort them so that our caller can assume that hunks
2325 * are sorted as if parsed from a usual patch. */
2326 svn_sort__array(patch->hunks, compare_hunks);
2327 }
2328
2329 *patch_p = patch;
2330 return SVN_NO_ERROR;
2331 }
2332
2333 svn_error_t *
svn_diff_close_patch_file(svn_patch_file_t * patch_file,apr_pool_t * scratch_pool)2334 svn_diff_close_patch_file(svn_patch_file_t *patch_file,
2335 apr_pool_t *scratch_pool)
2336 {
2337 return svn_error_trace(svn_io_file_close(patch_file->apr_file,
2338 scratch_pool));
2339 }
2340