1 /*
2  * parse-diff.c: functions for parsing diff files
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 #include <stdlib.h>
25 #include <stddef.h>
26 #include <string.h>
27 
28 #include "svn_hash.h"
29 #include "svn_types.h"
30 #include "svn_error.h"
31 #include "svn_io.h"
32 #include "svn_pools.h"
33 #include "svn_props.h"
34 #include "svn_string.h"
35 #include "svn_utf.h"
36 #include "svn_dirent_uri.h"
37 #include "svn_diff.h"
38 #include "svn_ctype.h"
39 #include "svn_mergeinfo.h"
40 
41 #include "private/svn_eol_private.h"
42 #include "private/svn_dep_compat.h"
43 #include "private/svn_diff_private.h"
44 #include "private/svn_sorts_private.h"
45 
46 #include "diff.h"
47 
48 #include "svn_private_config.h"
49 
/* Evaluate to non-zero when the C string STR begins with the C string
 * START, and to zero otherwise.  START appears twice in the expansion,
 * so it must not have side effects. */
#define starts_with(str, start)  \
  (!strncmp((str), (start), strlen(start)))

/* Length of the string literal STR, not counting its terminating NUL.
 * Like strlen(), but based on sizeof, so only valid for literals/arrays. */
#define STRLEN_LITERAL(str) (sizeof(str) - 1)
56 
/* This struct describes a range within a file, as well as the
 * current cursor position within the range. All numbers are in bytes.
 *
 * The readers in this file treat END as exclusive: a range is exhausted
 * once CURRENT >= END, and at most END - CURRENT bytes are read. */
struct svn_diff__hunk_range {
  /* Offset of the first byte of the range. */
  apr_off_t start;
  /* Offset just past the last byte of the range. */
  apr_off_t end;
  /* Offset at which the next read of this range begins. */
  apr_off_t current;
};
64 
/* A single hunk of a patch.  The hunk's text is not stored here; it is
 * described by byte ranges into the file APR_FILE. */
struct svn_diff_hunk_t {
  /* The patch this hunk belongs to.  When its 'reverse' flag is set, the
   * accessor functions in this file swap the original and modified sides. */
  const svn_patch_t *patch;

  /* APR file handle to the patch file this hunk came from. */
  apr_file_t *apr_file;

  /* Ranges used to keep track of this hunk's texts positions within
   * the patch file: the whole diff text, and the parts of it that are
   * read as original and as modified text. */
  struct svn_diff__hunk_range diff_text_range;
  struct svn_diff__hunk_range original_text_range;
  struct svn_diff__hunk_range modified_text_range;

  /* Hunk ranges as they appeared in the patch file.
   * All numbers are lines, not bytes. */
  svn_linenum_t original_start;
  svn_linenum_t original_length;
  svn_linenum_t modified_start;
  svn_linenum_t modified_length;

  /* Number of lines of leading and trailing hunk context. */
  svn_linenum_t leading_context;
  svn_linenum_t trailing_context;

  /* Did we see a 'file does not end with eol' marker in this hunk?
   * One flag for each side of the hunk. */
  svn_boolean_t original_no_final_eol;
  svn_boolean_t modified_no_final_eol;

  /* Fuzz penalty, triggered by bad patch targets.
   * One value for each side of the hunk. */
  svn_linenum_t original_fuzz;
  svn_linenum_t modified_fuzz;
};
97 
/* A binary patch, described by the locations of its two encoded data
 * sections within the file APR_FILE. */
struct svn_diff_binary_patch_t {
  /* The patch this binary patch belongs to. */
  const svn_patch_t *patch;

  /* APR file handle to the patch file this binary patch came from. */
  apr_file_t *apr_file;

  /* Offsets inside APR_FILE delimiting the data that
   * svn_diff_get_binary_diff_original_stream() reads. */
  apr_off_t src_start;
  apr_off_t src_end;
  svn_filesize_t src_filesize; /* Expanded/final size */

  /* Offsets inside APR_FILE delimiting the data that
   * svn_diff_get_binary_diff_result_stream() reads. */
  apr_off_t dst_start;
  apr_off_t dst_end;
  svn_filesize_t dst_filesize; /* Expanded/final size */
};
115 
116 /* Common guts of svn_diff_hunk__create_adds_single_line() and
117  * svn_diff_hunk__create_deletes_single_line().
118  *
119  * ADD is TRUE if adding and FALSE if deleting.
120  */
121 static svn_error_t *
add_or_delete_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,svn_boolean_t add,apr_pool_t * result_pool,apr_pool_t * scratch_pool)122 add_or_delete_single_line(svn_diff_hunk_t **hunk_out,
123                           const char *line,
124                           const svn_patch_t *patch,
125                           svn_boolean_t add,
126                           apr_pool_t *result_pool,
127                           apr_pool_t *scratch_pool)
128 {
129   svn_diff_hunk_t *hunk = apr_pcalloc(result_pool, sizeof(*hunk));
130   static const char *hunk_header[] = { "@@ -1 +0,0 @@\n", "@@ -0,0 +1 @@\n" };
131   const apr_size_t header_len = strlen(hunk_header[add]);
132   const apr_size_t len = strlen(line);
133   const apr_size_t end = header_len + (1 + len); /* The +1 is for the \n. */
134   svn_stringbuf_t *buf = svn_stringbuf_create_ensure(end + 1, scratch_pool);
135 
136   hunk->patch = patch;
137 
138   /* hunk->apr_file is created below. */
139 
140   hunk->diff_text_range.start = header_len;
141   hunk->diff_text_range.current = header_len;
142 
143   if (add)
144     {
145       hunk->original_text_range.start = 0; /* There's no "original" text. */
146       hunk->original_text_range.current = 0;
147       hunk->original_text_range.end = 0;
148       hunk->original_no_final_eol = FALSE;
149 
150       hunk->modified_text_range.start = header_len;
151       hunk->modified_text_range.current = header_len;
152       hunk->modified_text_range.end = end;
153       hunk->modified_no_final_eol = TRUE;
154 
155       hunk->original_start = 0;
156       hunk->original_length = 0;
157 
158       hunk->modified_start = 1;
159       hunk->modified_length = 1;
160     }
161   else /* delete */
162     {
163       hunk->original_text_range.start = header_len;
164       hunk->original_text_range.current = header_len;
165       hunk->original_text_range.end = end;
166       hunk->original_no_final_eol = TRUE;
167 
168       hunk->modified_text_range.start = 0; /* There's no "original" text. */
169       hunk->modified_text_range.current = 0;
170       hunk->modified_text_range.end = 0;
171       hunk->modified_no_final_eol = FALSE;
172 
173       hunk->original_start = 1;
174       hunk->original_length = 1;
175 
176       hunk->modified_start = 0;
177       hunk->modified_length = 0; /* setting to '1' works too */
178     }
179 
180   hunk->leading_context = 0;
181   hunk->trailing_context = 0;
182 
183   /* Create APR_FILE and put just a hunk in it (without a diff header).
184    * Save the offset of the last byte of the diff line. */
185   svn_stringbuf_appendbytes(buf, hunk_header[add], header_len);
186   svn_stringbuf_appendbyte(buf, add ? '+' : '-');
187   svn_stringbuf_appendbytes(buf, line, len);
188   svn_stringbuf_appendbyte(buf, '\n');
189   svn_stringbuf_appendcstr(buf, "\\ No newline at end of hunk\n");
190 
191   hunk->diff_text_range.end = buf->len;
192 
193   SVN_ERR(svn_io_open_unique_file3(&hunk->apr_file, NULL /* filename */,
194                                    NULL /* system tempdir */,
195                                    svn_io_file_del_on_pool_cleanup,
196                                    result_pool, scratch_pool));
197   SVN_ERR(svn_io_file_write_full(hunk->apr_file,
198                                  buf->data, buf->len,
199                                  NULL, scratch_pool));
200   /* No need to seek. */
201 
202   *hunk_out = hunk;
203   return SVN_NO_ERROR;
204 }
205 
206 svn_error_t *
svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)207 svn_diff_hunk__create_adds_single_line(svn_diff_hunk_t **hunk_out,
208                                        const char *line,
209                                        const svn_patch_t *patch,
210                                        apr_pool_t *result_pool,
211                                        apr_pool_t *scratch_pool)
212 {
213   SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
214                                     (!patch->reverse),
215                                     result_pool, scratch_pool));
216   return SVN_NO_ERROR;
217 }
218 
219 svn_error_t *
svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t ** hunk_out,const char * line,const svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)220 svn_diff_hunk__create_deletes_single_line(svn_diff_hunk_t **hunk_out,
221                                           const char *line,
222                                           const svn_patch_t *patch,
223                                           apr_pool_t *result_pool,
224                                           apr_pool_t *scratch_pool)
225 {
226   SVN_ERR(add_or_delete_single_line(hunk_out, line, patch,
227                                     patch->reverse,
228                                     result_pool, scratch_pool));
229   return SVN_NO_ERROR;
230 }
231 
232 void
svn_diff_hunk_reset_diff_text(svn_diff_hunk_t * hunk)233 svn_diff_hunk_reset_diff_text(svn_diff_hunk_t *hunk)
234 {
235   hunk->diff_text_range.current = hunk->diff_text_range.start;
236 }
237 
238 void
svn_diff_hunk_reset_original_text(svn_diff_hunk_t * hunk)239 svn_diff_hunk_reset_original_text(svn_diff_hunk_t *hunk)
240 {
241   if (hunk->patch->reverse)
242     hunk->modified_text_range.current = hunk->modified_text_range.start;
243   else
244     hunk->original_text_range.current = hunk->original_text_range.start;
245 }
246 
247 void
svn_diff_hunk_reset_modified_text(svn_diff_hunk_t * hunk)248 svn_diff_hunk_reset_modified_text(svn_diff_hunk_t *hunk)
249 {
250   if (hunk->patch->reverse)
251     hunk->original_text_range.current = hunk->original_text_range.start;
252   else
253     hunk->modified_text_range.current = hunk->modified_text_range.start;
254 }
255 
256 svn_linenum_t
svn_diff_hunk_get_original_start(const svn_diff_hunk_t * hunk)257 svn_diff_hunk_get_original_start(const svn_diff_hunk_t *hunk)
258 {
259   return hunk->patch->reverse ? hunk->modified_start : hunk->original_start;
260 }
261 
262 svn_linenum_t
svn_diff_hunk_get_original_length(const svn_diff_hunk_t * hunk)263 svn_diff_hunk_get_original_length(const svn_diff_hunk_t *hunk)
264 {
265   return hunk->patch->reverse ? hunk->modified_length : hunk->original_length;
266 }
267 
268 svn_linenum_t
svn_diff_hunk_get_modified_start(const svn_diff_hunk_t * hunk)269 svn_diff_hunk_get_modified_start(const svn_diff_hunk_t *hunk)
270 {
271   return hunk->patch->reverse ? hunk->original_start : hunk->modified_start;
272 }
273 
274 svn_linenum_t
svn_diff_hunk_get_modified_length(const svn_diff_hunk_t * hunk)275 svn_diff_hunk_get_modified_length(const svn_diff_hunk_t *hunk)
276 {
277   return hunk->patch->reverse ? hunk->original_length : hunk->modified_length;
278 }
279 
280 svn_linenum_t
svn_diff_hunk_get_leading_context(const svn_diff_hunk_t * hunk)281 svn_diff_hunk_get_leading_context(const svn_diff_hunk_t *hunk)
282 {
283   return hunk->leading_context;
284 }
285 
286 svn_linenum_t
svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t * hunk)287 svn_diff_hunk_get_trailing_context(const svn_diff_hunk_t *hunk)
288 {
289   return hunk->trailing_context;
290 }
291 
292 svn_linenum_t
svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t * hunk)293 svn_diff_hunk__get_fuzz_penalty(const svn_diff_hunk_t *hunk)
294 {
295   return hunk->patch->reverse ? hunk->original_fuzz : hunk->modified_fuzz;
296 }
297 
/* Baton for the base85 stream implementation */
struct base85_baton_t
{
  apr_file_t *file;       /* File the encoded lines are read from */
  apr_pool_t *iterpool;   /* Cleared on every read; destroyed on close */
  char buffer[52];        /* Decoded bytes of the current line */
  apr_off_t next_pos;     /* Start position of next line */
  apr_off_t end_pos;      /* Position after last line */
  apr_size_t buf_size;    /* Decoded bytes in BUFFER (52 unless last line) */
  apr_size_t buf_pos;     /* Bytes of BUFFER already handed to the reader */
  svn_boolean_t done;     /* At eof? */
};
310 
311 /* Implements svn_read_fn_t for the base85 read stream */
312 static svn_error_t *
read_handler_base85(void * baton,char * buffer,apr_size_t * len)313 read_handler_base85(void *baton, char *buffer, apr_size_t *len)
314 {
315   struct base85_baton_t *b85b = baton;
316   apr_pool_t *iterpool = b85b->iterpool;
317   apr_size_t remaining = *len;
318   char *dest = buffer;
319 
320   svn_pool_clear(iterpool);
321 
322   if (b85b->done)
323     {
324       *len = 0;
325       return SVN_NO_ERROR;
326     }
327 
328   while (remaining && (b85b->buf_size > b85b->buf_pos
329                        || b85b->next_pos < b85b->end_pos))
330     {
331       svn_stringbuf_t *line;
332       svn_boolean_t at_eof;
333 
334       apr_size_t available = b85b->buf_size - b85b->buf_pos;
335       if (available)
336         {
337           apr_size_t n = (remaining < available) ? remaining : available;
338 
339           memcpy(dest, b85b->buffer + b85b->buf_pos, n);
340           dest += n;
341           remaining -= n;
342           b85b->buf_pos += n;
343 
344           if (!remaining)
345             return SVN_NO_ERROR; /* *len = OK */
346         }
347 
348       if (b85b->next_pos >= b85b->end_pos)
349         break; /* At EOF */
350       SVN_ERR(svn_io_file_seek(b85b->file, APR_SET, &b85b->next_pos,
351                                iterpool));
352       SVN_ERR(svn_io_file_readline(b85b->file, &line, NULL, &at_eof,
353                                    APR_SIZE_MAX, iterpool, iterpool));
354       if (at_eof)
355         b85b->next_pos = b85b->end_pos;
356       else
357         {
358           SVN_ERR(svn_io_file_get_offset(&b85b->next_pos, b85b->file,
359                                          iterpool));
360         }
361 
362       if (line->len && line->data[0] >= 'A' && line->data[0] <= 'Z')
363         b85b->buf_size = line->data[0] - 'A' + 1;
364       else if (line->len && line->data[0] >= 'a' && line->data[0] <= 'z')
365         b85b->buf_size = line->data[0] - 'a' + 26 + 1;
366       else
367         return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
368                                 _("Unexpected data in base85 section"));
369 
370       if (b85b->buf_size < 52)
371         b85b->next_pos = b85b->end_pos; /* Handle as EOF */
372 
373       SVN_ERR(svn_diff__base85_decode_line(b85b->buffer, b85b->buf_size,
374                                            line->data + 1, line->len - 1,
375                                            iterpool));
376       b85b->buf_pos = 0;
377     }
378 
379   *len -= remaining;
380   b85b->done = TRUE;
381 
382   return SVN_NO_ERROR;
383 }
384 
385 /* Implements svn_close_fn_t for the base85 read stream */
386 static svn_error_t *
close_handler_base85(void * baton)387 close_handler_base85(void *baton)
388 {
389   struct base85_baton_t *b85b = baton;
390 
391   svn_pool_destroy(b85b->iterpool);
392 
393   return SVN_NO_ERROR;
394 }
395 
396 /* Gets a stream that reads decoded base85 data from a segment of a file.
397    The current implementation might assume that both start_pos and end_pos
398    are located at line boundaries. */
399 static svn_stream_t *
get_base85_data_stream(apr_file_t * file,apr_off_t start_pos,apr_off_t end_pos,apr_pool_t * result_pool)400 get_base85_data_stream(apr_file_t *file,
401                        apr_off_t start_pos,
402                        apr_off_t end_pos,
403                        apr_pool_t *result_pool)
404 {
405   struct base85_baton_t *b85b = apr_pcalloc(result_pool, sizeof(*b85b));
406   svn_stream_t *base85s = svn_stream_create(b85b, result_pool);
407 
408   b85b->file = file;
409   b85b->iterpool = svn_pool_create(result_pool);
410   b85b->next_pos = start_pos;
411   b85b->end_pos = end_pos;
412 
413   svn_stream_set_read2(base85s, NULL /* only full read support */,
414                        read_handler_base85);
415   svn_stream_set_close(base85s, close_handler_base85);
416   return base85s;
417 }
418 
/* Baton for the length verification stream functions */
struct length_verify_baton_t
{
  /* Stream the data is read from; closed when the verifying stream is. */
  svn_stream_t *inner;
  /* Number of bytes that are still expected from INNER. */
  svn_filesize_t remaining;
};
425 
426 /* Implements svn_read_fn_t for the length verification stream */
427 static svn_error_t *
read_handler_length_verify(void * baton,char * buffer,apr_size_t * len)428 read_handler_length_verify(void *baton, char *buffer, apr_size_t *len)
429 {
430   struct length_verify_baton_t *lvb = baton;
431   apr_size_t requested_len = *len;
432 
433   SVN_ERR(svn_stream_read_full(lvb->inner, buffer, len));
434 
435   if (*len > lvb->remaining)
436     return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
437                             _("Base85 data expands to longer than declared "
438                               "filesize"));
439   else if (requested_len > *len && *len != lvb->remaining)
440     return svn_error_create(SVN_ERR_DIFF_UNEXPECTED_DATA, NULL,
441                             _("Base85 data expands to smaller than declared "
442                               "filesize"));
443 
444   lvb->remaining -= *len;
445 
446   return SVN_NO_ERROR;
447 }
448 
449 /* Implements svn_close_fn_t for the length verification stream */
450 static svn_error_t *
close_handler_length_verify(void * baton)451 close_handler_length_verify(void *baton)
452 {
453   struct length_verify_baton_t *lvb = baton;
454 
455   return svn_error_trace(svn_stream_close(lvb->inner));
456 }
457 
458 /* Gets a stream that verifies on reads that the inner stream is exactly
459    of the specified length */
460 static svn_stream_t *
get_verify_length_stream(svn_stream_t * inner,svn_filesize_t expected_size,apr_pool_t * result_pool)461 get_verify_length_stream(svn_stream_t *inner,
462                          svn_filesize_t expected_size,
463                          apr_pool_t *result_pool)
464 {
465   struct length_verify_baton_t *lvb = apr_palloc(result_pool, sizeof(*lvb));
466   svn_stream_t *len_stream = svn_stream_create(lvb, result_pool);
467 
468   lvb->inner = inner;
469   lvb->remaining = expected_size;
470 
471   svn_stream_set_read2(len_stream, NULL /* only full read support */,
472                        read_handler_length_verify);
473   svn_stream_set_close(len_stream, close_handler_length_verify);
474 
475   return len_stream;
476 }
477 
478 svn_stream_t *
svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)479 svn_diff_get_binary_diff_original_stream(const svn_diff_binary_patch_t *bpatch,
480                                          apr_pool_t *result_pool)
481 {
482   svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->src_start,
483                                            bpatch->src_end, result_pool);
484 
485   s = svn_stream_compressed(s, result_pool);
486 
487   /* ### If we (ever) want to support the DELTA format, then we should hook the
488          undelta handling here */
489 
490   return get_verify_length_stream(s, bpatch->src_filesize, result_pool);
491 }
492 
493 svn_stream_t *
svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t * bpatch,apr_pool_t * result_pool)494 svn_diff_get_binary_diff_result_stream(const svn_diff_binary_patch_t *bpatch,
495                                        apr_pool_t *result_pool)
496 {
497   svn_stream_t *s = get_base85_data_stream(bpatch->apr_file, bpatch->dst_start,
498                                            bpatch->dst_end, result_pool);
499 
500   s = svn_stream_compressed(s, result_pool);
501 
502   /* ### If we (ever) want to support the DELTA format, then we should hook the
503   undelta handling here */
504 
505   return get_verify_length_stream(s, bpatch->dst_filesize, result_pool);
506 }
507 
508 /* Try to parse a positive number from a decimal number encoded
509  * in the string NUMBER. Return parsed number in OFFSET, and return
510  * TRUE if parsing was successful. */
511 static svn_boolean_t
parse_offset(svn_linenum_t * offset,const char * number)512 parse_offset(svn_linenum_t *offset, const char *number)
513 {
514   svn_error_t *err;
515   apr_uint64_t val;
516 
517   err = svn_cstring_strtoui64(&val, number, 0, SVN_LINENUM_MAX_VALUE, 10);
518   if (err)
519     {
520       svn_error_clear(err);
521       return FALSE;
522     }
523 
524   *offset = (svn_linenum_t)val;
525 
526   return TRUE;
527 }
528 
529 /* Try to parse a hunk range specification from the string RANGE.
530  * Return parsed information in *START and *LENGTH, and return TRUE
531  * if the range parsed correctly. Note: This function may modify the
532  * input value RANGE. */
533 static svn_boolean_t
parse_range(svn_linenum_t * start,svn_linenum_t * length,char * range)534 parse_range(svn_linenum_t *start, svn_linenum_t *length, char *range)
535 {
536   char *comma;
537 
538   if (*range == 0)
539     return FALSE;
540 
541   comma = strstr(range, ",");
542   if (comma)
543     {
544       if (strlen(comma + 1) > 0)
545         {
546           /* Try to parse the length. */
547           if (! parse_offset(length, comma + 1))
548             return FALSE;
549 
550           /* Snip off the end of the string,
551            * so we can comfortably parse the line
552            * number the hunk starts at. */
553           *comma = '\0';
554         }
555        else
556          /* A comma but no length? */
557          return FALSE;
558     }
559   else
560     {
561       *length = 1;
562     }
563 
564   /* Try to parse the line number the hunk starts at. */
565   return parse_offset(start, range);
566 }
567 
568 /* Try to parse a hunk header in string HEADER, putting parsed information
569  * into HUNK. Return TRUE if the header parsed correctly. ATAT is the
570  * character string used to delimit the hunk header.
571  * Do all allocations in POOL. */
572 static svn_boolean_t
parse_hunk_header(const char * header,svn_diff_hunk_t * hunk,const char * atat,apr_pool_t * pool)573 parse_hunk_header(const char *header, svn_diff_hunk_t *hunk,
574                   const char *atat, apr_pool_t *pool)
575 {
576   const char *p;
577   const char *start;
578   svn_stringbuf_t *range;
579 
580   p = header + strlen(atat);
581   if (*p != ' ')
582     /* No. */
583     return FALSE;
584   p++;
585   if (*p != '-')
586     /* Nah... */
587     return FALSE;
588   /* OK, this may be worth allocating some memory for... */
589   range = svn_stringbuf_create_ensure(31, pool);
590   start = ++p;
591   while (*p && *p != ' ')
592     {
593       p++;
594     }
595 
596   if (*p != ' ')
597     /* No no no... */
598     return FALSE;
599 
600   svn_stringbuf_appendbytes(range, start, p - start);
601 
602   /* Try to parse the first range. */
603   if (! parse_range(&hunk->original_start, &hunk->original_length, range->data))
604     return FALSE;
605 
606   /* Clear the stringbuf so we can reuse it for the second range. */
607   svn_stringbuf_setempty(range);
608   p++;
609   if (*p != '+')
610     /* Eeek! */
611     return FALSE;
612   /* OK, this may be worth copying... */
613   start = ++p;
614   while (*p && *p != ' ')
615     {
616       p++;
617     }
618   if (*p != ' ')
619     /* No no no... */
620     return FALSE;
621 
622   svn_stringbuf_appendbytes(range, start, p - start);
623 
624   /* Check for trailing @@ */
625   p++;
626   if (! starts_with(p, atat))
627     return FALSE;
628 
629   /* There may be stuff like C-function names after the trailing @@,
630    * but we ignore that. */
631 
632   /* Try to parse the second range. */
633   if (! parse_range(&hunk->modified_start, &hunk->modified_length, range->data))
634     return FALSE;
635 
636   /* Hunk header is good. */
637   return TRUE;
638 }
639 
640 /* Read a line of original or modified hunk text from the specified
641  * RANGE within FILE. FILE is expected to contain unidiff text.
642  * Leading unidiff symbols ('+', '-', and ' ') are removed from the line,
643  * Any lines commencing with the VERBOTEN character are discarded.
644  * VERBOTEN should be '+' or '-', depending on which form of hunk text
645  * is being read. NO_FINAL_EOL declares if the hunk contains a no final
646  * EOL marker.
647  *
648  * All other parameters are as in svn_diff_hunk_readline_original_text()
649  * and svn_diff_hunk_readline_modified_text().
650  */
651 static svn_error_t *
hunk_readline_original_or_modified(apr_file_t * file,struct svn_diff__hunk_range * range,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,char verboten,svn_boolean_t no_final_eol,apr_pool_t * result_pool,apr_pool_t * scratch_pool)652 hunk_readline_original_or_modified(apr_file_t *file,
653                                    struct svn_diff__hunk_range *range,
654                                    svn_stringbuf_t **stringbuf,
655                                    const char **eol,
656                                    svn_boolean_t *eof,
657                                    char verboten,
658                                    svn_boolean_t no_final_eol,
659                                    apr_pool_t *result_pool,
660                                    apr_pool_t *scratch_pool)
661 {
662   apr_size_t max_len;
663   svn_boolean_t filtered;
664   apr_off_t pos;
665   svn_stringbuf_t *str;
666   const char *eol_p;
667   apr_pool_t *last_pool;
668 
669   if (!eol)
670     eol = &eol_p;
671 
672   if (range->current >= range->end)
673     {
674       /* We're past the range. Indicate that no bytes can be read. */
675       *eof = TRUE;
676       *eol = NULL;
677       *stringbuf = svn_stringbuf_create_empty(result_pool);
678       return SVN_NO_ERROR;
679     }
680 
681   SVN_ERR(svn_io_file_get_offset(&pos, file, scratch_pool));
682   SVN_ERR(svn_io_file_seek(file, APR_SET, &range->current, scratch_pool));
683 
684   /* It's not ITERPOOL because we use data allocated in LAST_POOL out
685      of the loop. */
686   last_pool = svn_pool_create(scratch_pool);
687   do
688     {
689       svn_pool_clear(last_pool);
690 
691       max_len = range->end - range->current;
692       SVN_ERR(svn_io_file_readline(file, &str, eol, eof, max_len,
693                                    last_pool, last_pool));
694       SVN_ERR(svn_io_file_get_offset(&range->current, file, last_pool));
695       filtered = (str->data[0] == verboten || str->data[0] == '\\');
696     }
697   while (filtered && ! *eof);
698 
699   if (filtered)
700     {
701       /* EOF, return an empty string. */
702       *stringbuf = svn_stringbuf_create_ensure(0, result_pool);
703       *eol = NULL;
704     }
705   else if (str->data[0] == '+' || str->data[0] == '-' || str->data[0] == ' ')
706     {
707       /* Shave off leading unidiff symbols. */
708       *stringbuf = svn_stringbuf_create(str->data + 1, result_pool);
709     }
710   else
711     {
712       /* Return the line as-is. Handle as a chopped leading spaces */
713       *stringbuf = svn_stringbuf_dup(str, result_pool);
714     }
715 
716   if (!filtered && *eof && !*eol && *str->data)
717     {
718       /* Ok, we miss a final EOL in the patch file, but didn't see a
719          no eol marker line.
720 
721          We should report that we had an EOL or the patch code will
722          misbehave (and it knows nothing about no eol markers) */
723 
724       if (!no_final_eol && eol != &eol_p)
725         {
726           apr_off_t start = 0;
727 
728           SVN_ERR(svn_io_file_seek(file, APR_SET, &start, scratch_pool));
729 
730           SVN_ERR(svn_io_file_readline(file, &str, eol, NULL, APR_SIZE_MAX,
731                                        scratch_pool, scratch_pool));
732 
733           /* Every patch file that has hunks has at least one EOL*/
734           SVN_ERR_ASSERT(*eol != NULL);
735         }
736 
737       *eof = FALSE;
738       /* Fall through to seek back to the right location */
739     }
740   SVN_ERR(svn_io_file_seek(file, APR_SET, &pos, scratch_pool));
741 
742   svn_pool_destroy(last_pool);
743   return SVN_NO_ERROR;
744 }
745 
746 svn_error_t *
svn_diff_hunk_readline_original_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)747 svn_diff_hunk_readline_original_text(svn_diff_hunk_t *hunk,
748                                      svn_stringbuf_t **stringbuf,
749                                      const char **eol,
750                                      svn_boolean_t *eof,
751                                      apr_pool_t *result_pool,
752                                      apr_pool_t *scratch_pool)
753 {
754   return svn_error_trace(
755     hunk_readline_original_or_modified(hunk->apr_file,
756                                        hunk->patch->reverse ?
757                                          &hunk->modified_text_range :
758                                          &hunk->original_text_range,
759                                        stringbuf, eol, eof,
760                                        hunk->patch->reverse ? '-' : '+',
761                                        hunk->patch->reverse
762                                           ? hunk->modified_no_final_eol
763                                           : hunk->original_no_final_eol,
764                                        result_pool, scratch_pool));
765 }
766 
767 svn_error_t *
svn_diff_hunk_readline_modified_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)768 svn_diff_hunk_readline_modified_text(svn_diff_hunk_t *hunk,
769                                      svn_stringbuf_t **stringbuf,
770                                      const char **eol,
771                                      svn_boolean_t *eof,
772                                      apr_pool_t *result_pool,
773                                      apr_pool_t *scratch_pool)
774 {
775   return svn_error_trace(
776     hunk_readline_original_or_modified(hunk->apr_file,
777                                        hunk->patch->reverse ?
778                                          &hunk->original_text_range :
779                                          &hunk->modified_text_range,
780                                        stringbuf, eol, eof,
781                                        hunk->patch->reverse ? '+' : '-',
782                                        hunk->patch->reverse
783                                           ? hunk->original_no_final_eol
784                                           : hunk->modified_no_final_eol,
785                                        result_pool, scratch_pool));
786 }
787 
788 svn_error_t *
svn_diff_hunk_readline_diff_text(svn_diff_hunk_t * hunk,svn_stringbuf_t ** stringbuf,const char ** eol,svn_boolean_t * eof,apr_pool_t * result_pool,apr_pool_t * scratch_pool)789 svn_diff_hunk_readline_diff_text(svn_diff_hunk_t *hunk,
790                                  svn_stringbuf_t **stringbuf,
791                                  const char **eol,
792                                  svn_boolean_t *eof,
793                                  apr_pool_t *result_pool,
794                                  apr_pool_t *scratch_pool)
795 {
796   svn_stringbuf_t *line;
797   apr_size_t max_len;
798   apr_off_t pos;
799   const char *eol_p;
800 
801   if (!eol)
802     eol = &eol_p;
803 
804   if (hunk->diff_text_range.current >= hunk->diff_text_range.end)
805     {
806       /* We're past the range. Indicate that no bytes can be read. */
807       *eof = TRUE;
808       *eol = NULL;
809       *stringbuf = svn_stringbuf_create_empty(result_pool);
810       return SVN_NO_ERROR;
811     }
812 
813   SVN_ERR(svn_io_file_get_offset(&pos, hunk->apr_file, scratch_pool));
814   SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET,
815                            &hunk->diff_text_range.current, scratch_pool));
816   max_len = hunk->diff_text_range.end - hunk->diff_text_range.current;
817   SVN_ERR(svn_io_file_readline(hunk->apr_file, &line, eol, eof, max_len,
818                                result_pool,
819                    scratch_pool));
820   SVN_ERR(svn_io_file_get_offset(&hunk->diff_text_range.current,
821                                  hunk->apr_file, scratch_pool));
822 
823   if (*eof && !*eol && *line->data)
824     {
825       /* Ok, we miss a final EOL in the patch file, but didn't see a
826           no eol marker line.
827 
828           We should report that we had an EOL or the patch code will
829           misbehave (and it knows nothing about no eol markers) */
830 
831       if (eol != &eol_p)
832         {
833           /* Lets pick the first eol we find in our patch file */
834           apr_off_t start = 0;
835           svn_stringbuf_t *str;
836 
837           SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &start,
838                                    scratch_pool));
839 
840           SVN_ERR(svn_io_file_readline(hunk->apr_file, &str, eol, NULL,
841                                        APR_SIZE_MAX,
842                                        scratch_pool, scratch_pool));
843 
844           /* Every patch file that has hunks has at least one EOL*/
845           SVN_ERR_ASSERT(*eol != NULL);
846         }
847 
848       *eof = FALSE;
849 
850       /* Fall through to seek back to the right location */
851     }
852 
853   SVN_ERR(svn_io_file_seek(hunk->apr_file, APR_SET, &pos, scratch_pool));
854 
855   if (hunk->patch->reverse)
856     {
857       if (line->data[0] == '+')
858         line->data[0] = '-';
859       else if (line->data[0] == '-')
860         line->data[0] = '+';
861     }
862 
863   *stringbuf = line;
864 
865   return SVN_NO_ERROR;
866 }
867 
868 /* Parse *PROP_NAME from HEADER as the part after the INDICATOR line.
869  * Allocate *PROP_NAME in RESULT_POOL.
870  * Set *PROP_NAME to NULL if no valid property name was found. */
871 static svn_error_t *
parse_prop_name(const char ** prop_name,const char * header,const char * indicator,apr_pool_t * result_pool)872 parse_prop_name(const char **prop_name, const char *header,
873                 const char *indicator, apr_pool_t *result_pool)
874 {
875   SVN_ERR(svn_utf_cstring_to_utf8(prop_name,
876                                   header + strlen(indicator),
877                                   result_pool));
878   if (**prop_name == '\0')
879     *prop_name = NULL;
880   else if (! svn_prop_name_is_valid(*prop_name))
881     {
882       svn_stringbuf_t *buf = svn_stringbuf_create(*prop_name, result_pool);
883       svn_stringbuf_strip_whitespace(buf);
884       *prop_name = (svn_prop_name_is_valid(buf->data) ? buf->data : NULL);
885     }
886 
887   return SVN_NO_ERROR;
888 }
889 
890 
891 /* A helper function to parse svn:mergeinfo diffs.
892  *
893  * These diffs use a special pretty-print format, for instance:
894  *
895  * Added: svn:mergeinfo
896  * ## -0,0 +0,1 ##
897  *   Merged /trunk:r2-3
898  *
899  * The hunk header has the following format:
900  * ## -0,NUMBER_OF_REVERSE_MERGES +0,NUMBER_OF_FORWARD_MERGES ##
901  *
902  * At this point, the number of reverse merges has already been
903  * parsed into HUNK->ORIGINAL_LENGTH, and the number of forward
904  * merges has been parsed into HUNK->MODIFIED_LENGTH.
905  *
906  * The header is followed by a list of mergeinfo, one path per line.
907  * This function parses such lines. Lines describing reverse merges
908  * appear first, and then all lines describing forward merges appear.
909  *
910  * Parts of the line are affected by i18n. The words 'Merged'
911  * and 'Reverse-merged' can appear in any language and at any
912  * position within the line. We can only assume that a leading
913  * '/' starts the merge source path, the path is followed by
914  * ":r", which in turn is followed by a mergeinfo revision range,
915  *  which is terminated by whitespace or end-of-string.
916  *
917  * If the current line meets the above criteria and we're able
918  * to parse valid mergeinfo from it, the resulting mergeinfo
919  * is added to patch->mergeinfo or patch->reverse_mergeinfo,
920  * and we proceed to the next line.
921  */
922 static svn_error_t *
parse_mergeinfo(svn_boolean_t * found_mergeinfo,svn_stringbuf_t * line,svn_diff_hunk_t * hunk,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)923 parse_mergeinfo(svn_boolean_t *found_mergeinfo,
924                 svn_stringbuf_t *line,
925                 svn_diff_hunk_t *hunk,
926                 svn_patch_t *patch,
927                 apr_pool_t *result_pool,
928                 apr_pool_t *scratch_pool)
929 {
930   char *slash = strchr(line->data, '/');
931   char *colon = strrchr(line->data, ':');
932 
933   *found_mergeinfo = FALSE;
934 
935   if (slash && colon && colon[1] == 'r' && slash < colon)
936     {
937       svn_stringbuf_t *input;
938       svn_mergeinfo_t mergeinfo = NULL;
939       char *s;
940       svn_error_t *err;
941 
942       input = svn_stringbuf_create_ensure(line->len, scratch_pool);
943 
944       /* Copy the merge source path + colon */
945       s = slash;
946       while (s <= colon)
947         {
948           svn_stringbuf_appendbyte(input, *s);
949           s++;
950         }
951 
952       /* skip 'r' after colon */
953       s++;
954 
955       /* Copy the revision range. */
956       while (s < line->data + line->len)
957         {
958           if (svn_ctype_isspace(*s))
959             break;
960           svn_stringbuf_appendbyte(input, *s);
961           s++;
962         }
963 
964       err = svn_mergeinfo_parse(&mergeinfo, input->data, result_pool);
965       if (err && err->apr_err == SVN_ERR_MERGEINFO_PARSE_ERROR)
966         {
967           svn_error_clear(err);
968           mergeinfo = NULL;
969         }
970       else
971         SVN_ERR(err);
972 
973       if (mergeinfo)
974         {
975           if (hunk->original_length > 0) /* reverse merges */
976             {
977               if (patch->reverse)
978                 {
979                   if (patch->mergeinfo == NULL)
980                     patch->mergeinfo = mergeinfo;
981                   else
982                     SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
983                                                  mergeinfo,
984                                                  result_pool,
985                                                  scratch_pool));
986                 }
987               else
988                 {
989                   if (patch->reverse_mergeinfo == NULL)
990                     patch->reverse_mergeinfo = mergeinfo;
991                   else
992                     SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
993                                                  mergeinfo,
994                                                  result_pool,
995                                                  scratch_pool));
996                 }
997               hunk->original_length--;
998             }
999           else if (hunk->modified_length > 0) /* forward merges */
1000             {
1001               if (patch->reverse)
1002                 {
1003                   if (patch->reverse_mergeinfo == NULL)
1004                     patch->reverse_mergeinfo = mergeinfo;
1005                   else
1006                     SVN_ERR(svn_mergeinfo_merge2(patch->reverse_mergeinfo,
1007                                                  mergeinfo,
1008                                                  result_pool,
1009                                                  scratch_pool));
1010                 }
1011               else
1012                 {
1013                   if (patch->mergeinfo == NULL)
1014                     patch->mergeinfo = mergeinfo;
1015                   else
1016                     SVN_ERR(svn_mergeinfo_merge2(patch->mergeinfo,
1017                                                  mergeinfo,
1018                                                  result_pool,
1019                                                  scratch_pool));
1020                 }
1021               hunk->modified_length--;
1022             }
1023 
1024           *found_mergeinfo = TRUE;
1025         }
1026     }
1027 
1028   return SVN_NO_ERROR;
1029 }
1030 
1031 /* Return the next *HUNK from a PATCH in APR_FILE.
1032  * If no hunk can be found, set *HUNK to NULL.
1033  * Set IS_PROPERTY to TRUE if we have a property hunk. If the returned HUNK
1034  * is the first belonging to a certain property, then PROP_NAME and
1035  * PROP_OPERATION will be set too. If we have a text hunk, PROP_NAME will be
1036  * NULL.  If IGNORE_WHITESPACE is TRUE, lines without leading spaces will be
1037  * treated as context lines.  Allocate results in RESULT_POOL.
1038  * Use SCRATCH_POOL for all other allocations. */
1039 static svn_error_t *
parse_next_hunk(svn_diff_hunk_t ** hunk,svn_boolean_t * is_property,const char ** prop_name,svn_diff_operation_kind_t * prop_operation,svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1040 parse_next_hunk(svn_diff_hunk_t **hunk,
1041                 svn_boolean_t *is_property,
1042                 const char **prop_name,
1043                 svn_diff_operation_kind_t *prop_operation,
1044                 svn_patch_t *patch,
1045                 apr_file_t *apr_file,
1046                 svn_boolean_t ignore_whitespace,
1047                 apr_pool_t *result_pool,
1048                 apr_pool_t *scratch_pool)
1049 {
1050   static const char * const minus = "--- ";
1051   static const char * const text_atat = "@@";
1052   static const char * const prop_atat = "##";
1053   svn_stringbuf_t *line;
1054   svn_boolean_t eof, in_hunk, hunk_seen;
1055   apr_off_t pos, last_line;
1056   apr_off_t start, end;
1057   apr_off_t original_end;
1058   apr_off_t modified_end;
1059   svn_boolean_t original_no_final_eol = FALSE;
1060   svn_boolean_t modified_no_final_eol = FALSE;
1061   svn_linenum_t original_lines;
1062   svn_linenum_t modified_lines;
1063   svn_linenum_t leading_context;
1064   svn_linenum_t trailing_context;
1065   svn_boolean_t changed_line_seen;
1066   enum {
1067     noise_line,
1068     original_line,
1069     modified_line,
1070     context_line
1071   } last_line_type;
1072   apr_pool_t *iterpool;
1073 
1074   *prop_operation = svn_diff_op_unchanged;
1075 
1076   /* We only set this if we have a property hunk header. */
1077   *prop_name = NULL;
1078   *is_property = FALSE;
1079 
1080   if (apr_file_eof(apr_file) == APR_EOF)
1081     {
1082       /* No more hunks here. */
1083       *hunk = NULL;
1084       return SVN_NO_ERROR;
1085     }
1086 
1087   in_hunk = FALSE;
1088   hunk_seen = FALSE;
1089   leading_context = 0;
1090   trailing_context = 0;
1091   changed_line_seen = FALSE;
1092   original_end = 0;
1093   modified_end = 0;
1094   *hunk = apr_pcalloc(result_pool, sizeof(**hunk));
1095 
1096   /* Get current seek position. */
1097   SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
1098 
1099   /* Start out assuming noise. */
1100   last_line_type = noise_line;
1101 
1102   iterpool = svn_pool_create(scratch_pool);
1103   do
1104     {
1105 
1106       svn_pool_clear(iterpool);
1107 
1108       /* Remember the current line's offset, and read the line. */
1109       last_line = pos;
1110       SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
1111                                    iterpool, iterpool));
1112 
1113       /* Update line offset for next iteration. */
1114       SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
1115 
1116       /* Lines starting with a backslash indicate a missing EOL:
1117        * "\ No newline at end of file" or "end of property". */
1118       if (line->data[0] == '\\')
1119         {
1120           if (in_hunk)
1121             {
1122               char eolbuf[2];
1123               apr_size_t len;
1124               apr_off_t off;
1125               apr_off_t hunk_text_end;
1126 
1127               /* Comment terminates the hunk text and says the hunk text
1128                * has no trailing EOL. Snip off trailing EOL which is part
1129                * of the patch file but not part of the hunk text. */
1130               off = last_line - 2;
1131               SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &off, iterpool));
1132               len = sizeof(eolbuf);
1133               SVN_ERR(svn_io_file_read_full2(apr_file, eolbuf, len, &len,
1134                                              &eof, iterpool));
1135               if (eolbuf[0] == '\r' && eolbuf[1] == '\n')
1136                 hunk_text_end = last_line - 2;
1137               else if (eolbuf[1] == '\n' || eolbuf[1] == '\r')
1138                 hunk_text_end = last_line - 1;
1139               else
1140                 hunk_text_end = last_line;
1141 
1142               if (last_line_type == original_line && original_end == 0)
1143                 original_end = hunk_text_end;
1144               else if (last_line_type == modified_line && modified_end == 0)
1145                 modified_end = hunk_text_end;
1146               else if (last_line_type == context_line)
1147                 {
1148                   if (original_end == 0)
1149                     original_end = hunk_text_end;
1150                   if (modified_end == 0)
1151                     modified_end = hunk_text_end;
1152                 }
1153 
1154               SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &pos, iterpool));
1155               /* Set for the type and context by using != the other type */
1156               if (last_line_type != modified_line)
1157                 original_no_final_eol = TRUE;
1158               if (last_line_type != original_line)
1159                 modified_no_final_eol = TRUE;
1160             }
1161 
1162           continue;
1163         }
1164 
1165       if (in_hunk && *is_property && *prop_name &&
1166           strcmp(*prop_name, SVN_PROP_MERGEINFO) == 0)
1167         {
1168           svn_boolean_t found_mergeinfo;
1169 
1170           SVN_ERR(parse_mergeinfo(&found_mergeinfo, line, *hunk, patch,
1171                                   result_pool, iterpool));
1172           if (found_mergeinfo)
1173             continue; /* Proceed to the next line in the svn:mergeinfo hunk. */
1174           else
1175             {
1176               /* Perhaps we can also use original_lines/modified_lines here */
1177 
1178               in_hunk = FALSE; /* On to next property */
1179             }
1180         }
1181 
1182       if (in_hunk)
1183         {
1184           char c;
1185           static const char add = '+';
1186           static const char del = '-';
1187 
1188           if (! hunk_seen)
1189             {
1190               /* We're reading the first line of the hunk, so the start
1191                * of the line just read is the hunk text's byte offset. */
1192               start = last_line;
1193             }
1194 
1195           c = line->data[0];
1196           if (c == ' '
1197               || ((original_lines > 0 && modified_lines > 0)
1198                   && (
1199                /* Tolerate chopped leading spaces on empty lines. */
1200                       (! eof && line->len == 0)
1201                /* Maybe tolerate chopped leading spaces on non-empty lines. */
1202                       || (ignore_whitespace && c != del && c != add))))
1203             {
1204               /* It's a "context" line in the hunk. */
1205               hunk_seen = TRUE;
1206               if (original_lines > 0)
1207                 original_lines--;
1208               else
1209                 {
1210                   (*hunk)->original_length++;
1211                   (*hunk)->original_fuzz++;
1212                 }
1213               if (modified_lines > 0)
1214                 modified_lines--;
1215               else
1216                 {
1217                   (*hunk)->modified_length++;
1218                   (*hunk)->modified_fuzz++;
1219                 }
1220               if (changed_line_seen)
1221                 trailing_context++;
1222               else
1223                 leading_context++;
1224               last_line_type = context_line;
1225             }
1226           else if (c == del
1227                    && (original_lines > 0 || line->data[1] != del))
1228             {
1229               /* It's a "deleted" line in the hunk. */
1230               hunk_seen = TRUE;
1231               changed_line_seen = TRUE;
1232 
1233               /* A hunk may have context in the middle. We only want
1234                  trailing lines of context. */
1235               if (trailing_context > 0)
1236                 trailing_context = 0;
1237 
1238               if (original_lines > 0)
1239                 original_lines--;
1240               else
1241                 {
1242                   (*hunk)->original_length++;
1243                   (*hunk)->original_fuzz++;
1244                 }
1245               last_line_type = original_line;
1246             }
1247           else if (c == add
1248                    && (modified_lines > 0 || line->data[1] != add))
1249             {
1250               /* It's an "added" line in the hunk. */
1251               hunk_seen = TRUE;
1252               changed_line_seen = TRUE;
1253 
1254               /* A hunk may have context in the middle. We only want
1255                  trailing lines of context. */
1256               if (trailing_context > 0)
1257                 trailing_context = 0;
1258 
1259               if (modified_lines > 0)
1260                 modified_lines--;
1261               else
1262                 {
1263                   (*hunk)->modified_length++;
1264                   (*hunk)->modified_fuzz++;
1265                 }
1266               last_line_type = modified_line;
1267             }
1268           else
1269             {
1270               if (eof)
1271                 {
1272                   /* The hunk ends at EOF. */
1273                   end = pos;
1274                 }
1275               else
1276                 {
1277                   /* The start of the current line marks the first byte
1278                    * after the hunk text. */
1279                   end = last_line;
1280                 }
1281               if (original_end == 0)
1282                 original_end = end;
1283               if (modified_end == 0)
1284                 modified_end = end;
1285               break; /* Hunk was empty or has been read. */
1286             }
1287         }
1288       else
1289         {
1290           if (starts_with(line->data, text_atat))
1291             {
1292               /* Looks like we have a hunk header, try to rip it apart. */
1293               in_hunk = parse_hunk_header(line->data, *hunk, text_atat,
1294                                           iterpool);
1295               if (in_hunk)
1296                 {
1297                   original_lines = (*hunk)->original_length;
1298                   modified_lines = (*hunk)->modified_length;
1299                   *is_property = FALSE;
1300                 }
1301               }
1302           else if (starts_with(line->data, prop_atat))
1303             {
1304               /* Looks like we have a property hunk header, try to rip it
1305                * apart. */
1306               in_hunk = parse_hunk_header(line->data, *hunk, prop_atat,
1307                                           iterpool);
1308               if (in_hunk)
1309                 {
1310                   original_lines = (*hunk)->original_length;
1311                   modified_lines = (*hunk)->modified_length;
1312                   *is_property = TRUE;
1313                 }
1314             }
1315           else if (starts_with(line->data, "Added: "))
1316             {
1317               SVN_ERR(parse_prop_name(prop_name, line->data, "Added: ",
1318                                       result_pool));
1319               if (*prop_name)
1320                 *prop_operation = (patch->reverse ? svn_diff_op_deleted
1321                                                   : svn_diff_op_added);
1322             }
1323           else if (starts_with(line->data, "Deleted: "))
1324             {
1325               SVN_ERR(parse_prop_name(prop_name, line->data, "Deleted: ",
1326                                       result_pool));
1327               if (*prop_name)
1328                 *prop_operation = (patch->reverse ? svn_diff_op_added
1329                                                   : svn_diff_op_deleted);
1330             }
1331           else if (starts_with(line->data, "Modified: "))
1332             {
1333               SVN_ERR(parse_prop_name(prop_name, line->data, "Modified: ",
1334                                       result_pool));
1335               if (*prop_name)
1336                 *prop_operation = svn_diff_op_modified;
1337             }
1338           else if (starts_with(line->data, minus)
1339                    || starts_with(line->data, "diff --git "))
1340             /* This could be a header of another patch. Bail out. */
1341             break;
1342         }
1343     }
1344   /* Check for the line length since a file may not have a newline at the
1345    * end and we depend upon the last line to be an empty one. */
1346   while (! eof || line->len > 0);
1347   svn_pool_destroy(iterpool);
1348 
1349   if (! eof)
1350     /* Rewind to the start of the line just read, so subsequent calls
1351      * to this function or svn_diff_parse_next_patch() don't end
1352      * up skipping the line -- it may contain a patch or hunk header. */
1353     SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
1354 
1355   if (hunk_seen && start < end)
1356     {
1357       /* Did we get the number of context lines announced in the header?
1358 
1359          If not... let's limit the number from the header to what we
1360          actually have, and apply a fuzz penalty */
1361       if (original_lines)
1362         {
1363           (*hunk)->original_length -= original_lines;
1364           (*hunk)->original_fuzz += original_lines;
1365         }
1366       if (modified_lines)
1367         {
1368           (*hunk)->modified_length -= modified_lines;
1369           (*hunk)->modified_fuzz += modified_lines;
1370         }
1371 
1372       (*hunk)->patch = patch;
1373       (*hunk)->apr_file = apr_file;
1374       (*hunk)->leading_context = leading_context;
1375       (*hunk)->trailing_context = trailing_context;
1376       (*hunk)->diff_text_range.start = start;
1377       (*hunk)->diff_text_range.current = start;
1378       (*hunk)->diff_text_range.end = end;
1379       (*hunk)->original_text_range.start = start;
1380       (*hunk)->original_text_range.current = start;
1381       (*hunk)->original_text_range.end = original_end;
1382       (*hunk)->modified_text_range.start = start;
1383       (*hunk)->modified_text_range.current = start;
1384       (*hunk)->modified_text_range.end = modified_end;
1385       (*hunk)->original_no_final_eol = original_no_final_eol;
1386       (*hunk)->modified_no_final_eol = modified_no_final_eol;
1387     }
1388   else
1389     /* Something went wrong, just discard the result. */
1390     *hunk = NULL;
1391 
1392   return SVN_NO_ERROR;
1393 }
1394 
1395 /* Compare function for sorting hunks after parsing.
1396  * We sort hunks by their original line offset. */
1397 static int
compare_hunks(const void * a,const void * b)1398 compare_hunks(const void *a, const void *b)
1399 {
1400   const svn_diff_hunk_t *ha = *((const svn_diff_hunk_t *const *)a);
1401   const svn_diff_hunk_t *hb = *((const svn_diff_hunk_t *const *)b);
1402 
1403   if (ha->original_start < hb->original_start)
1404     return -1;
1405   if (ha->original_start > hb->original_start)
1406     return 1;
1407   return 0;
1408 }
1409 
/* The states the diff header parser can be in.  Each enumerator is
 * annotated with the kind of input that leads to it. */
enum parse_state
{
  /* initial */
  state_start,
  /* diff --git */
  state_git_diff_seen,
  /* a tree operation, rather than content change */
  state_git_tree_seen,
  /* --- /dev/null; or --- a/ */
  state_git_minus_seen,
  /* +++ /dev/null; or +++ a/ */
  state_git_plus_seen,
  /* old mode 100644 */
  state_old_mode_seen,
  /* new mode 100644 */
  state_git_mode_seen,
  /* rename from foo.c */
  state_move_from_seen,
  /* copy from foo.c */
  state_copy_from_seen,
  /* --- foo.c */
  state_minus_seen,
  /* valid start of a regular unidiff header */
  state_unidiff_found,
  /* valid start of a --git diff header */
  state_git_header_found,
  /* valid start of binary patch */
  state_binary_patch_found
};
1427 
1428 /* Data type describing a valid state transition of the parser. */
1429 struct transition
1430 {
1431   const char *expected_input;
1432   enum parse_state required_state;
1433 
1434   /* A callback called upon each parser state transition. */
1435   svn_error_t *(*fn)(enum parse_state *new_state, char *input,
1436                      svn_patch_t *patch, apr_pool_t *result_pool,
1437                      apr_pool_t *scratch_pool);
1438 };
1439 
1440 /* UTF-8 encode and canonicalize the content of LINE as FILE_NAME. */
1441 static svn_error_t *
grab_filename(const char ** file_name,const char * line,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1442 grab_filename(const char **file_name, const char *line, apr_pool_t *result_pool,
1443               apr_pool_t *scratch_pool)
1444 {
1445   const char *utf8_path;
1446   const char *canon_path;
1447 
1448   /* Grab the filename and encode it in UTF-8. */
1449   /* TODO: Allow specifying the patch file's encoding.
1450    *       For now, we assume its encoding is native. */
1451   /* ### This can fail if the filename cannot be represented in the current
1452    * ### locale's encoding. */
1453   SVN_ERR(svn_utf_cstring_to_utf8(&utf8_path,
1454                                   line,
1455                                   scratch_pool));
1456 
1457   /* Canonicalize the path name. */
1458   canon_path = svn_dirent_canonicalize(utf8_path, scratch_pool);
1459 
1460   *file_name = apr_pstrdup(result_pool, canon_path);
1461 
1462   return SVN_NO_ERROR;
1463 }
1464 
1465 /* Parse the '--- ' line of a regular unidiff. */
1466 static svn_error_t *
diff_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1467 diff_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1468            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1469 {
1470   /* If we can find a tab, it separates the filename from
1471    * the rest of the line which we can discard. */
1472   char *tab = strchr(line, '\t');
1473   if (tab)
1474     *tab = '\0';
1475 
1476   SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- "),
1477                         result_pool, scratch_pool));
1478 
1479   *new_state = state_minus_seen;
1480 
1481   return SVN_NO_ERROR;
1482 }
1483 
1484 /* Parse the '+++ ' line of a regular unidiff. */
1485 static svn_error_t *
diff_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1486 diff_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1487            apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1488 {
1489   /* If we can find a tab, it separates the filename from
1490    * the rest of the line which we can discard. */
1491   char *tab = strchr(line, '\t');
1492   if (tab)
1493     *tab = '\0';
1494 
1495   SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ "),
1496                         result_pool, scratch_pool));
1497 
1498   *new_state = state_unidiff_found;
1499 
1500   return SVN_NO_ERROR;
1501 }
1502 
1503 /* Parse the first line of a git extended unidiff. */
1504 static svn_error_t *
git_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1505 git_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1506           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1507 {
1508   const char *old_path_start;
1509   char *old_path_end;
1510   const char *new_path_start;
1511   const char *new_path_end;
1512   char *new_path_marker;
1513   const char *old_path_marker;
1514 
1515   /* ### Add handling of escaped paths
1516    * http://www.kernel.org/pub/software/scm/git/docs/git-diff.html:
1517    *
1518    * TAB, LF, double quote and backslash characters in pathnames are
1519    * represented as \t, \n, \" and \\, respectively. If there is need for
1520    * such substitution then the whole pathname is put in double quotes.
1521    */
1522 
1523   /* Our line should look like this: 'diff --git a/path b/path'.
1524    *
1525    * If we find any deviations from that format, we return with state reset
1526    * to start.
1527    */
1528   old_path_marker = strstr(line, " a/");
1529 
1530   if (! old_path_marker)
1531     {
1532       *new_state = state_start;
1533       return SVN_NO_ERROR;
1534     }
1535 
1536   if (! *(old_path_marker + 3))
1537     {
1538       *new_state = state_start;
1539       return SVN_NO_ERROR;
1540     }
1541 
1542   new_path_marker = strstr(old_path_marker, " b/");
1543 
1544   if (! new_path_marker)
1545     {
1546       *new_state = state_start;
1547       return SVN_NO_ERROR;
1548     }
1549 
1550   if (! *(new_path_marker + 3))
1551     {
1552       *new_state = state_start;
1553       return SVN_NO_ERROR;
1554     }
1555 
1556   /* By now, we know that we have a line on the form '--git diff a/.+ b/.+'
1557    * We only need the filenames when we have deleted or added empty
1558    * files. In those cases the old_path and new_path is identical on the
1559    * 'diff --git' line.  For all other cases we fetch the filenames from
1560    * other header lines. */
1561   old_path_start = line + STRLEN_LITERAL("diff --git a/");
1562   new_path_end = line + strlen(line);
1563   new_path_start = old_path_start;
1564 
1565   while (TRUE)
1566     {
1567       ptrdiff_t len_old;
1568       ptrdiff_t len_new;
1569 
1570       new_path_marker = strstr(new_path_start, " b/");
1571 
1572       /* No new path marker, bail out. */
1573       if (! new_path_marker)
1574         break;
1575 
1576       old_path_end = new_path_marker;
1577       new_path_start = new_path_marker + STRLEN_LITERAL(" b/");
1578 
1579       /* No path after the marker. */
1580       if (! *new_path_start)
1581         break;
1582 
1583       len_old = old_path_end - old_path_start;
1584       len_new = new_path_end - new_path_start;
1585 
1586       /* Are the paths before and after the " b/" marker the same? */
1587       if (len_old == len_new
1588           && ! strncmp(old_path_start, new_path_start, len_old))
1589         {
1590           *old_path_end = '\0';
1591           SVN_ERR(grab_filename(&patch->old_filename, old_path_start,
1592                                 result_pool, scratch_pool));
1593 
1594           SVN_ERR(grab_filename(&patch->new_filename, new_path_start,
1595                                 result_pool, scratch_pool));
1596           break;
1597         }
1598     }
1599 
1600   /* We assume that the path is only modified until we've found a 'tree'
1601    * header */
1602   patch->operation = svn_diff_op_modified;
1603 
1604   *new_state = state_git_diff_seen;
1605   return SVN_NO_ERROR;
1606 }
1607 
1608 /* Parse the '--- ' line of a git extended unidiff. */
1609 static svn_error_t *
git_minus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1610 git_minus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1611           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1612 {
1613   /* If we can find a tab, it separates the filename from
1614    * the rest of the line which we can discard. */
1615   char *tab = strchr(line, '\t');
1616   if (tab)
1617     *tab = '\0';
1618 
1619   if (starts_with(line, "--- /dev/null"))
1620     SVN_ERR(grab_filename(&patch->old_filename, "/dev/null",
1621                           result_pool, scratch_pool));
1622   else
1623     SVN_ERR(grab_filename(&patch->old_filename, line + STRLEN_LITERAL("--- a/"),
1624                           result_pool, scratch_pool));
1625 
1626   *new_state = state_git_minus_seen;
1627   return SVN_NO_ERROR;
1628 }
1629 
1630 /* Parse the '+++ ' line of a git extended unidiff. */
1631 static svn_error_t *
git_plus(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1632 git_plus(enum parse_state *new_state, char *line, svn_patch_t *patch,
1633           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1634 {
1635   /* If we can find a tab, it separates the filename from
1636    * the rest of the line which we can discard. */
1637   char *tab = strchr(line, '\t');
1638   if (tab)
1639     *tab = '\0';
1640 
1641   if (starts_with(line, "+++ /dev/null"))
1642     SVN_ERR(grab_filename(&patch->new_filename, "/dev/null",
1643                           result_pool, scratch_pool));
1644   else
1645     SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("+++ b/"),
1646                           result_pool, scratch_pool));
1647 
1648   *new_state = state_git_header_found;
1649   return SVN_NO_ERROR;
1650 }
1651 
1652 /* Helper for git_old_mode() and git_new_mode().  Translate the git
1653  * file mode MODE_STR into a binary "executable?" and "symlink?" state. */
1654 static svn_error_t *
parse_git_mode_bits(svn_tristate_t * executable_p,svn_tristate_t * symlink_p,const char * mode_str)1655 parse_git_mode_bits(svn_tristate_t *executable_p,
1656                     svn_tristate_t *symlink_p,
1657                     const char *mode_str)
1658 {
1659   apr_uint64_t mode;
1660   SVN_ERR(svn_cstring_strtoui64(&mode, mode_str,
1661                                 0 /* min */,
1662                                 0777777 /* max: six octal digits */,
1663                                 010 /* radix (octal) */));
1664 
1665   /* Note: 0644 and 0755 are the only modes that can occur for plain files.
1666    * We deliberately choose to parse only those values: we are strict in what
1667    * we accept _and_ in what we produce.
1668    *
1669    * (Having said that, though, we could consider relaxing the parser to also
1670    * map
1671    *     (mode & 0111) == 0000 -> svn_tristate_false
1672    *     (mode & 0111) == 0111 -> svn_tristate_true
1673    *        [anything else]    -> svn_tristate_unknown
1674    * .)
1675    */
1676 
1677   switch (mode & 0777)
1678     {
1679       case 0644:
1680         *executable_p = svn_tristate_false;
1681         break;
1682 
1683       case 0755:
1684         *executable_p = svn_tristate_true;
1685         break;
1686 
1687       default:
1688         /* Ignore unknown values. */
1689         *executable_p = svn_tristate_unknown;
1690         break;
1691     }
1692 
1693   switch (mode & 0170000 /* S_IFMT */)
1694     {
1695       case 0120000: /* S_IFLNK */
1696         *symlink_p = svn_tristate_true;
1697         break;
1698 
1699       case 0100000: /* S_IFREG */
1700       case 0040000: /* S_IFDIR */
1701         *symlink_p = svn_tristate_false;
1702         break;
1703 
1704       default:
1705         /* Ignore unknown values.
1706            (Including those generated by Subversion <= 1.9) */
1707         *symlink_p = svn_tristate_unknown;
1708         break;
1709     }
1710 
1711   return SVN_NO_ERROR;
1712 }
1713 
1714 /* Parse the 'old mode ' line of a git extended unidiff. */
1715 static svn_error_t *
git_old_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1716 git_old_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1717              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1718 {
1719   SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1720                               &patch->old_symlink_bit,
1721                               line + STRLEN_LITERAL("old mode ")));
1722 
1723 #ifdef SVN_DEBUG
1724   /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1725   SVN_ERR_ASSERT(patch->old_executable_bit != svn_tristate_unknown);
1726 #endif
1727 
1728   *new_state = state_old_mode_seen;
1729   return SVN_NO_ERROR;
1730 }
1731 
1732 /* Parse the 'new mode ' line of a git extended unidiff. */
1733 static svn_error_t *
git_new_mode(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1734 git_new_mode(enum parse_state *new_state, char *line, svn_patch_t *patch,
1735              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1736 {
1737   SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1738                               &patch->new_symlink_bit,
1739                               line + STRLEN_LITERAL("new mode ")));
1740 
1741 #ifdef SVN_DEBUG
1742   /* If this assert trips, the "old mode" is neither ...644 nor ...755 . */
1743   SVN_ERR_ASSERT(patch->new_executable_bit != svn_tristate_unknown);
1744 #endif
1745 
1746   /* Don't touch patch->operation. */
1747 
1748   *new_state = state_git_mode_seen;
1749   return SVN_NO_ERROR;
1750 }
1751 
1752 static svn_error_t *
git_index(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1753 git_index(enum parse_state *new_state, char *line, svn_patch_t *patch,
1754           apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1755 {
1756   /* We either have something like "index 33e5b38..0000000" (which we just
1757      ignore as we are not interested in git specific shas) or something like
1758      "index 33e5b38..0000000 120000" which tells us the mode, that isn't
1759      changed by applying this patch.
1760 
1761      If the mode would have changed then we would see 'old mode' and 'new mode'
1762      lines.
1763   */
1764   line = strchr(line + STRLEN_LITERAL("index "), ' ');
1765 
1766   if (line && patch->new_executable_bit == svn_tristate_unknown
1767            && patch->new_symlink_bit == svn_tristate_unknown
1768            && patch->operation != svn_diff_op_added
1769            && patch->operation != svn_diff_op_deleted)
1770     {
1771       SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1772                                   &patch->new_symlink_bit,
1773                                   line + 1));
1774 
1775       /* There is no change.. so set the old values to the new values */
1776       patch->old_executable_bit = patch->new_executable_bit;
1777       patch->old_symlink_bit = patch->new_symlink_bit;
1778     }
1779 
1780   /* This function doesn't change the state! */
1781   /* *new_state = *new_state */
1782   return SVN_NO_ERROR;
1783 }
1784 
1785 /* Parse the 'rename from ' line of a git extended unidiff. */
1786 static svn_error_t *
git_move_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1787 git_move_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1788               apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1789 {
1790   SVN_ERR(grab_filename(&patch->old_filename,
1791                         line + STRLEN_LITERAL("rename from "),
1792                         result_pool, scratch_pool));
1793 
1794   *new_state = state_move_from_seen;
1795   return SVN_NO_ERROR;
1796 }
1797 
1798 /* Parse the 'rename to ' line of a git extended unidiff. */
1799 static svn_error_t *
git_move_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1800 git_move_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1801             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1802 {
1803   SVN_ERR(grab_filename(&patch->new_filename,
1804                         line + STRLEN_LITERAL("rename to "),
1805                         result_pool, scratch_pool));
1806 
1807   patch->operation = svn_diff_op_moved;
1808 
1809   *new_state = state_git_tree_seen;
1810   return SVN_NO_ERROR;
1811 }
1812 
1813 /* Parse the 'copy from ' line of a git extended unidiff. */
1814 static svn_error_t *
git_copy_from(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1815 git_copy_from(enum parse_state *new_state, char *line, svn_patch_t *patch,
1816               apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1817 {
1818   SVN_ERR(grab_filename(&patch->old_filename,
1819                         line + STRLEN_LITERAL("copy from "),
1820                         result_pool, scratch_pool));
1821 
1822   *new_state = state_copy_from_seen;
1823   return SVN_NO_ERROR;
1824 }
1825 
1826 /* Parse the 'copy to ' line of a git extended unidiff. */
1827 static svn_error_t *
git_copy_to(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1828 git_copy_to(enum parse_state *new_state, char *line, svn_patch_t *patch,
1829             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1830 {
1831   SVN_ERR(grab_filename(&patch->new_filename, line + STRLEN_LITERAL("copy to "),
1832                         result_pool, scratch_pool));
1833 
1834   patch->operation = svn_diff_op_copied;
1835 
1836   *new_state = state_git_tree_seen;
1837   return SVN_NO_ERROR;
1838 }
1839 
1840 /* Parse the 'new file ' line of a git extended unidiff. */
1841 static svn_error_t *
git_new_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1842 git_new_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1843              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1844 {
1845   SVN_ERR(parse_git_mode_bits(&patch->new_executable_bit,
1846                               &patch->new_symlink_bit,
1847                               line + STRLEN_LITERAL("new file mode ")));
1848 
1849   patch->operation = svn_diff_op_added;
1850 
1851   /* Filename already retrieved from diff --git header. */
1852 
1853   *new_state = state_git_tree_seen;
1854   return SVN_NO_ERROR;
1855 }
1856 
1857 /* Parse the 'deleted file ' line of a git extended unidiff. */
1858 static svn_error_t *
git_deleted_file(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1859 git_deleted_file(enum parse_state *new_state, char *line, svn_patch_t *patch,
1860                  apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1861 {
1862   SVN_ERR(parse_git_mode_bits(&patch->old_executable_bit,
1863                               &patch->old_symlink_bit,
1864                               line + STRLEN_LITERAL("deleted file mode ")));
1865 
1866   patch->operation = svn_diff_op_deleted;
1867 
1868   /* Filename already retrieved from diff --git header. */
1869 
1870   *new_state = state_git_tree_seen;
1871   return SVN_NO_ERROR;
1872 }
1873 
1874 /* Parse the 'GIT binary patch' header */
1875 static svn_error_t *
binary_patch_start(enum parse_state * new_state,char * line,svn_patch_t * patch,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1876 binary_patch_start(enum parse_state *new_state, char *line, svn_patch_t *patch,
1877              apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1878 {
1879   *new_state = state_binary_patch_found;
1880   return SVN_NO_ERROR;
1881 }
1882 
1883 
1884 /* Add a HUNK associated with the property PROP_NAME to PATCH. */
1885 static svn_error_t *
add_property_hunk(svn_patch_t * patch,const char * prop_name,svn_diff_hunk_t * hunk,svn_diff_operation_kind_t operation,apr_pool_t * result_pool)1886 add_property_hunk(svn_patch_t *patch, const char *prop_name,
1887                   svn_diff_hunk_t *hunk, svn_diff_operation_kind_t operation,
1888                   apr_pool_t *result_pool)
1889 {
1890   svn_prop_patch_t *prop_patch;
1891 
1892   prop_patch = svn_hash_gets(patch->prop_patches, prop_name);
1893 
1894   if (! prop_patch)
1895     {
1896       prop_patch = apr_palloc(result_pool, sizeof(svn_prop_patch_t));
1897       prop_patch->name = prop_name;
1898       prop_patch->operation = operation;
1899       prop_patch->hunks = apr_array_make(result_pool, 1,
1900                                          sizeof(svn_diff_hunk_t *));
1901 
1902       svn_hash_sets(patch->prop_patches, prop_name, prop_patch);
1903     }
1904 
1905   APR_ARRAY_PUSH(prop_patch->hunks, svn_diff_hunk_t *) = hunk;
1906 
1907   return SVN_NO_ERROR;
1908 }
1909 
/* Read position within a patch file that is parsed one patch at a time.
 * Created by svn_diff_open_patch_file(); svn_diff_parse_next_patch()
 * seeks to NEXT_PATCH_OFFSET before parsing and updates it afterwards;
 * svn_diff_close_patch_file() closes APR_FILE. */
struct svn_patch_file_t
{
  /* The APR file handle to the patch file. */
  apr_file_t *apr_file;

  /* The file offset at which the next patch is expected.
   * Starts out as 0, i.e. the beginning of the file. */
  apr_off_t next_patch_offset;
};
1918 
1919 svn_error_t *
svn_diff_open_patch_file(svn_patch_file_t ** patch_file,const char * local_abspath,apr_pool_t * result_pool)1920 svn_diff_open_patch_file(svn_patch_file_t **patch_file,
1921                          const char *local_abspath,
1922                          apr_pool_t *result_pool)
1923 {
1924   svn_patch_file_t *p;
1925 
1926   p = apr_palloc(result_pool, sizeof(*p));
1927   SVN_ERR(svn_io_file_open(&p->apr_file, local_abspath,
1928                            APR_READ | APR_BUFFERED, APR_OS_DEFAULT,
1929                            result_pool));
1930   p->next_patch_offset = 0;
1931   *patch_file = p;
1932 
1933   return SVN_NO_ERROR;
1934 }
1935 
1936 /* Parse hunks from APR_FILE and store them in PATCH->HUNKS.
1937  * Parsing stops if no valid next hunk can be found.
1938  * If IGNORE_WHITESPACE is TRUE, lines without
1939  * leading spaces will be treated as context lines.
1940  * Allocate results in RESULT_POOL.
1941  * Use SCRATCH_POOL for temporary allocations. */
1942 static svn_error_t *
parse_hunks(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t ignore_whitespace,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1943 parse_hunks(svn_patch_t *patch, apr_file_t *apr_file,
1944             svn_boolean_t ignore_whitespace,
1945             apr_pool_t *result_pool, apr_pool_t *scratch_pool)
1946 {
1947   svn_diff_hunk_t *hunk;
1948   svn_boolean_t is_property;
1949   const char *last_prop_name;
1950   const char *prop_name;
1951   svn_diff_operation_kind_t prop_operation;
1952   apr_pool_t *iterpool;
1953 
1954   last_prop_name = NULL;
1955 
1956   patch->hunks = apr_array_make(result_pool, 10, sizeof(svn_diff_hunk_t *));
1957   patch->prop_patches = apr_hash_make(result_pool);
1958   iterpool = svn_pool_create(scratch_pool);
1959   do
1960     {
1961       svn_pool_clear(iterpool);
1962 
1963       SVN_ERR(parse_next_hunk(&hunk, &is_property, &prop_name, &prop_operation,
1964                               patch, apr_file, ignore_whitespace, result_pool,
1965                               iterpool));
1966 
1967       if (hunk && is_property)
1968         {
1969           if (! prop_name)
1970             prop_name = last_prop_name;
1971           else
1972             last_prop_name = prop_name;
1973 
1974           /* Skip svn:mergeinfo properties.
1975            * Mergeinfo data cannot be represented as a hunk and
1976            * is therefore stored in PATCH itself. */
1977           if (strcmp(prop_name, SVN_PROP_MERGEINFO) == 0)
1978             continue;
1979 
1980           SVN_ERR(add_property_hunk(patch, prop_name, hunk, prop_operation,
1981                                     result_pool));
1982         }
1983       else if (hunk)
1984         {
1985           APR_ARRAY_PUSH(patch->hunks, svn_diff_hunk_t *) = hunk;
1986           last_prop_name = NULL;
1987         }
1988 
1989     }
1990   while (hunk);
1991   svn_pool_destroy(iterpool);
1992 
1993   return SVN_NO_ERROR;
1994 }
1995 
1996 static svn_error_t *
parse_binary_patch(svn_patch_t * patch,apr_file_t * apr_file,svn_boolean_t reverse,apr_pool_t * result_pool,apr_pool_t * scratch_pool)1997 parse_binary_patch(svn_patch_t *patch, apr_file_t *apr_file,
1998                    svn_boolean_t reverse,
1999                    apr_pool_t *result_pool, apr_pool_t *scratch_pool)
2000 {
2001   apr_pool_t *iterpool = svn_pool_create(scratch_pool);
2002   apr_off_t pos, last_line;
2003   svn_stringbuf_t *line;
2004   svn_boolean_t eof = FALSE;
2005   svn_diff_binary_patch_t *bpatch = apr_pcalloc(result_pool, sizeof(*bpatch));
2006   svn_boolean_t in_blob = FALSE;
2007   svn_boolean_t in_src = FALSE;
2008 
2009   bpatch->apr_file = apr_file;
2010 
2011   patch->prop_patches = apr_hash_make(result_pool);
2012 
2013   SVN_ERR(svn_io_file_get_offset(&pos, apr_file, scratch_pool));
2014 
2015   while (!eof)
2016     {
2017       last_line = pos;
2018       SVN_ERR(svn_io_file_readline(apr_file, &line, NULL, &eof, APR_SIZE_MAX,
2019                                iterpool, iterpool));
2020 
2021       /* Update line offset for next iteration. */
2022       SVN_ERR(svn_io_file_get_offset(&pos, apr_file, iterpool));
2023 
2024       if (in_blob)
2025         {
2026           char c = line->data[0];
2027 
2028           /* 66 = len byte + (52/4*5) chars */
2029           if (((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
2030               && line->len <= 66
2031               && !strchr(line->data, ':')
2032               && !strchr(line->data, ' '))
2033             {
2034               /* One more blop line */
2035               if (in_src)
2036                 bpatch->src_end = pos;
2037               else
2038                 bpatch->dst_end = pos;
2039             }
2040           else if (svn_stringbuf_first_non_whitespace(line) < line->len
2041                    && !(in_src && bpatch->src_start < last_line))
2042             {
2043               break; /* Bad patch */
2044             }
2045           else if (in_src)
2046             {
2047               patch->binary_patch = bpatch; /* SUCCESS! */
2048               break;
2049             }
2050           else
2051             {
2052               in_blob = FALSE;
2053               in_src = TRUE;
2054             }
2055         }
2056       else if (starts_with(line->data, "literal "))
2057         {
2058           apr_uint64_t expanded_size;
2059           svn_error_t *err = svn_cstring_strtoui64(&expanded_size,
2060                                                    &line->data[8],
2061                                                    0, APR_UINT64_MAX, 10);
2062 
2063           if (err)
2064             {
2065               svn_error_clear(err);
2066               break;
2067             }
2068 
2069           if (in_src)
2070             {
2071               bpatch->src_start = pos;
2072               bpatch->src_filesize = expanded_size;
2073             }
2074           else
2075             {
2076               bpatch->dst_start = pos;
2077               bpatch->dst_filesize = expanded_size;
2078             }
2079           in_blob = TRUE;
2080         }
2081       else
2082         break; /* We don't support GIT deltas (yet) */
2083     }
2084   svn_pool_destroy(iterpool);
2085 
2086   if (!eof)
2087     /* Rewind to the start of the line just read, so subsequent calls
2088      * don't end up skipping the line. It may contain a patch or hunk header.*/
2089     SVN_ERR(svn_io_file_seek(apr_file, APR_SET, &last_line, scratch_pool));
2090   else if (in_src
2091            && ((bpatch->src_end > bpatch->src_start) || !bpatch->src_filesize))
2092     {
2093       patch->binary_patch = bpatch; /* SUCCESS */
2094     }
2095 
2096   /* Reverse patch if requested */
2097   if (reverse && patch->binary_patch)
2098     {
2099       apr_off_t tmp_start = bpatch->src_start;
2100       apr_off_t tmp_end = bpatch->src_end;
2101       svn_filesize_t tmp_filesize = bpatch->src_filesize;
2102 
2103       bpatch->src_start = bpatch->dst_start;
2104       bpatch->src_end = bpatch->dst_end;
2105       bpatch->src_filesize = bpatch->dst_filesize;
2106 
2107       bpatch->dst_start = tmp_start;
2108       bpatch->dst_end = tmp_end;
2109       bpatch->dst_filesize = tmp_filesize;
2110     }
2111 
2112   return SVN_NO_ERROR;
2113 }
2114 
/* State machine for the diff header parser.
 *
 * Each row means: when the parser is in REQUIRED STATE and reads a line
 * that starts with EXPECTED INPUT, call FUNCTION TO CALL.  The function
 * receives the line and the patch being built and reports the state to
 * continue in.  svn_diff_parse_next_patch() walks the rows from top to
 * bottom and uses the first row for which both the prefix and the state
 * match.
 *
 * Note that the rows for "new file " and "deleted file " match on a
 * shorter prefix than the complete "new file mode " / "deleted file mode "
 * headers which their handlers are written for.
 *
 * Expected Input   Required state          Function to call */
static struct transition transitions[] =
{
  /* Plain unidiff headers. */
  {"--- ",              state_start,            diff_minus},
  {"+++ ",              state_minus_seen,       diff_plus},

  /* Git extended headers: start of a patch and the file name lines. */
  {"diff --git",        state_start,            git_start},
  {"--- a/",            state_git_diff_seen,    git_minus},
  {"--- a/",            state_git_mode_seen,    git_minus},
  {"--- a/",            state_git_tree_seen,    git_minus},
  {"--- /dev/null",     state_git_mode_seen,    git_minus},
  {"--- /dev/null",     state_git_tree_seen,    git_minus},
  {"+++ b/",            state_git_minus_seen,   git_plus},
  {"+++ /dev/null",     state_git_minus_seen,   git_plus},

  /* Mode changes: 'new mode' is only accepted right after 'old mode'. */
  {"old mode ",         state_git_diff_seen,    git_old_mode},
  {"new mode ",         state_old_mode_seen,    git_new_mode},

  /* Moves: 'rename to' is only accepted right after 'rename from'. */
  {"rename from ",      state_git_diff_seen,    git_move_from},
  {"rename from ",      state_git_mode_seen,    git_move_from},
  {"rename to ",        state_move_from_seen,   git_move_to},

  /* Copies: 'copy to' is only accepted right after 'copy from'. */
  {"copy from ",        state_git_diff_seen,    git_copy_from},
  {"copy from ",        state_git_mode_seen,    git_copy_from},
  {"copy to ",          state_copy_from_seen,   git_copy_to},

  /* Additions and deletions. */
  {"new file ",         state_git_diff_seen,    git_new_file},

  {"deleted file ",     state_git_diff_seen,    git_deleted_file},

  /* 'index' lines; git_index() leaves the state as it is. */
  {"index ",            state_git_diff_seen,    git_index},
  {"index ",            state_git_tree_seen,    git_index},
  {"index ",            state_git_mode_seen,    git_index},

  /* Start of a binary patch body. */
  {"GIT binary patch",  state_git_diff_seen,    binary_patch_start},
  {"GIT binary patch",  state_git_tree_seen,    binary_patch_start},
  {"GIT binary patch",  state_git_mode_seen,    binary_patch_start},
};
2154 
/* Parse the next patch from PATCH_FILE and return it in *PATCH_P,
 * allocated in RESULT_POOL.
 *
 * Reading starts at PATCH_FILE->next_patch_offset, which is updated to
 * the position reached before returning.  Header lines are fed through
 * the TRANSITIONS table until a complete unidiff header, git header or
 * git binary patch header has been recognised; then the binary patch
 * body (if any) and the hunks are parsed.
 *
 * *PATCH_P is set to NULL if the file handle already reports end-of-file,
 * or if no old and new filename could be determined for the patch.
 *
 * If REVERSE is TRUE, old and new filenames, executable and symlink bits
 * are swapped, and the operations 'added' and 'deleted' are exchanged.
 * IGNORE_WHITESPACE is passed on to the hunk parser.
 *
 * Use SCRATCH_POOL for temporary allocations. */
svn_error_t *
svn_diff_parse_next_patch(svn_patch_t **patch_p,
                          svn_patch_file_t *patch_file,
                          svn_boolean_t reverse,
                          svn_boolean_t ignore_whitespace,
                          apr_pool_t *result_pool,
                          apr_pool_t *scratch_pool)
{
  apr_off_t pos, last_line;
  svn_boolean_t eof;
  svn_boolean_t line_after_tree_header_read = FALSE;
  apr_pool_t *iterpool;
  svn_patch_t *patch;
  enum parse_state state = state_start;

  if (apr_file_eof(patch_file->apr_file) == APR_EOF)
    {
      /* No more patches here. */
      *patch_p = NULL;
      return SVN_NO_ERROR;
    }

  /* All mode information starts out as unknown; the remaining fields are
   * zeroed by apr_pcalloc(). */
  patch = apr_pcalloc(result_pool, sizeof(*patch));
  patch->old_executable_bit = svn_tristate_unknown;
  patch->new_executable_bit = svn_tristate_unknown;
  patch->old_symlink_bit = svn_tristate_unknown;
  patch->new_symlink_bit = svn_tristate_unknown;

  /* Continue where the previous call left off. */
  pos = patch_file->next_patch_offset;
  SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &pos, scratch_pool));

  iterpool = svn_pool_create(scratch_pool);
  do
    {
      svn_stringbuf_t *line;
      svn_boolean_t valid_header_line = FALSE;
      int i;

      svn_pool_clear(iterpool);

      /* Remember the current line's offset, and read the line. */
      last_line = pos;
      SVN_ERR(svn_io_file_readline(patch_file->apr_file, &line, NULL, &eof,
                                   APR_SIZE_MAX, iterpool, iterpool));

      if (! eof)
        {
          /* Update line offset for next iteration. */
          SVN_ERR(svn_io_file_get_offset(&pos, patch_file->apr_file,
                                         iterpool));
        }

      /* Run the state machine: the first table row whose prefix matches
       * the line and whose required state is the current state handles
       * the line and may change STATE. */
      for (i = 0; i < (sizeof(transitions) / sizeof(transitions[0])); i++)
        {
          if (starts_with(line->data, transitions[i].expected_input)
              && state == transitions[i].required_state)
            {
              SVN_ERR(transitions[i].fn(&state, line->data, patch,
                                        result_pool, iterpool));
              valid_header_line = TRUE;
              break;
            }
        }

      if (state == state_unidiff_found
          || state == state_git_header_found
          || state == state_binary_patch_found)
        {
          /* We have a valid diff header, yay! */
          break;
        }
      else if ((state == state_git_tree_seen || state == state_git_mode_seen)
               && line_after_tree_header_read
               && !valid_header_line)
        {
          /* We have a valid diff header for a patch with only tree changes.
           * Rewind to the start of the line just read, so subsequent calls
           * to this function don't end up skipping the line -- it may
           * contain a patch. */
          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
                                   scratch_pool));
          break;
        }
      else if (state == state_git_tree_seen
               || state == state_git_mode_seen)
        {
          /* A tree or mode header has been seen.  From now on, the first
           * line that is not a valid header line ends the header (see the
           * branch above). */
          line_after_tree_header_read = TRUE;
        }
      else if (! valid_header_line && state != state_start
               && state != state_git_diff_seen)
        {
          /* We've encountered an invalid diff header.
           *
           * Rewind to the start of the line just read - it may be a new
           * header that begins there. */
          SVN_ERR(svn_io_file_seek(patch_file->apr_file, APR_SET, &last_line,
                                   scratch_pool));
          state = state_start;
        }

    }
  while (! eof);

  patch->reverse = reverse;
  if (reverse)
    {
      const char *temp;
      svn_tristate_t ts_tmp;

      /* Swap everything that distinguishes the old from the new side. */
      temp = patch->old_filename;
      patch->old_filename = patch->new_filename;
      patch->new_filename = temp;

      switch (patch->operation)
        {
          case svn_diff_op_added:
            patch->operation = svn_diff_op_deleted;
            break;
          case svn_diff_op_deleted:
            patch->operation = svn_diff_op_added;
            break;

          case svn_diff_op_modified:
            break; /* Stays modified. */

          case svn_diff_op_copied:
          case svn_diff_op_moved:
            break; /* Stays copied or moved, just in the other direction. */
          case svn_diff_op_unchanged:
            break; /* Stays unchanged, of course. */
        }

      ts_tmp = patch->old_executable_bit;
      patch->old_executable_bit = patch->new_executable_bit;
      patch->new_executable_bit = ts_tmp;

      ts_tmp = patch->old_symlink_bit;
      patch->old_symlink_bit = patch->new_symlink_bit;
      patch->new_symlink_bit = ts_tmp;
    }

  if (patch->old_filename == NULL || patch->new_filename == NULL)
    {
      /* Something went wrong, just discard the result. */
      patch = NULL;
    }
  else
    {
      /* ITERPOOL serves as the scratch pool for the body parsers. */
      if (state == state_binary_patch_found)
        {
          SVN_ERR(parse_binary_patch(patch, patch_file->apr_file, reverse,
                                     result_pool, iterpool));
          /* And fall through in property parsing */
        }

      SVN_ERR(parse_hunks(patch, patch_file->apr_file, ignore_whitespace,
                          result_pool, iterpool));
    }

  svn_pool_destroy(iterpool);

  /* Remember where to look for the next patch. */
  SVN_ERR(svn_io_file_get_offset(&patch_file->next_patch_offset,
                                 patch_file->apr_file, scratch_pool));

  if (patch && patch->hunks)
    {
      /* Usually, hunks appear in the patch sorted by their original line
       * offset. But just in case they weren't parsed in this order for
       * some reason, we sort them so that our caller can assume that hunks
       * are sorted as if parsed from a usual patch. */
      svn_sort__array(patch->hunks, compare_hunks);
    }

  *patch_p = patch;
  return SVN_NO_ERROR;
}
2332 
2333 svn_error_t *
svn_diff_close_patch_file(svn_patch_file_t * patch_file,apr_pool_t * scratch_pool)2334 svn_diff_close_patch_file(svn_patch_file_t *patch_file,
2335                           apr_pool_t *scratch_pool)
2336 {
2337   return svn_error_trace(svn_io_file_close(patch_file->apr_file,
2338                                            scratch_pool));
2339 }
2340