1 /*
2  * diff3.c :  routines for doing diffs
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 #include <apr.h>
26 #include <apr_pools.h>
27 #include <apr_general.h>
28 
29 #include "svn_pools.h"
30 #include "svn_error.h"
31 #include "svn_diff.h"
32 #include "svn_sorts.h"
33 #include "svn_types.h"
34 
35 #include "diff.h"
36 
37 
38 void
svn_diff__resolve_conflict(svn_diff_t * hunk,svn_diff__position_t ** position_list1,svn_diff__position_t ** position_list2,svn_diff__token_index_t num_tokens,apr_pool_t * pool)39 svn_diff__resolve_conflict(svn_diff_t *hunk,
40                            svn_diff__position_t **position_list1,
41                            svn_diff__position_t **position_list2,
42                            svn_diff__token_index_t num_tokens,
43                            apr_pool_t *pool)
44 {
45   apr_off_t modified_start = hunk->modified_start + 1;
46   apr_off_t latest_start = hunk->latest_start + 1;
47   apr_off_t common_length;
48   apr_off_t modified_length = hunk->modified_length;
49   apr_off_t latest_length = hunk->latest_length;
50   svn_diff__position_t *start_position[2];
51   svn_diff__position_t *position[2];
52   svn_diff__token_index_t *token_counts[2];
53   svn_diff__lcs_t *lcs = NULL;
54   svn_diff__lcs_t **lcs_ref = &lcs;
55   svn_diff_t **diff_ref = &hunk->resolved_diff;
56   apr_pool_t *subpool;
57 
58   /* First find the starting positions for the
59    * comparison
60    */
61 
62   start_position[0] = *position_list1;
63   start_position[1] = *position_list2;
64 
65   while (start_position[0]->offset < modified_start)
66     start_position[0] = start_position[0]->next;
67 
68   while (start_position[1]->offset < latest_start)
69     start_position[1] = start_position[1]->next;
70 
71   position[0] = start_position[0];
72   position[1] = start_position[1];
73 
74   common_length = modified_length < latest_length
75                 ? modified_length : latest_length;
76 
77   while (common_length > 0
78          && position[0]->token_index == position[1]->token_index)
79     {
80       position[0] = position[0]->next;
81       position[1] = position[1]->next;
82 
83       common_length--;
84     }
85 
86   if (common_length == 0
87       && modified_length == latest_length)
88     {
89       hunk->type = svn_diff__type_diff_common;
90       hunk->resolved_diff = NULL;
91 
92       *position_list1 = position[0];
93       *position_list2 = position[1];
94 
95       return;
96     }
97 
98   hunk->type = svn_diff__type_conflict;
99 
100   /* ### If we have a conflict we can try to find the
101    * ### common parts in it by getting an lcs between
102    * ### modified (start to start + length) and
103    * ### latest (start to start + length).
104    * ### We use this lcs to create a simple diff.  Only
105    * ### where there is a diff between the two, we have
106    * ### a conflict.
107    * ### This raises a problem; several common diffs and
108    * ### conflicts can occur within the same original
109    * ### block.  This needs some thought.
110    * ###
111    * ### NB: We can use the node _pointers_ to identify
112    * ###     different tokens
113    */
114 
115   subpool = svn_pool_create(pool);
116 
117   /* Calculate how much of the two sequences was
118    * actually the same.
119    */
120   common_length = (modified_length < latest_length
121                   ? modified_length : latest_length)
122                 - common_length;
123 
124   /* If there were matching symbols at the start of
125    * both sequences, record that fact.
126    */
127   if (common_length > 0)
128     {
129       lcs = apr_palloc(subpool, sizeof(*lcs));
130       lcs->next = NULL;
131       lcs->position[0] = start_position[0];
132       lcs->position[1] = start_position[1];
133       lcs->length = common_length;
134 
135       lcs_ref = &lcs->next;
136     }
137 
138   modified_length -= common_length;
139   latest_length -= common_length;
140 
141   modified_start = start_position[0]->offset;
142   latest_start = start_position[1]->offset;
143 
144   start_position[0] = position[0];
145   start_position[1] = position[1];
146 
147   /* Create a new ring for svn_diff__lcs to grok.
148    * We can safely do this given we don't need the
149    * positions we processed anymore.
150    */
151   if (modified_length == 0)
152     {
153       *position_list1 = position[0];
154       position[0] = NULL;
155     }
156   else
157     {
158       while (--modified_length)
159         position[0] = position[0]->next;
160 
161       *position_list1 = position[0]->next;
162       position[0]->next = start_position[0];
163     }
164 
165   if (latest_length == 0)
166     {
167       *position_list2 = position[1];
168       position[1] = NULL;
169     }
170   else
171     {
172       while (--latest_length)
173         position[1] = position[1]->next;
174 
175       *position_list2 = position[1]->next;
176       position[1]->next = start_position[1];
177     }
178 
179   token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens,
180                                                subpool);
181   token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens,
182                                                subpool);
183 
184   *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0],
185                            token_counts[1], num_tokens, 0, 0, subpool);
186 
187   /* Fix up the EOF lcs element in case one of
188    * the two sequences was NULL.
189    */
190   if ((*lcs_ref)->position[0]->offset == 1)
191     (*lcs_ref)->position[0] = *position_list1;
192 
193   if ((*lcs_ref)->position[1]->offset == 1)
194     (*lcs_ref)->position[1] = *position_list2;
195 
196   /* Produce the resolved diff */
197   while (1)
198     {
199       if (modified_start < lcs->position[0]->offset
200           || latest_start < lcs->position[1]->offset)
201         {
202           (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
203 
204           (*diff_ref)->type = svn_diff__type_conflict;
205           (*diff_ref)->original_start = hunk->original_start;
206           (*diff_ref)->original_length = hunk->original_length;
207           (*diff_ref)->modified_start = modified_start - 1;
208           (*diff_ref)->modified_length = lcs->position[0]->offset
209                                          - modified_start;
210           (*diff_ref)->latest_start = latest_start - 1;
211           (*diff_ref)->latest_length = lcs->position[1]->offset
212                                        - latest_start;
213           (*diff_ref)->resolved_diff = NULL;
214 
215           diff_ref = &(*diff_ref)->next;
216         }
217 
218       /* Detect the EOF */
219       if (lcs->length == 0)
220         break;
221 
222       modified_start = lcs->position[0]->offset;
223       latest_start = lcs->position[1]->offset;
224 
225       (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
226 
227       (*diff_ref)->type = svn_diff__type_diff_common;
228       (*diff_ref)->original_start = hunk->original_start;
229       (*diff_ref)->original_length = hunk->original_length;
230       (*diff_ref)->modified_start = modified_start - 1;
231       (*diff_ref)->modified_length = lcs->length;
232       (*diff_ref)->latest_start = latest_start - 1;
233       (*diff_ref)->latest_length = lcs->length;
234       (*diff_ref)->resolved_diff = NULL;
235 
236       diff_ref = &(*diff_ref)->next;
237 
238       modified_start += lcs->length;
239       latest_start += lcs->length;
240 
241       lcs = lcs->next;
242     }
243 
244   *diff_ref = NULL;
245 
246   svn_pool_destroy(subpool);
247 }
248 
249 
250 svn_error_t *
svn_diff_diff3_2(svn_diff_t ** diff,void * diff_baton,const svn_diff_fns2_t * vtable,apr_pool_t * pool)251 svn_diff_diff3_2(svn_diff_t **diff,
252                  void *diff_baton,
253                  const svn_diff_fns2_t *vtable,
254                  apr_pool_t *pool)
255 {
256   svn_diff__tree_t *tree;
257   svn_diff__position_t *position_list[3];
258   svn_diff__token_index_t num_tokens;
259   svn_diff__token_index_t *token_counts[3];
260   svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
261                                         svn_diff_datasource_modified,
262                                         svn_diff_datasource_latest};
263   svn_diff__lcs_t *lcs_om;
264   svn_diff__lcs_t *lcs_ol;
265   apr_pool_t *subpool;
266   apr_pool_t *treepool;
267   apr_off_t prefix_lines = 0;
268   apr_off_t suffix_lines = 0;
269 
270   *diff = NULL;
271 
272   subpool = svn_pool_create(pool);
273   treepool = svn_pool_create(pool);
274 
275   svn_diff__tree_create(&tree, treepool);
276 
277   SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
278                                    datasource, 3));
279 
280   SVN_ERR(svn_diff__get_tokens(&position_list[0],
281                                tree,
282                                diff_baton, vtable,
283                                svn_diff_datasource_original,
284                                prefix_lines,
285                                subpool));
286 
287   SVN_ERR(svn_diff__get_tokens(&position_list[1],
288                                tree,
289                                diff_baton, vtable,
290                                svn_diff_datasource_modified,
291                                prefix_lines,
292                                subpool));
293 
294   SVN_ERR(svn_diff__get_tokens(&position_list[2],
295                                tree,
296                                diff_baton, vtable,
297                                svn_diff_datasource_latest,
298                                prefix_lines,
299                                subpool));
300 
301   num_tokens = svn_diff__get_node_count(tree);
302 
303   /* Get rid of the tokens, we don't need them to calc the diff */
304   if (vtable->token_discard_all != NULL)
305     vtable->token_discard_all(diff_baton);
306 
307   /* We don't need the nodes in the tree either anymore, nor the tree itself */
308   svn_pool_destroy(treepool);
309 
310   token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
311                                                subpool);
312   token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
313                                                subpool);
314   token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
315                                                subpool);
316 
317   /* Get the lcs for original-modified and original-latest */
318   lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0],
319                          token_counts[1], num_tokens, prefix_lines,
320                          suffix_lines, subpool);
321   lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0],
322                          token_counts[2], num_tokens, prefix_lines,
323                          suffix_lines, subpool);
324 
325   /* Produce a merged diff */
326   {
327     svn_diff_t **diff_ref = diff;
328 
329     apr_off_t original_start = 1;
330     apr_off_t modified_start = 1;
331     apr_off_t latest_start = 1;
332     apr_off_t original_sync;
333     apr_off_t modified_sync;
334     apr_off_t latest_sync;
335     apr_off_t common_length;
336     apr_off_t modified_length;
337     apr_off_t latest_length;
338     svn_boolean_t is_modified;
339     svn_boolean_t is_latest;
340     svn_diff__position_t sentinel_position[2];
341 
342     /* Point the position lists to the start of the list
343      * so that common_diff/conflict detection actually is
344      * able to work.
345      */
346     if (position_list[1])
347       {
348         sentinel_position[0].next = position_list[1]->next;
349         sentinel_position[0].offset = position_list[1]->offset + 1;
350         position_list[1]->next = &sentinel_position[0];
351         position_list[1] = sentinel_position[0].next;
352       }
353     else
354       {
355         sentinel_position[0].offset = prefix_lines + 1;
356         sentinel_position[0].next = NULL;
357         position_list[1] = &sentinel_position[0];
358       }
359 
360     if (position_list[2])
361       {
362         sentinel_position[1].next = position_list[2]->next;
363         sentinel_position[1].offset = position_list[2]->offset + 1;
364         position_list[2]->next = &sentinel_position[1];
365         position_list[2] = sentinel_position[1].next;
366       }
367     else
368       {
369         sentinel_position[1].offset = prefix_lines + 1;
370         sentinel_position[1].next = NULL;
371         position_list[2] = &sentinel_position[1];
372       }
373 
374     while (1)
375       {
376         /* Find the sync points */
377         while (1)
378           {
379             if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset)
380               {
381                 original_sync = lcs_om->position[0]->offset;
382 
383                 while (lcs_ol->position[0]->offset + lcs_ol->length
384                        < original_sync)
385                   lcs_ol = lcs_ol->next;
386 
387                 /* If the sync point is the EOF, and our current lcs segment
388                  * doesn't reach as far as EOF, we need to skip this segment.
389                  */
390                 if (lcs_om->length == 0 && lcs_ol->length > 0
391                     && lcs_ol->position[0]->offset + lcs_ol->length
392                        == original_sync
393                     && lcs_ol->position[1]->offset + lcs_ol->length
394                        != lcs_ol->next->position[1]->offset)
395                   lcs_ol = lcs_ol->next;
396 
397                 if (lcs_ol->position[0]->offset <= original_sync)
398                     break;
399               }
400             else
401               {
402                 original_sync = lcs_ol->position[0]->offset;
403 
404                 while (lcs_om->position[0]->offset + lcs_om->length
405                        < original_sync)
406                   lcs_om = lcs_om->next;
407 
408                 /* If the sync point is the EOF, and our current lcs segment
409                  * doesn't reach as far as EOF, we need to skip this segment.
410                  */
411                 if (lcs_ol->length == 0 && lcs_om->length > 0
412                     && lcs_om->position[0]->offset + lcs_om->length
413                        == original_sync
414                     && lcs_om->position[1]->offset + lcs_om->length
415                        != lcs_om->next->position[1]->offset)
416                   lcs_om = lcs_om->next;
417 
418                 if (lcs_om->position[0]->offset <= original_sync)
419                     break;
420               }
421           }
422 
423         modified_sync = lcs_om->position[1]->offset
424                       + (original_sync - lcs_om->position[0]->offset);
425         latest_sync = lcs_ol->position[1]->offset
426                     + (original_sync - lcs_ol->position[0]->offset);
427 
428         /* Determine what is modified, if anything */
429         is_modified = lcs_om->position[0]->offset - original_start > 0
430                       || lcs_om->position[1]->offset - modified_start > 0;
431 
432         is_latest = lcs_ol->position[0]->offset - original_start > 0
433                     || lcs_ol->position[1]->offset - latest_start > 0;
434 
435         if (is_modified || is_latest)
436           {
437             modified_length = modified_sync - modified_start;
438             latest_length = latest_sync - latest_start;
439 
440             (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
441 
442             (*diff_ref)->original_start = original_start - 1;
443             (*diff_ref)->original_length = original_sync - original_start;
444             (*diff_ref)->modified_start = modified_start - 1;
445             (*diff_ref)->modified_length = modified_length;
446             (*diff_ref)->latest_start = latest_start - 1;
447             (*diff_ref)->latest_length = latest_length;
448             (*diff_ref)->resolved_diff = NULL;
449 
450             if (is_modified && is_latest)
451               {
452                 svn_diff__resolve_conflict(*diff_ref,
453                                            &position_list[1],
454                                            &position_list[2],
455                                            num_tokens,
456                                            pool);
457               }
458             else if (is_modified)
459               {
460                 (*diff_ref)->type = svn_diff__type_diff_modified;
461               }
462             else
463               {
464                 (*diff_ref)->type = svn_diff__type_diff_latest;
465               }
466 
467             diff_ref = &(*diff_ref)->next;
468           }
469 
470         /* Detect EOF */
471         if (lcs_om->length == 0 || lcs_ol->length == 0)
472             break;
473 
474         modified_length = lcs_om->length
475                           - (original_sync - lcs_om->position[0]->offset);
476         latest_length = lcs_ol->length
477                         - (original_sync - lcs_ol->position[0]->offset);
478         common_length = MIN(modified_length, latest_length);
479 
480         if (common_length > 0)
481           {
482             (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
483 
484             (*diff_ref)->type = svn_diff__type_common;
485             (*diff_ref)->original_start = original_sync - 1;
486             (*diff_ref)->original_length = common_length;
487             (*diff_ref)->modified_start = modified_sync - 1;
488             (*diff_ref)->modified_length = common_length;
489             (*diff_ref)->latest_start = latest_sync - 1;
490             (*diff_ref)->latest_length = common_length;
491             (*diff_ref)->resolved_diff = NULL;
492 
493             diff_ref = &(*diff_ref)->next;
494           }
495 
496         /* Set the new offsets */
497         original_start = original_sync + common_length;
498         modified_start = modified_sync + common_length;
499         latest_start = latest_sync + common_length;
500 
501         /* Make it easier for diff_common/conflict detection
502            by recording last lcs start positions
503          */
504         if (position_list[1]->offset < lcs_om->position[1]->offset)
505           position_list[1] = lcs_om->position[1];
506 
507         if (position_list[2]->offset < lcs_ol->position[1]->offset)
508           position_list[2] = lcs_ol->position[1];
509 
510         /* Make sure we are pointing to lcs entries beyond
511          * the range we just processed
512          */
513         while (original_start >= lcs_om->position[0]->offset + lcs_om->length
514                && lcs_om->length > 0)
515           {
516             lcs_om = lcs_om->next;
517           }
518 
519         while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length
520                && lcs_ol->length > 0)
521           {
522             lcs_ol = lcs_ol->next;
523           }
524       }
525 
526     *diff_ref = NULL;
527   }
528 
529   svn_pool_destroy(subpool);
530 
531   return SVN_NO_ERROR;
532 }
533