1 /*
2 * diff3.c : routines for doing diffs
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25 #include <apr.h>
26 #include <apr_pools.h>
27 #include <apr_general.h>
28
29 #include "svn_pools.h"
30 #include "svn_error.h"
31 #include "svn_diff.h"
32 #include "svn_sorts.h"
33 #include "svn_types.h"
34
35 #include "diff.h"
36
37
38 void
svn_diff__resolve_conflict(svn_diff_t * hunk,svn_diff__position_t ** position_list1,svn_diff__position_t ** position_list2,svn_diff__token_index_t num_tokens,apr_pool_t * pool)39 svn_diff__resolve_conflict(svn_diff_t *hunk,
40 svn_diff__position_t **position_list1,
41 svn_diff__position_t **position_list2,
42 svn_diff__token_index_t num_tokens,
43 apr_pool_t *pool)
44 {
45 apr_off_t modified_start = hunk->modified_start + 1;
46 apr_off_t latest_start = hunk->latest_start + 1;
47 apr_off_t common_length;
48 apr_off_t modified_length = hunk->modified_length;
49 apr_off_t latest_length = hunk->latest_length;
50 svn_diff__position_t *start_position[2];
51 svn_diff__position_t *position[2];
52 svn_diff__token_index_t *token_counts[2];
53 svn_diff__lcs_t *lcs = NULL;
54 svn_diff__lcs_t **lcs_ref = &lcs;
55 svn_diff_t **diff_ref = &hunk->resolved_diff;
56 apr_pool_t *subpool;
57
58 /* First find the starting positions for the
59 * comparison
60 */
61
62 start_position[0] = *position_list1;
63 start_position[1] = *position_list2;
64
65 while (start_position[0]->offset < modified_start)
66 start_position[0] = start_position[0]->next;
67
68 while (start_position[1]->offset < latest_start)
69 start_position[1] = start_position[1]->next;
70
71 position[0] = start_position[0];
72 position[1] = start_position[1];
73
74 common_length = modified_length < latest_length
75 ? modified_length : latest_length;
76
77 while (common_length > 0
78 && position[0]->token_index == position[1]->token_index)
79 {
80 position[0] = position[0]->next;
81 position[1] = position[1]->next;
82
83 common_length--;
84 }
85
86 if (common_length == 0
87 && modified_length == latest_length)
88 {
89 hunk->type = svn_diff__type_diff_common;
90 hunk->resolved_diff = NULL;
91
92 *position_list1 = position[0];
93 *position_list2 = position[1];
94
95 return;
96 }
97
98 hunk->type = svn_diff__type_conflict;
99
100 /* ### If we have a conflict we can try to find the
101 * ### common parts in it by getting an lcs between
102 * ### modified (start to start + length) and
103 * ### latest (start to start + length).
104 * ### We use this lcs to create a simple diff. Only
105 * ### where there is a diff between the two, we have
106 * ### a conflict.
107 * ### This raises a problem; several common diffs and
108 * ### conflicts can occur within the same original
109 * ### block. This needs some thought.
110 * ###
111 * ### NB: We can use the node _pointers_ to identify
112 * ### different tokens
113 */
114
115 subpool = svn_pool_create(pool);
116
117 /* Calculate how much of the two sequences was
118 * actually the same.
119 */
120 common_length = (modified_length < latest_length
121 ? modified_length : latest_length)
122 - common_length;
123
124 /* If there were matching symbols at the start of
125 * both sequences, record that fact.
126 */
127 if (common_length > 0)
128 {
129 lcs = apr_palloc(subpool, sizeof(*lcs));
130 lcs->next = NULL;
131 lcs->position[0] = start_position[0];
132 lcs->position[1] = start_position[1];
133 lcs->length = common_length;
134
135 lcs_ref = &lcs->next;
136 }
137
138 modified_length -= common_length;
139 latest_length -= common_length;
140
141 modified_start = start_position[0]->offset;
142 latest_start = start_position[1]->offset;
143
144 start_position[0] = position[0];
145 start_position[1] = position[1];
146
147 /* Create a new ring for svn_diff__lcs to grok.
148 * We can safely do this given we don't need the
149 * positions we processed anymore.
150 */
151 if (modified_length == 0)
152 {
153 *position_list1 = position[0];
154 position[0] = NULL;
155 }
156 else
157 {
158 while (--modified_length)
159 position[0] = position[0]->next;
160
161 *position_list1 = position[0]->next;
162 position[0]->next = start_position[0];
163 }
164
165 if (latest_length == 0)
166 {
167 *position_list2 = position[1];
168 position[1] = NULL;
169 }
170 else
171 {
172 while (--latest_length)
173 position[1] = position[1]->next;
174
175 *position_list2 = position[1]->next;
176 position[1]->next = start_position[1];
177 }
178
179 token_counts[0] = svn_diff__get_token_counts(position[0], num_tokens,
180 subpool);
181 token_counts[1] = svn_diff__get_token_counts(position[1], num_tokens,
182 subpool);
183
184 *lcs_ref = svn_diff__lcs(position[0], position[1], token_counts[0],
185 token_counts[1], num_tokens, 0, 0, subpool);
186
187 /* Fix up the EOF lcs element in case one of
188 * the two sequences was NULL.
189 */
190 if ((*lcs_ref)->position[0]->offset == 1)
191 (*lcs_ref)->position[0] = *position_list1;
192
193 if ((*lcs_ref)->position[1]->offset == 1)
194 (*lcs_ref)->position[1] = *position_list2;
195
196 /* Produce the resolved diff */
197 while (1)
198 {
199 if (modified_start < lcs->position[0]->offset
200 || latest_start < lcs->position[1]->offset)
201 {
202 (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
203
204 (*diff_ref)->type = svn_diff__type_conflict;
205 (*diff_ref)->original_start = hunk->original_start;
206 (*diff_ref)->original_length = hunk->original_length;
207 (*diff_ref)->modified_start = modified_start - 1;
208 (*diff_ref)->modified_length = lcs->position[0]->offset
209 - modified_start;
210 (*diff_ref)->latest_start = latest_start - 1;
211 (*diff_ref)->latest_length = lcs->position[1]->offset
212 - latest_start;
213 (*diff_ref)->resolved_diff = NULL;
214
215 diff_ref = &(*diff_ref)->next;
216 }
217
218 /* Detect the EOF */
219 if (lcs->length == 0)
220 break;
221
222 modified_start = lcs->position[0]->offset;
223 latest_start = lcs->position[1]->offset;
224
225 (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
226
227 (*diff_ref)->type = svn_diff__type_diff_common;
228 (*diff_ref)->original_start = hunk->original_start;
229 (*diff_ref)->original_length = hunk->original_length;
230 (*diff_ref)->modified_start = modified_start - 1;
231 (*diff_ref)->modified_length = lcs->length;
232 (*diff_ref)->latest_start = latest_start - 1;
233 (*diff_ref)->latest_length = lcs->length;
234 (*diff_ref)->resolved_diff = NULL;
235
236 diff_ref = &(*diff_ref)->next;
237
238 modified_start += lcs->length;
239 latest_start += lcs->length;
240
241 lcs = lcs->next;
242 }
243
244 *diff_ref = NULL;
245
246 svn_pool_destroy(subpool);
247 }
248
249
250 svn_error_t *
svn_diff_diff3_2(svn_diff_t ** diff,void * diff_baton,const svn_diff_fns2_t * vtable,apr_pool_t * pool)251 svn_diff_diff3_2(svn_diff_t **diff,
252 void *diff_baton,
253 const svn_diff_fns2_t *vtable,
254 apr_pool_t *pool)
255 {
256 svn_diff__tree_t *tree;
257 svn_diff__position_t *position_list[3];
258 svn_diff__token_index_t num_tokens;
259 svn_diff__token_index_t *token_counts[3];
260 svn_diff_datasource_e datasource[] = {svn_diff_datasource_original,
261 svn_diff_datasource_modified,
262 svn_diff_datasource_latest};
263 svn_diff__lcs_t *lcs_om;
264 svn_diff__lcs_t *lcs_ol;
265 apr_pool_t *subpool;
266 apr_pool_t *treepool;
267 apr_off_t prefix_lines = 0;
268 apr_off_t suffix_lines = 0;
269
270 *diff = NULL;
271
272 subpool = svn_pool_create(pool);
273 treepool = svn_pool_create(pool);
274
275 svn_diff__tree_create(&tree, treepool);
276
277 SVN_ERR(vtable->datasources_open(diff_baton, &prefix_lines, &suffix_lines,
278 datasource, 3));
279
280 SVN_ERR(svn_diff__get_tokens(&position_list[0],
281 tree,
282 diff_baton, vtable,
283 svn_diff_datasource_original,
284 prefix_lines,
285 subpool));
286
287 SVN_ERR(svn_diff__get_tokens(&position_list[1],
288 tree,
289 diff_baton, vtable,
290 svn_diff_datasource_modified,
291 prefix_lines,
292 subpool));
293
294 SVN_ERR(svn_diff__get_tokens(&position_list[2],
295 tree,
296 diff_baton, vtable,
297 svn_diff_datasource_latest,
298 prefix_lines,
299 subpool));
300
301 num_tokens = svn_diff__get_node_count(tree);
302
303 /* Get rid of the tokens, we don't need them to calc the diff */
304 if (vtable->token_discard_all != NULL)
305 vtable->token_discard_all(diff_baton);
306
307 /* We don't need the nodes in the tree either anymore, nor the tree itself */
308 svn_pool_destroy(treepool);
309
310 token_counts[0] = svn_diff__get_token_counts(position_list[0], num_tokens,
311 subpool);
312 token_counts[1] = svn_diff__get_token_counts(position_list[1], num_tokens,
313 subpool);
314 token_counts[2] = svn_diff__get_token_counts(position_list[2], num_tokens,
315 subpool);
316
317 /* Get the lcs for original-modified and original-latest */
318 lcs_om = svn_diff__lcs(position_list[0], position_list[1], token_counts[0],
319 token_counts[1], num_tokens, prefix_lines,
320 suffix_lines, subpool);
321 lcs_ol = svn_diff__lcs(position_list[0], position_list[2], token_counts[0],
322 token_counts[2], num_tokens, prefix_lines,
323 suffix_lines, subpool);
324
325 /* Produce a merged diff */
326 {
327 svn_diff_t **diff_ref = diff;
328
329 apr_off_t original_start = 1;
330 apr_off_t modified_start = 1;
331 apr_off_t latest_start = 1;
332 apr_off_t original_sync;
333 apr_off_t modified_sync;
334 apr_off_t latest_sync;
335 apr_off_t common_length;
336 apr_off_t modified_length;
337 apr_off_t latest_length;
338 svn_boolean_t is_modified;
339 svn_boolean_t is_latest;
340 svn_diff__position_t sentinel_position[2];
341
342 /* Point the position lists to the start of the list
343 * so that common_diff/conflict detection actually is
344 * able to work.
345 */
346 if (position_list[1])
347 {
348 sentinel_position[0].next = position_list[1]->next;
349 sentinel_position[0].offset = position_list[1]->offset + 1;
350 position_list[1]->next = &sentinel_position[0];
351 position_list[1] = sentinel_position[0].next;
352 }
353 else
354 {
355 sentinel_position[0].offset = prefix_lines + 1;
356 sentinel_position[0].next = NULL;
357 position_list[1] = &sentinel_position[0];
358 }
359
360 if (position_list[2])
361 {
362 sentinel_position[1].next = position_list[2]->next;
363 sentinel_position[1].offset = position_list[2]->offset + 1;
364 position_list[2]->next = &sentinel_position[1];
365 position_list[2] = sentinel_position[1].next;
366 }
367 else
368 {
369 sentinel_position[1].offset = prefix_lines + 1;
370 sentinel_position[1].next = NULL;
371 position_list[2] = &sentinel_position[1];
372 }
373
374 while (1)
375 {
376 /* Find the sync points */
377 while (1)
378 {
379 if (lcs_om->position[0]->offset > lcs_ol->position[0]->offset)
380 {
381 original_sync = lcs_om->position[0]->offset;
382
383 while (lcs_ol->position[0]->offset + lcs_ol->length
384 < original_sync)
385 lcs_ol = lcs_ol->next;
386
387 /* If the sync point is the EOF, and our current lcs segment
388 * doesn't reach as far as EOF, we need to skip this segment.
389 */
390 if (lcs_om->length == 0 && lcs_ol->length > 0
391 && lcs_ol->position[0]->offset + lcs_ol->length
392 == original_sync
393 && lcs_ol->position[1]->offset + lcs_ol->length
394 != lcs_ol->next->position[1]->offset)
395 lcs_ol = lcs_ol->next;
396
397 if (lcs_ol->position[0]->offset <= original_sync)
398 break;
399 }
400 else
401 {
402 original_sync = lcs_ol->position[0]->offset;
403
404 while (lcs_om->position[0]->offset + lcs_om->length
405 < original_sync)
406 lcs_om = lcs_om->next;
407
408 /* If the sync point is the EOF, and our current lcs segment
409 * doesn't reach as far as EOF, we need to skip this segment.
410 */
411 if (lcs_ol->length == 0 && lcs_om->length > 0
412 && lcs_om->position[0]->offset + lcs_om->length
413 == original_sync
414 && lcs_om->position[1]->offset + lcs_om->length
415 != lcs_om->next->position[1]->offset)
416 lcs_om = lcs_om->next;
417
418 if (lcs_om->position[0]->offset <= original_sync)
419 break;
420 }
421 }
422
423 modified_sync = lcs_om->position[1]->offset
424 + (original_sync - lcs_om->position[0]->offset);
425 latest_sync = lcs_ol->position[1]->offset
426 + (original_sync - lcs_ol->position[0]->offset);
427
428 /* Determine what is modified, if anything */
429 is_modified = lcs_om->position[0]->offset - original_start > 0
430 || lcs_om->position[1]->offset - modified_start > 0;
431
432 is_latest = lcs_ol->position[0]->offset - original_start > 0
433 || lcs_ol->position[1]->offset - latest_start > 0;
434
435 if (is_modified || is_latest)
436 {
437 modified_length = modified_sync - modified_start;
438 latest_length = latest_sync - latest_start;
439
440 (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
441
442 (*diff_ref)->original_start = original_start - 1;
443 (*diff_ref)->original_length = original_sync - original_start;
444 (*diff_ref)->modified_start = modified_start - 1;
445 (*diff_ref)->modified_length = modified_length;
446 (*diff_ref)->latest_start = latest_start - 1;
447 (*diff_ref)->latest_length = latest_length;
448 (*diff_ref)->resolved_diff = NULL;
449
450 if (is_modified && is_latest)
451 {
452 svn_diff__resolve_conflict(*diff_ref,
453 &position_list[1],
454 &position_list[2],
455 num_tokens,
456 pool);
457 }
458 else if (is_modified)
459 {
460 (*diff_ref)->type = svn_diff__type_diff_modified;
461 }
462 else
463 {
464 (*diff_ref)->type = svn_diff__type_diff_latest;
465 }
466
467 diff_ref = &(*diff_ref)->next;
468 }
469
470 /* Detect EOF */
471 if (lcs_om->length == 0 || lcs_ol->length == 0)
472 break;
473
474 modified_length = lcs_om->length
475 - (original_sync - lcs_om->position[0]->offset);
476 latest_length = lcs_ol->length
477 - (original_sync - lcs_ol->position[0]->offset);
478 common_length = MIN(modified_length, latest_length);
479
480 if (common_length > 0)
481 {
482 (*diff_ref) = apr_palloc(pool, sizeof(**diff_ref));
483
484 (*diff_ref)->type = svn_diff__type_common;
485 (*diff_ref)->original_start = original_sync - 1;
486 (*diff_ref)->original_length = common_length;
487 (*diff_ref)->modified_start = modified_sync - 1;
488 (*diff_ref)->modified_length = common_length;
489 (*diff_ref)->latest_start = latest_sync - 1;
490 (*diff_ref)->latest_length = common_length;
491 (*diff_ref)->resolved_diff = NULL;
492
493 diff_ref = &(*diff_ref)->next;
494 }
495
496 /* Set the new offsets */
497 original_start = original_sync + common_length;
498 modified_start = modified_sync + common_length;
499 latest_start = latest_sync + common_length;
500
501 /* Make it easier for diff_common/conflict detection
502 by recording last lcs start positions
503 */
504 if (position_list[1]->offset < lcs_om->position[1]->offset)
505 position_list[1] = lcs_om->position[1];
506
507 if (position_list[2]->offset < lcs_ol->position[1]->offset)
508 position_list[2] = lcs_ol->position[1];
509
510 /* Make sure we are pointing to lcs entries beyond
511 * the range we just processed
512 */
513 while (original_start >= lcs_om->position[0]->offset + lcs_om->length
514 && lcs_om->length > 0)
515 {
516 lcs_om = lcs_om->next;
517 }
518
519 while (original_start >= lcs_ol->position[0]->offset + lcs_ol->length
520 && lcs_ol->length > 0)
521 {
522 lcs_ol = lcs_ol->next;
523 }
524 }
525
526 *diff_ref = NULL;
527 }
528
529 svn_pool_destroy(subpool);
530
531 return SVN_NO_ERROR;
532 }
533