1 /*
2 * token.c : routines for doing diffs
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25 #include <apr.h>
26 #include <apr_pools.h>
27 #include <apr_general.h>
28
29 #include "svn_error.h"
30 #include "svn_diff.h"
31 #include "svn_types.h"
32
33 #include "diff.h"
34
35
36 /*
37 * Prime number to use as the size of the hash table. This number was
38 * not selected by testing of any kind and may need tweaking.
39 */
40 #define SVN_DIFF__HASH_SIZE 127
41
42 struct svn_diff__node_t
43 {
44 svn_diff__node_t *parent;
45 svn_diff__node_t *left;
46 svn_diff__node_t *right;
47
48 apr_uint32_t hash;
49 svn_diff__token_index_t index;
50 void *token;
51 };
52
53 struct svn_diff__tree_t
54 {
55 svn_diff__node_t *root[SVN_DIFF__HASH_SIZE];
56 apr_pool_t *pool;
57 svn_diff__token_index_t node_count;
58 };
59
60
61 /*
62 * Returns number of tokens in a tree
63 */
64 svn_diff__token_index_t
svn_diff__get_node_count(svn_diff__tree_t * tree)65 svn_diff__get_node_count(svn_diff__tree_t *tree)
66 {
67 return tree->node_count;
68 }
69
70 /*
71 * Support functions to build a tree of token positions
72 */
73
74 void
svn_diff__tree_create(svn_diff__tree_t ** tree,apr_pool_t * pool)75 svn_diff__tree_create(svn_diff__tree_t **tree, apr_pool_t *pool)
76 {
77 *tree = apr_pcalloc(pool, sizeof(**tree));
78 (*tree)->pool = pool;
79 (*tree)->node_count = 0;
80 }
81
82
83 static svn_error_t *
tree_insert_token(svn_diff__node_t ** node,svn_diff__tree_t * tree,void * diff_baton,const svn_diff_fns2_t * vtable,apr_uint32_t hash,void * token)84 tree_insert_token(svn_diff__node_t **node, svn_diff__tree_t *tree,
85 void *diff_baton,
86 const svn_diff_fns2_t *vtable,
87 apr_uint32_t hash, void *token)
88 {
89 svn_diff__node_t *new_node;
90 svn_diff__node_t **node_ref;
91 svn_diff__node_t *parent;
92 int rv;
93
94 SVN_ERR_ASSERT(token);
95
96 parent = NULL;
97 node_ref = &tree->root[hash % SVN_DIFF__HASH_SIZE];
98
99 while (*node_ref != NULL)
100 {
101 parent = *node_ref;
102
103 rv = hash - parent->hash;
104 if (!rv)
105 SVN_ERR(vtable->token_compare(diff_baton, parent->token, token, &rv));
106
107 if (rv == 0)
108 {
109 /* Discard the previous token. This helps in cases where
110 * only recently read tokens are still in memory.
111 */
112 if (vtable->token_discard != NULL)
113 vtable->token_discard(diff_baton, parent->token);
114
115 parent->token = token;
116 *node = parent;
117
118 return SVN_NO_ERROR;
119 }
120 else if (rv > 0)
121 {
122 node_ref = &parent->left;
123 }
124 else
125 {
126 node_ref = &parent->right;
127 }
128 }
129
130 /* Create a new node */
131 new_node = apr_palloc(tree->pool, sizeof(*new_node));
132 new_node->parent = parent;
133 new_node->left = NULL;
134 new_node->right = NULL;
135 new_node->hash = hash;
136 new_node->token = token;
137 new_node->index = tree->node_count++;
138
139 *node = *node_ref = new_node;
140
141 return SVN_NO_ERROR;
142 }
143
144
145 /*
146 * Get all tokens from a datasource. Return the
147 * last item in the (circular) list.
148 */
149 svn_error_t *
svn_diff__get_tokens(svn_diff__position_t ** position_list,svn_diff__tree_t * tree,void * diff_baton,const svn_diff_fns2_t * vtable,svn_diff_datasource_e datasource,apr_off_t prefix_lines,apr_pool_t * pool)150 svn_diff__get_tokens(svn_diff__position_t **position_list,
151 svn_diff__tree_t *tree,
152 void *diff_baton,
153 const svn_diff_fns2_t *vtable,
154 svn_diff_datasource_e datasource,
155 apr_off_t prefix_lines,
156 apr_pool_t *pool)
157 {
158 svn_diff__position_t *start_position;
159 svn_diff__position_t *position = NULL;
160 svn_diff__position_t **position_ref;
161 svn_diff__node_t *node;
162 void *token;
163 apr_off_t offset;
164 apr_uint32_t hash;
165
166 *position_list = NULL;
167
168 position_ref = &start_position;
169 offset = prefix_lines;
170 hash = 0; /* The callback fn doesn't need to touch it per se */
171 while (1)
172 {
173 SVN_ERR(vtable->datasource_get_next_token(&hash, &token,
174 diff_baton, datasource));
175 if (token == NULL)
176 break;
177
178 offset++;
179 SVN_ERR(tree_insert_token(&node, tree, diff_baton, vtable, hash, token));
180
181 /* Create a new position */
182 position = apr_palloc(pool, sizeof(*position));
183 position->next = NULL;
184 position->token_index = node->index;
185 position->offset = offset;
186
187 *position_ref = position;
188 position_ref = &position->next;
189 }
190
191 *position_ref = start_position;
192
193 SVN_ERR(vtable->datasource_close(diff_baton, datasource));
194
195 *position_list = position;
196
197 return SVN_NO_ERROR;
198 }
199