1 /*
2  * svndumpfilter.c: Subversion dump stream filtering tool main file.
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 #include <stdlib.h>
26 
27 #include <apr_file_io.h>
28 
29 #include "svn_private_config.h"
30 #include "svn_cmdline.h"
31 #include "svn_error.h"
32 #include "svn_string.h"
33 #include "svn_opt.h"
34 #include "svn_utf.h"
35 #include "svn_dirent_uri.h"
36 #include "svn_path.h"
37 #include "svn_hash.h"
38 #include "svn_repos.h"
39 #include "svn_fs.h"
40 #include "svn_pools.h"
41 #include "svn_sorts.h"
42 #include "svn_props.h"
43 #include "svn_mergeinfo.h"
44 #include "svn_version.h"
45 
46 #include "private/svn_dirent_uri_private.h"
47 #include "private/svn_repos_private.h"
48 #include "private/svn_mergeinfo_private.h"
49 #include "private/svn_cmdline_private.h"
50 #include "private/svn_sorts_private.h"
51 
52 /*** Code. ***/
53 
54 /* Writes a property in dumpfile format to given stringbuf. */
55 static void
write_prop_to_stringbuf(svn_stringbuf_t * strbuf,const char * name,const svn_string_t * value)56 write_prop_to_stringbuf(svn_stringbuf_t *strbuf,
57                         const char *name,
58                         const svn_string_t *value)
59 {
60   int bytes_used;
61   size_t namelen;
62   char buf[SVN_KEYLINE_MAXLEN];
63 
64   /* Output name length, then name. */
65   namelen = strlen(name);
66   svn_stringbuf_appendbytes(strbuf, "K ", 2);
67 
68   bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
69   svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
70   svn_stringbuf_appendbyte(strbuf, '\n');
71 
72   svn_stringbuf_appendbytes(strbuf, name, namelen);
73   svn_stringbuf_appendbyte(strbuf, '\n');
74 
75   /* Output value length, then value. */
76   svn_stringbuf_appendbytes(strbuf, "V ", 2);
77 
78   bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, value->len);
79   svn_stringbuf_appendbytes(strbuf, buf, bytes_used);
80   svn_stringbuf_appendbyte(strbuf, '\n');
81 
82   svn_stringbuf_appendbytes(strbuf, value->data, value->len);
83   svn_stringbuf_appendbyte(strbuf, '\n');
84 }
85 
86 
87 /* Writes a property deletion in dumpfile format to given stringbuf. */
88 static void
write_propdel_to_stringbuf(svn_stringbuf_t ** strbuf,const char * name)89 write_propdel_to_stringbuf(svn_stringbuf_t **strbuf,
90                            const char *name)
91 {
92   int bytes_used;
93   size_t namelen;
94   char buf[SVN_KEYLINE_MAXLEN];
95 
96   /* Output name length, then name. */
97   namelen = strlen(name);
98   svn_stringbuf_appendbytes(*strbuf, "D ", 2);
99 
100   bytes_used = apr_snprintf(buf, sizeof(buf), "%" APR_SIZE_T_FMT, namelen);
101   svn_stringbuf_appendbytes(*strbuf, buf, bytes_used);
102   svn_stringbuf_appendbyte(*strbuf, '\n');
103 
104   svn_stringbuf_appendbytes(*strbuf, name, namelen);
105   svn_stringbuf_appendbyte(*strbuf, '\n');
106 }
107 
108 
109 /* Compare the node-path PATH with the (const char *) prefixes in PFXLIST.
110  * Return TRUE if any prefix is a prefix of PATH (matching whole path
111  * components); FALSE otherwise.
112  * PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
113 /* This function is a duplicate of svnadmin.c:ary_prefix_match(). */
114 static svn_boolean_t
ary_prefix_match(const apr_array_header_t * pfxlist,const char * path)115 ary_prefix_match(const apr_array_header_t *pfxlist, const char *path)
116 {
117   int i;
118   size_t path_len = strlen(path);
119 
120   for (i = 0; i < pfxlist->nelts; i++)
121     {
122       const char *pfx = APR_ARRAY_IDX(pfxlist, i, const char *);
123       size_t pfx_len = strlen(pfx);
124 
125       if (path_len < pfx_len)
126         continue;
127       if (strncmp(path, pfx, pfx_len) == 0
128           && (pfx_len == 1 || path[pfx_len] == '\0' || path[pfx_len] == '/'))
129         return TRUE;
130     }
131 
132   return FALSE;
133 }
134 
135 
136 /* Check whether we need to skip this PATH based on its presence in
137    the PREFIXES list, and the DO_EXCLUDE option.
138    PATH starts with a '/', as do the (const char *) paths in PREFIXES. */
139 static APR_INLINE svn_boolean_t
skip_path(const char * path,const apr_array_header_t * prefixes,svn_boolean_t do_exclude,svn_boolean_t glob)140 skip_path(const char *path, const apr_array_header_t *prefixes,
141           svn_boolean_t do_exclude, svn_boolean_t glob)
142 {
143   const svn_boolean_t matches =
144     (glob
145      ? svn_cstring_match_glob_list(path, prefixes)
146      : ary_prefix_match(prefixes, path));
147 
148   /* NXOR */
149   return (matches ? do_exclude : !do_exclude);
150 }
151 
152 
153 
/* Note: the input stream parser calls us with events.
   Output of the filtered dump occurs for the most part in a streaming
   fashion, driven by the event callbacks, to avoid caching large
   quantities of data in memory.
   The exceptions to this are:
   - All revision data (headers and props) must be cached until a non-skipped
     node within the revision is found, or the revision is closed.
   - Node headers and props must be cached until all props have been received
     (to allow the Prop-content-length to be found). This is signalled either
     by the node text arriving, or the node being closed.
   The writing_begun members of the associated object batons track the state.
   output_revision() and output_node() are called to cause this flushing of
   cached data to occur.
*/
167 
168 
169 /* Filtering batons */
170 
171 struct revmap_t
172 {
173   svn_revnum_t rev; /* Last non-dropped revision to which this maps. */
174   svn_boolean_t was_dropped; /* Was this revision dropped? */
175 };
176 
177 struct parse_baton_t
178 {
179   /* Command-line options values. */
180   svn_boolean_t do_exclude;
181   svn_boolean_t quiet;
182   svn_boolean_t glob;
183   svn_boolean_t drop_empty_revs;
184   svn_boolean_t drop_all_empty_revs;
185   svn_boolean_t do_renumber_revs;
186   svn_boolean_t preserve_revprops;
187   svn_boolean_t skip_missing_merge_sources;
188   svn_boolean_t allow_deltas;
189   apr_array_header_t *prefixes;
190 
191   /* Input and output streams. */
192   svn_stream_t *in_stream;
193   svn_stream_t *out_stream;
194 
195   /* State for the filtering process. */
196   apr_int32_t rev_drop_count;
197   apr_hash_t *dropped_nodes;
198   apr_hash_t *renumber_history;  /* svn_revnum_t -> struct revmap_t */
199   svn_revnum_t last_live_revision;
200   /* The oldest original revision, greater than r0, in the input
201      stream which was not filtered. */
202   svn_revnum_t oldest_original_rev;
203 };
204 
205 struct revision_baton_t
206 {
207   /* Reference to the global parse baton. */
208   struct parse_baton_t *pb;
209 
210   /* Does this revision have node or prop changes? */
211   svn_boolean_t has_nodes;
212 
213   /* Did we drop any nodes? */
214   svn_boolean_t had_dropped_nodes;
215 
216   /* Written to output stream? */
217   svn_boolean_t writing_begun;
218 
219   /* The original and new (re-mapped) revision numbers. */
220   svn_revnum_t rev_orig;
221   svn_revnum_t rev_actual;
222 
223   /* Pointers to dumpfile data. */
224   apr_hash_t *original_headers;
225   apr_hash_t *props;
226 };
227 
228 struct node_baton_t
229 {
230   /* Reference to the current revision baton. */
231   struct revision_baton_t *rb;
232 
233   /* Are we skipping this node? */
234   svn_boolean_t do_skip;
235 
236   /* Have we been instructed to change or remove props on, or change
237      the text of, this node? */
238   svn_boolean_t has_props;
239   svn_boolean_t has_text;
240 
241   /* Written to output stream? */
242   svn_boolean_t writing_begun;
243 
244   /* The text content length according to the dumpfile headers, because we
245      need the length before we have the actual text. */
246   svn_filesize_t tcl;
247 
248   /* Pointers to dumpfile data. */
249   svn_repos__dumpfile_headers_t *headers;
250   svn_stringbuf_t *props;
251 
252   /* Expect deltas? */
253   svn_boolean_t has_prop_delta;
254   svn_boolean_t has_text_delta;
255 
256   /* We might need the node path in a parse error message. */
257   char *node_path;
258 
259   apr_pool_t *node_pool;
260 };
261 
262 
263 
264 /* Filtering vtable members */
265 
266 /* File-format stamp. */
267 static svn_error_t *
magic_header_record(int version,void * parse_baton,apr_pool_t * pool)268 magic_header_record(int version, void *parse_baton, apr_pool_t *pool)
269 {
270   struct parse_baton_t *pb = parse_baton;
271 
272   if (version >= SVN_REPOS_DUMPFILE_FORMAT_VERSION_DELTAS)
273     pb->allow_deltas = TRUE;
274 
275   SVN_ERR(svn_repos__dump_magic_header_record(pb->out_stream, version, pool));
276 
277   return SVN_NO_ERROR;
278 }
279 
280 
281 /* Return a deep copy of a (char * -> char *) hash. */
282 static apr_hash_t *
headers_dup(apr_hash_t * headers,apr_pool_t * pool)283 headers_dup(apr_hash_t *headers,
284             apr_pool_t *pool)
285 {
286   apr_hash_t *new_hash = apr_hash_make(pool);
287   apr_hash_index_t *hi;
288 
289   for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
290     {
291       const char *key = apr_hash_this_key(hi);
292       const char *val = apr_hash_this_val(hi);
293 
294       svn_hash_sets(new_hash, apr_pstrdup(pool, key), apr_pstrdup(pool, val));
295     }
296   return new_hash;
297 }
298 
299 /* New revision: set up revision_baton, decide if we skip it. */
300 static svn_error_t *
new_revision_record(void ** revision_baton,apr_hash_t * headers,void * parse_baton,apr_pool_t * pool)301 new_revision_record(void **revision_baton,
302                     apr_hash_t *headers,
303                     void *parse_baton,
304                     apr_pool_t *pool)
305 {
306   struct revision_baton_t *rb;
307   const char *rev_orig;
308 
309   *revision_baton = apr_palloc(pool, sizeof(struct revision_baton_t));
310   rb = *revision_baton;
311   rb->pb = parse_baton;
312   rb->has_nodes = FALSE;
313   rb->had_dropped_nodes = FALSE;
314   rb->writing_begun = FALSE;
315   rb->props = apr_hash_make(pool);
316   rb->original_headers = headers_dup(headers, pool);
317 
318   rev_orig = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_REVISION_NUMBER);
319   rb->rev_orig = SVN_STR_TO_REV(rev_orig);
320 
321   if (rb->pb->do_renumber_revs)
322     rb->rev_actual = rb->rev_orig - rb->pb->rev_drop_count;
323   else
324     rb->rev_actual = rb->rev_orig;
325 
326   return SVN_NO_ERROR;
327 }
328 
329 
330 /* Output revision to dumpstream
331    This may be called by new_node_record(), iff rb->has_nodes has been set
332    to TRUE, or by close_revision() otherwise. This must only be called
333    if rb->writing_begun is FALSE. */
334 static svn_error_t *
output_revision(struct revision_baton_t * rb)335 output_revision(struct revision_baton_t *rb)
336 {
337   svn_boolean_t write_out_rev = FALSE;
338   apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
339   apr_pool_t *subpool = svn_pool_create(hash_pool);
340 
341   rb->writing_begun = TRUE;
342 
343   /* If this revision has no nodes left because the ones it had were
344      dropped, and we are not dropping empty revisions, and we were not
345      told to preserve revision props, then we want to fixup the
346      revision props to only contain:
347        - the date
348        - a log message that reports that this revision is just stuffing. */
349   if ((! rb->pb->preserve_revprops)
350       && (! rb->has_nodes)
351       && rb->had_dropped_nodes
352       && (! rb->pb->drop_empty_revs)
353       && (! rb->pb->drop_all_empty_revs))
354     {
355       apr_hash_t *old_props = rb->props;
356       rb->props = apr_hash_make(hash_pool);
357       svn_hash_sets(rb->props, SVN_PROP_REVISION_DATE,
358                     svn_hash_gets(old_props, SVN_PROP_REVISION_DATE));
359       svn_hash_sets(rb->props, SVN_PROP_REVISION_LOG,
360                     svn_string_create(_("This is an empty revision for "
361                                         "padding."), hash_pool));
362     }
363 
364   /* write out the revision */
365   /* Revision is written out in the following cases:
366      1. If the revision has nodes or
367      it is revision 0 (Special case: To preserve the props on r0).
368      2. --drop-empty-revs has been supplied,
369      but revision has not all nodes dropped.
370      3. If no --drop-empty-revs or --drop-all-empty-revs have been supplied,
371      write out the revision which has no nodes to begin with.
372   */
373   if (rb->has_nodes || (rb->rev_orig == 0))
374     write_out_rev = TRUE;
375   else if (rb->pb->drop_empty_revs)
376     write_out_rev = ! rb->had_dropped_nodes;
377   else if (! rb->pb->drop_all_empty_revs)
378     write_out_rev = TRUE;
379 
380   if (write_out_rev)
381     {
382       /* This revision is a keeper. */
383       SVN_ERR(svn_repos__dump_revision_record(rb->pb->out_stream,
384                                               rb->rev_actual,
385                                               rb->original_headers,
386                                               rb->props,
387                                               FALSE /*props_section_always*/,
388                                               subpool));
389 
390       /* Stash the oldest original rev not dropped. */
391       if (rb->rev_orig > 0
392           && !SVN_IS_VALID_REVNUM(rb->pb->oldest_original_rev))
393         rb->pb->oldest_original_rev = rb->rev_orig;
394 
395       if (rb->pb->do_renumber_revs)
396         {
397           svn_revnum_t *rr_key;
398           struct revmap_t *rr_val;
399           apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
400           rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
401           rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
402           *rr_key = rb->rev_orig;
403           rr_val->rev = rb->rev_actual;
404           rr_val->was_dropped = FALSE;
405           apr_hash_set(rb->pb->renumber_history, rr_key,
406                        sizeof(*rr_key), rr_val);
407           rb->pb->last_live_revision = rb->rev_actual;
408         }
409 
410       if (! rb->pb->quiet)
411         SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
412                                     _("Revision %ld committed as %ld.\n"),
413                                     rb->rev_orig, rb->rev_actual));
414     }
415   else
416     {
417       /* We're dropping this revision. */
418       rb->pb->rev_drop_count++;
419       if (rb->pb->do_renumber_revs)
420         {
421           svn_revnum_t *rr_key;
422           struct revmap_t *rr_val;
423           apr_pool_t *rr_pool = apr_hash_pool_get(rb->pb->renumber_history);
424           rr_key = apr_palloc(rr_pool, sizeof(*rr_key));
425           rr_val = apr_palloc(rr_pool, sizeof(*rr_val));
426           *rr_key = rb->rev_orig;
427           rr_val->rev = rb->pb->last_live_revision;
428           rr_val->was_dropped = TRUE;
429           apr_hash_set(rb->pb->renumber_history, rr_key,
430                        sizeof(*rr_key), rr_val);
431         }
432 
433       if (! rb->pb->quiet)
434         SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
435                                     _("Revision %ld skipped.\n"),
436                                     rb->rev_orig));
437     }
438   svn_pool_destroy(subpool);
439   return SVN_NO_ERROR;
440 }
441 
442 
443 /* UUID record here: dump it, as we do not filter them. */
444 static svn_error_t *
uuid_record(const char * uuid,void * parse_baton,apr_pool_t * pool)445 uuid_record(const char *uuid, void *parse_baton, apr_pool_t *pool)
446 {
447   struct parse_baton_t *pb = parse_baton;
448 
449   SVN_ERR(svn_repos__dump_uuid_header_record(pb->out_stream, uuid, pool));
450   return SVN_NO_ERROR;
451 }
452 
453 
454 /* New node here. Set up node_baton by copying headers. */
455 static svn_error_t *
new_node_record(void ** node_baton,apr_hash_t * headers,void * rev_baton,apr_pool_t * pool)456 new_node_record(void **node_baton,
457                 apr_hash_t *headers,
458                 void *rev_baton,
459                 apr_pool_t *pool)
460 {
461   struct parse_baton_t *pb;
462   struct node_baton_t *nb;
463   char *node_path, *copyfrom_path;
464   apr_hash_index_t *hi;
465   const char *tcl;
466 
467   *node_baton = apr_palloc(pool, sizeof(struct node_baton_t));
468   nb          = *node_baton;
469   nb->rb      = rev_baton;
470   nb->node_pool = pool;
471   pb          = nb->rb->pb;
472 
473   node_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_PATH);
474   copyfrom_path = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH);
475 
476   /* Ensure that paths start with a leading '/'. */
477   if (node_path[0] != '/')
478     node_path = apr_pstrcat(pool, "/", node_path, SVN_VA_NULL);
479   if (copyfrom_path && copyfrom_path[0] != '/')
480     copyfrom_path = apr_pstrcat(pool, "/", copyfrom_path, SVN_VA_NULL);
481 
482   nb->do_skip = skip_path(node_path, pb->prefixes,
483                           pb->do_exclude, pb->glob);
484 
485   /* If we're skipping the node, take note of path, discarding the
486      rest.  */
487   if (nb->do_skip)
488     {
489       svn_hash_sets(pb->dropped_nodes,
490                     apr_pstrdup(apr_hash_pool_get(pb->dropped_nodes),
491                                 node_path),
492                     (void *)1);
493       nb->rb->had_dropped_nodes = TRUE;
494     }
495   else
496     {
497       const char *kind;
498       const char *action;
499 
500       tcl = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH);
501 
502       /* Test if this node was copied from dropped source. */
503       if (copyfrom_path &&
504           skip_path(copyfrom_path, pb->prefixes, pb->do_exclude, pb->glob))
505         {
506           /* This node was copied from a dropped source.
507              We have a problem, since we did not want to drop this node too.
508 
509              However, there is one special case we'll handle.  If the node is
510              a file, and this was a copy-and-modify operation, then the
511              dumpfile should contain the new contents of the file.  In this
512              scenario, we'll just do an add without history using the new
513              contents.  */
514           kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
515 
516           /* If there is a Text-content-length header, and the kind is
517              "file", we just fallback to an add without history. */
518           if (tcl && (strcmp(kind, "file") == 0))
519             {
520               svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_PATH,
521                             NULL);
522               svn_hash_sets(headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV,
523                             NULL);
524               copyfrom_path = NULL;
525             }
526           /* Else, this is either a directory or a file whose contents we
527              don't have readily available.  */
528           else
529             {
530               return svn_error_createf
531                 (SVN_ERR_INCOMPLETE_DATA, 0,
532                  _("Invalid copy source path '%s' for '%s'"),
533                  copyfrom_path, node_path);
534             }
535         }
536 
537       nb->has_props = FALSE;
538       nb->has_text = FALSE;
539       nb->has_prop_delta = FALSE;
540       nb->has_text_delta = FALSE;
541       nb->writing_begun = FALSE;
542       nb->tcl = tcl ? svn__atoui64(tcl) : 0;
543       nb->headers = svn_repos__dumpfile_headers_create(pool);
544       nb->props = svn_stringbuf_create_empty(pool);
545       nb->node_path = apr_pstrdup(pool, node_path);
546 
547       /* Now we know for sure that we have a node that will not be
548          skipped, flush the revision if it has not already been done. */
549       nb->rb->has_nodes = TRUE;
550       if (! nb->rb->writing_begun)
551         SVN_ERR(output_revision(nb->rb));
552 
553       /* A node record is required to begin with 'Node-path', skip the
554          leading '/' to match the form used by 'svnadmin dump'. */
555       svn_repos__dumpfile_header_push(
556         nb->headers, SVN_REPOS_DUMPFILE_NODE_PATH, node_path + 1);
557 
558       /* Node-kind is next and is optional. */
559       kind = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_KIND);
560       if (kind)
561         svn_repos__dumpfile_header_push(
562           nb->headers, SVN_REPOS_DUMPFILE_NODE_KIND, kind);
563 
564       /* Node-action is next and required. */
565       action = svn_hash_gets(headers, SVN_REPOS_DUMPFILE_NODE_ACTION);
566       if (action)
567         svn_repos__dumpfile_header_push(
568           nb->headers, SVN_REPOS_DUMPFILE_NODE_ACTION, action);
569       else
570         return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
571                                  _("Missing Node-action for path '%s'"),
572                                  node_path);
573 
574       for (hi = apr_hash_first(pool, headers); hi; hi = apr_hash_next(hi))
575         {
576           const char *key = apr_hash_this_key(hi);
577           const char *val = apr_hash_this_val(hi);
578 
579           if ((!strcmp(key, SVN_REPOS_DUMPFILE_PROP_DELTA))
580               && (!strcmp(val, "true")))
581             nb->has_prop_delta = TRUE;
582 
583           if ((!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_DELTA))
584               && (!strcmp(val, "true")))
585             nb->has_text_delta = TRUE;
586 
587           if ((!strcmp(key, SVN_REPOS_DUMPFILE_CONTENT_LENGTH))
588               || (!strcmp(key, SVN_REPOS_DUMPFILE_PROP_CONTENT_LENGTH))
589               || (!strcmp(key, SVN_REPOS_DUMPFILE_TEXT_CONTENT_LENGTH))
590               || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_PATH))
591               || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_KIND))
592               || (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_ACTION)))
593             continue;
594 
595           /* Rewrite Node-Copyfrom-Rev if we are renumbering revisions.
596              The number points to some revision in the past. We keep track
597              of revision renumbering in an apr_hash, which maps original
598              revisions to new ones. Dropped revision are mapped to -1.
599              This should never happen here.
600           */
601           if (pb->do_renumber_revs
602               && (!strcmp(key, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV)))
603             {
604               svn_revnum_t cf_orig_rev;
605               struct revmap_t *cf_renum_val;
606 
607               cf_orig_rev = SVN_STR_TO_REV(val);
608               cf_renum_val = apr_hash_get(pb->renumber_history,
609                                           &cf_orig_rev,
610                                           sizeof(cf_orig_rev));
611               if (! (cf_renum_val && SVN_IS_VALID_REVNUM(cf_renum_val->rev)))
612                 return svn_error_createf
613                   (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
614                    _("No valid copyfrom revision in filtered stream for '%s'"),
615                    node_path);
616               svn_repos__dumpfile_header_pushf(
617                 nb->headers, SVN_REPOS_DUMPFILE_NODE_COPYFROM_REV,
618                 "%ld", cf_renum_val->rev);
619               continue;
620             }
621 
622           /* passthru: put header straight to output */
623           svn_repos__dumpfile_header_push(nb->headers, key, val);
624         }
625     }
626 
627   return SVN_NO_ERROR;
628 }
629 
630 
631 /* Examine the mergeinfo in INITIAL_VAL, omitting missing merge
632    sources or renumbering revisions in rangelists as appropriate, and
633    return the (possibly new) mergeinfo in *FINAL_VAL (allocated from
634    POOL). */
635 static svn_error_t *
adjust_mergeinfo(svn_string_t ** final_val,const svn_string_t * initial_val,struct revision_baton_t * rb,apr_pool_t * pool)636 adjust_mergeinfo(svn_string_t **final_val, const svn_string_t *initial_val,
637                  struct revision_baton_t *rb, apr_pool_t *pool)
638 {
639   apr_hash_t *mergeinfo;
640   apr_hash_t *final_mergeinfo = apr_hash_make(pool);
641   apr_hash_index_t *hi;
642   apr_pool_t *subpool = svn_pool_create(pool);
643 
644   SVN_ERR(svn_mergeinfo_parse(&mergeinfo, initial_val->data, subpool));
645 
646   /* Issue #3020: If we are skipping missing merge sources, then also
647      filter mergeinfo ranges as old or older than the oldest revision in the
648      dump stream.  Those older than the oldest obviously refer to history
649      outside of the dump stream.  The oldest rev itself is present in the
650      dump, but cannot be a valid merge source revision since it is the
651      start of all history.  E.g. if we dump -r100:400 then dumpfilter the
652      result with --skip-missing-merge-sources, any mergeinfo with revision
653      100 implies a change of -r99:100, but r99 is part of the history we
654      want filtered.
655 
656      If the oldest rev is r0 then there is nothing to filter. */
657 
658   /* ### This seems to cater only for use cases where the revisions being
659          processed are not following on from revisions that will already
660          exist in the destination repository. If the revisions being
661          processed do follow on, then we might want to keep the mergeinfo
662          that refers to those older revisions. */
663 
664   if (rb->pb->skip_missing_merge_sources && rb->pb->oldest_original_rev > 0)
665     SVN_ERR(svn_mergeinfo__filter_mergeinfo_by_ranges(
666       &mergeinfo, mergeinfo,
667       rb->pb->oldest_original_rev, 0,
668       FALSE, subpool, subpool));
669 
670   for (hi = apr_hash_first(subpool, mergeinfo); hi; hi = apr_hash_next(hi))
671     {
672       const char *merge_source = apr_hash_this_key(hi);
673       svn_rangelist_t *rangelist = apr_hash_this_val(hi);
674       struct parse_baton_t *pb = rb->pb;
675 
676       /* Determine whether the merge_source is a part of the prefix. */
677       if (skip_path(merge_source, pb->prefixes, pb->do_exclude, pb->glob))
678         {
679           if (pb->skip_missing_merge_sources)
680             continue;
681           else
682             return svn_error_createf(SVN_ERR_INCOMPLETE_DATA, 0,
683                                      _("Missing merge source path '%s'; try "
684                                        "with --skip-missing-merge-sources"),
685                                      merge_source);
686         }
687 
688       /* Possibly renumber revisions in merge source's rangelist. */
689       if (pb->do_renumber_revs)
690         {
691           int i;
692 
693           for (i = 0; i < rangelist->nelts; i++)
694             {
695               struct revmap_t *revmap_start;
696               struct revmap_t *revmap_end;
697               svn_merge_range_t *range = APR_ARRAY_IDX(rangelist, i,
698                                                        svn_merge_range_t *);
699 
700               revmap_start = apr_hash_get(pb->renumber_history,
701                                           &range->start, sizeof(range->start));
702               if (! (revmap_start && SVN_IS_VALID_REVNUM(revmap_start->rev)))
703                 return svn_error_createf
704                   (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
705                    _("No valid revision range 'start' in filtered stream"));
706 
707               revmap_end = apr_hash_get(pb->renumber_history,
708                                         &range->end, sizeof(range->end));
709               if (! (revmap_end && SVN_IS_VALID_REVNUM(revmap_end->rev)))
710                 return svn_error_createf
711                   (SVN_ERR_NODE_UNEXPECTED_KIND, NULL,
712                    _("No valid revision range 'end' in filtered stream"));
713 
714               range->start = revmap_start->rev;
715               range->end = revmap_end->rev;
716             }
717         }
718       svn_hash_sets(final_mergeinfo, merge_source, rangelist);
719     }
720 
721   SVN_ERR(svn_mergeinfo__canonicalize_ranges(final_mergeinfo, subpool));
722   SVN_ERR(svn_mergeinfo_to_string(final_val, final_mergeinfo, pool));
723   svn_pool_destroy(subpool);
724 
725   return SVN_NO_ERROR;
726 }
727 
728 
729 static svn_error_t *
set_revision_property(void * revision_baton,const char * name,const svn_string_t * value)730 set_revision_property(void *revision_baton,
731                       const char *name,
732                       const svn_string_t *value)
733 {
734   struct revision_baton_t *rb = revision_baton;
735   apr_pool_t *hash_pool = apr_hash_pool_get(rb->props);
736 
737   svn_hash_sets(rb->props,
738                 apr_pstrdup(hash_pool, name),
739                 svn_string_dup(value, hash_pool));
740   return SVN_NO_ERROR;
741 }
742 
743 
744 static svn_error_t *
set_node_property(void * node_baton,const char * name,const svn_string_t * value)745 set_node_property(void *node_baton,
746                   const char *name,
747                   const svn_string_t *value)
748 {
749   struct node_baton_t *nb = node_baton;
750   struct revision_baton_t *rb = nb->rb;
751 
752   if (nb->do_skip)
753     return SVN_NO_ERROR;
754 
755   /* Try to detect if a delta-mode property occurs unexpectedly. HAS_PROPS
756      can be false here only if the parser didn't call remove_node_props(),
757      so this may indicate a bug rather than bad data. */
758   if (! (nb->has_props || nb->has_prop_delta))
759     return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
760                              _("Delta property block detected, but deltas "
761                                "are not enabled for node '%s' in original "
762                                "revision %ld"),
763                              nb->node_path, rb->rev_orig);
764 
765   if (strcmp(name, SVN_PROP_MERGEINFO) == 0)
766     {
767       svn_string_t *filtered_mergeinfo;  /* Avoid compiler warning. */
768       apr_pool_t *pool = apr_hash_pool_get(rb->props);
769       SVN_ERR(adjust_mergeinfo(&filtered_mergeinfo, value, rb, pool));
770       value = filtered_mergeinfo;
771     }
772 
773   nb->has_props = TRUE;
774   write_prop_to_stringbuf(nb->props, name, value);
775 
776   return SVN_NO_ERROR;
777 }
778 
779 
780 static svn_error_t *
delete_node_property(void * node_baton,const char * name)781 delete_node_property(void *node_baton, const char *name)
782 {
783   struct node_baton_t *nb = node_baton;
784   struct revision_baton_t *rb = nb->rb;
785 
786   if (nb->do_skip)
787     return SVN_NO_ERROR;
788 
789   if (!nb->has_prop_delta)
790     return svn_error_createf(SVN_ERR_STREAM_MALFORMED_DATA, NULL,
791                              _("Delta property block detected, but deltas "
792                                "are not enabled for node '%s' in original "
793                                "revision %ld"),
794                              nb->node_path, rb->rev_orig);
795 
796   nb->has_props = TRUE;
797   write_propdel_to_stringbuf(&(nb->props), name);
798 
799   return SVN_NO_ERROR;
800 }
801 
802 
803 /* The parser calls this method if the node record has a non-delta
804  * property content section, before any calls to set_node_property().
805  * If the node record uses property deltas, this is not called.
806  */
807 static svn_error_t *
remove_node_props(void * node_baton)808 remove_node_props(void *node_baton)
809 {
810   struct node_baton_t *nb = node_baton;
811 
812   /* In this case, not actually indicating that the node *has* props,
813      rather that it has a property content section. */
814   nb->has_props = TRUE;
815 
816   return SVN_NO_ERROR;
817 }
818 
819 
820 static svn_error_t *
set_fulltext(svn_stream_t ** stream,void * node_baton)821 set_fulltext(svn_stream_t **stream, void *node_baton)
822 {
823   struct node_baton_t *nb = node_baton;
824 
825   if (!nb->do_skip)
826     {
827       nb->has_text = TRUE;
828       if (! nb->writing_begun)
829         {
830           nb->writing_begun = TRUE;
831           if (nb->has_props)
832             {
833               svn_stringbuf_appendcstr(nb->props, "PROPS-END\n");
834             }
835           SVN_ERR(svn_repos__dump_node_record(nb->rb->pb->out_stream,
836                                               nb->headers,
837                                               nb->has_props ? nb->props : NULL,
838                                               nb->has_text,
839                                               nb->tcl,
840                                               TRUE /*content_length_always*/,
841                                               nb->node_pool));
842         }
843       *stream = nb->rb->pb->out_stream;
844     }
845 
846   return SVN_NO_ERROR;
847 }
848 
849 
850 /* Finalize node */
851 static svn_error_t *
close_node(void * node_baton)852 close_node(void *node_baton)
853 {
854   struct node_baton_t *nb = node_baton;
855   apr_size_t len = 2;
856 
857   /* Get out of here if we can. */
858   if (nb->do_skip)
859     return SVN_NO_ERROR;
860 
861   /* If the node was not flushed already to output its text, do it now. */
862   if (! nb->writing_begun)
863     {
864       nb->writing_begun = TRUE;
865       if (nb->has_props)
866         {
867           svn_stringbuf_appendcstr(nb->props, "PROPS-END\n");
868         }
869       SVN_ERR(svn_repos__dump_node_record(nb->rb->pb->out_stream,
870                                           nb->headers,
871                                           nb->has_props ? nb->props : NULL,
872                                           nb->has_text,
873                                           nb->tcl,
874                                           TRUE /*content_length_always*/,
875                                           nb->node_pool));
876     }
877 
878   /* put an end to node. */
879   SVN_ERR(svn_stream_write(nb->rb->pb->out_stream, "\n\n", &len));
880 
881   return SVN_NO_ERROR;
882 }
883 
884 
885 /* Finalize revision */
886 static svn_error_t *
close_revision(void * revision_baton)887 close_revision(void *revision_baton)
888 {
889   struct revision_baton_t *rb = revision_baton;
890 
891   /* If no node has yet flushed the revision, do it now. */
892   if (! rb->writing_begun)
893     return output_revision(rb);
894   else
895     return SVN_NO_ERROR;
896 }
897 
898 
/* Filtering vtable: the set of parser callbacks that do_filter() passes
   to svn_repos_parse_dumpstream3().  The initializer is positional, so
   the order of the entries has to follow the member order of
   svn_repos_parse_fns3_t. */
static svn_repos_parse_fns3_t filtering_vtable =
  {
    magic_header_record,
    uuid_record,
    new_revision_record,
    new_node_record,
    set_revision_property,
    set_node_property,
    delete_node_property,
    remove_node_props,
    set_fulltext,
    NULL,                   /* no handler installed; presumably this is the
                               apply_textdelta slot -- confirm against
                               svn_repos.h */
    close_node,
    close_revision
  };
915 
916 
917 
918 /** Subcommands. **/
919 
/* Forward declarations of the subcommand implementations.  cmd_table
   refers to them before their definitions appear further down. */
static svn_opt_subcommand_t
  subcommand_help,
  subcommand_exclude,
  subcommand_include;
924 
/* Option codes for the options that have no single-character code of
   their own.  Numbering starts at SVN_OPT_FIRST_LONGOPT_ID.  The codes
   are used in options_table, in the per-subcommand option lists of
   cmd_table, and in the option switch in sub_main(). */
enum
  {
    svndumpfilter__drop_empty_revs = SVN_OPT_FIRST_LONGOPT_ID,
                                               /* --drop-empty-revs */
    svndumpfilter__drop_all_empty_revs,        /* --drop-all-empty-revs */
    svndumpfilter__renumber_revs,              /* --renumber-revs */
    svndumpfilter__preserve_revprops,          /* --preserve-revprops */
    svndumpfilter__skip_missing_merge_sources, /* --skip-missing-merge-sources */
    svndumpfilter__targets,                    /* --targets ARG */
    svndumpfilter__quiet,                      /* --quiet */
    svndumpfilter__glob,                       /* --pattern */
    svndumpfilter__version                     /* --version */
  };
937 
/* Option codes and descriptions.
 *
 * Each entry lists, in order: the long option name (NULL when there is
 * only a short form), the option code, a flag that is 1 when the option
 * takes an argument and 0 otherwise, and the help text.  Continuation
 * lines inside the help texts are padded by hand so that they line up in
 * the printed help.
 *
 * The entire list must be terminated with an entry of nulls.
 */
static const apr_getopt_option_t options_table[] =
  {
    {"help",          'h', 0,
     N_("show help on a subcommand")},

    {NULL,            '?', 0,
     N_("show help on a subcommand")},

    {"version",            svndumpfilter__version, 0,
     N_("show program version information") },
    {"quiet",              svndumpfilter__quiet, 0,
     N_("Do not display filtering statistics.") },
    {"pattern",            svndumpfilter__glob, 0,
     N_("Treat the path prefixes as file glob patterns.\n"
        "                             Glob special characters are '*' '?' '[]' and '\\'.\n"
        "                             Character '/' is not treated specially, so\n"
        "                             pattern /*/foo matches paths /a/foo and /a/b/foo.") },
    {"drop-empty-revs",    svndumpfilter__drop_empty_revs, 0,
     N_("Remove revisions emptied by filtering.")},
    {"drop-all-empty-revs",    svndumpfilter__drop_all_empty_revs, 0,
     N_("Remove all empty revisions found in dumpstream\n"
        "                             except revision 0.")},
    {"renumber-revs",      svndumpfilter__renumber_revs, 0,
     N_("Renumber revisions left after filtering.") },
    {"skip-missing-merge-sources",
     svndumpfilter__skip_missing_merge_sources, 0,
     N_("Skip missing merge sources.") },
    {"preserve-revprops",  svndumpfilter__preserve_revprops, 0,
     N_("Don't filter revision properties.") },
    /* The only option in this table that takes an argument. */
    {"targets", svndumpfilter__targets, 1,
     N_("Read additional prefixes, one per line, from\n"
        "                             file ARG.")},
    {NULL}
  };
976 
977 
/* Array of available subcommands.
 *
 * Each entry lists, in order: the subcommand name, the function that
 * implements it, its aliases, its help text, and the codes of the
 * options it accepts.  `exclude' and `include' accept the same set of
 * options; `help' lists none (sub_main() lets --help/-h/-? through for
 * every subcommand without consulting these lists).
 *
 * The entire list must be terminated with an entry of nulls.
 */
static const svn_opt_subcommand_desc3_t cmd_table[] =
  {
    {"exclude", subcommand_exclude, {0}, {N_(
        "Filter out nodes with given prefixes from dumpstream.\n"
        "usage: svndumpfilter exclude PATH_PREFIX...\n"
     )},
     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
      svndumpfilter__renumber_revs,
      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
      svndumpfilter__glob} },

    {"include", subcommand_include, {0}, {N_(
        "Filter out nodes without given prefixes from dumpstream.\n"
        "usage: svndumpfilter include PATH_PREFIX...\n"
     )},
     {svndumpfilter__drop_empty_revs, svndumpfilter__drop_all_empty_revs,
      svndumpfilter__renumber_revs,
      svndumpfilter__skip_missing_merge_sources, svndumpfilter__targets,
      svndumpfilter__preserve_revprops, svndumpfilter__quiet,
      svndumpfilter__glob} },

    {"help", subcommand_help, {"?", "h"}, {N_(
        "Describe the usage of this program or its subcommands.\n"
        "usage: svndumpfilter help [SUBCOMMAND...]\n"
     )},
     {0} },

    { NULL, NULL, {0}, {NULL}, {0} }
  };
1011 
1012 
/* Baton for passing option/argument state to a subcommand function.
 *
 * sub_main() zero-fills one of these, sets the flags as it parses the
 * command line, and passes its address to the selected subcommand.
 */
struct svndumpfilter_opt_state
{
  svn_opt_revision_t start_revision;     /* -r X[:Y] is not implemented; */
  svn_opt_revision_t end_revision;       /* both are left "unspecified". */
  svn_boolean_t quiet;                   /* --quiet             */
  svn_boolean_t glob;                    /* --pattern           */
  svn_boolean_t version;                 /* --version           */
  svn_boolean_t drop_empty_revs;         /* --drop-empty-revs   */
  svn_boolean_t drop_all_empty_revs;     /* --drop-all-empty-revs */
  svn_boolean_t help;                    /* --help, -h or -?    */
  svn_boolean_t renumber_revs;           /* --renumber-revs     */
  svn_boolean_t preserve_revprops;       /* --preserve-revprops */
  svn_boolean_t skip_missing_merge_sources;
                                         /* --skip-missing-merge-sources */
  const char *targets_file;              /* --targets ARG (UTF-8 path),
                                            NULL if not given   */
  apr_array_header_t *prefixes;          /* Path prefixes (const char *),
                                            from the remaining arguments
                                            and the targets file. */
};
1031 
1032 
1033 static svn_error_t *
parse_baton_initialize(struct parse_baton_t ** pb,struct svndumpfilter_opt_state * opt_state,svn_boolean_t do_exclude,apr_pool_t * pool)1034 parse_baton_initialize(struct parse_baton_t **pb,
1035                        struct svndumpfilter_opt_state *opt_state,
1036                        svn_boolean_t do_exclude,
1037                        apr_pool_t *pool)
1038 {
1039   struct parse_baton_t *baton = apr_palloc(pool, sizeof(*baton));
1040 
1041   /* Read the stream from STDIN.  Users can redirect a file. */
1042   SVN_ERR(svn_stream_for_stdin2(&baton->in_stream, TRUE, pool));
1043 
1044   /* Have the parser dump results to STDOUT. Users can redirect a file. */
1045   SVN_ERR(svn_stream_for_stdout(&baton->out_stream, pool));
1046 
1047   baton->do_exclude = do_exclude;
1048 
1049   /* Ignore --renumber-revs if there can't possibly be
1050      anything to renumber. */
1051   baton->do_renumber_revs =
1052     (opt_state->renumber_revs && (opt_state->drop_empty_revs
1053                                   || opt_state->drop_all_empty_revs));
1054 
1055   baton->drop_empty_revs = opt_state->drop_empty_revs;
1056   baton->drop_all_empty_revs = opt_state->drop_all_empty_revs;
1057   baton->preserve_revprops = opt_state->preserve_revprops;
1058   baton->quiet = opt_state->quiet;
1059   baton->glob = opt_state->glob;
1060   baton->prefixes = opt_state->prefixes;
1061   baton->skip_missing_merge_sources = opt_state->skip_missing_merge_sources;
1062   baton->rev_drop_count = 0; /* used to shift revnums while filtering */
1063   baton->dropped_nodes = apr_hash_make(pool);
1064   baton->renumber_history = apr_hash_make(pool);
1065   baton->last_live_revision = SVN_INVALID_REVNUM;
1066   baton->oldest_original_rev = SVN_INVALID_REVNUM;
1067   baton->allow_deltas = FALSE;
1068 
1069   *pb = baton;
1070   return SVN_NO_ERROR;
1071 }
1072 
1073 /* This implements `help` subcommand. */
1074 static svn_error_t *
subcommand_help(apr_getopt_t * os,void * baton,apr_pool_t * pool)1075 subcommand_help(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1076 {
1077   struct svndumpfilter_opt_state *opt_state = baton;
1078   const char *header =
1079     _("general usage: svndumpfilter SUBCOMMAND [ARGS & OPTIONS ...]\n"
1080       "Subversion repository dump filtering tool.\n"
1081       "Type 'svndumpfilter help <subcommand>' for help on a "
1082       "specific subcommand.\n"
1083       "Type 'svndumpfilter --version' to see the program version.\n"
1084       "\n"
1085       "Available subcommands:\n");
1086 
1087   SVN_ERR(svn_opt_print_help5(os, "svndumpfilter",
1088                               opt_state ? opt_state->version : FALSE,
1089                               opt_state ? opt_state->quiet : FALSE,
1090                               /*###opt_state ? opt_state->verbose :*/ FALSE,
1091                               NULL, header, cmd_table, options_table,
1092                               NULL, NULL, pool));
1093 
1094   return SVN_NO_ERROR;
1095 }
1096 
1097 
1098 /* Version compatibility check */
1099 static svn_error_t *
check_lib_versions(void)1100 check_lib_versions(void)
1101 {
1102   static const svn_version_checklist_t checklist[] =
1103     {
1104       { "svn_subr",  svn_subr_version },
1105       { "svn_repos", svn_repos_version },
1106       { "svn_delta", svn_delta_version },
1107       { NULL, NULL }
1108     };
1109   SVN_VERSION_DEFINE(my_version);
1110 
1111   return svn_ver_check_list2(&my_version, checklist, svn_ver_equal);
1112 }
1113 
1114 
1115 /* Do the real work of filtering. */
1116 static svn_error_t *
do_filter(apr_getopt_t * os,void * baton,svn_boolean_t do_exclude,apr_pool_t * pool)1117 do_filter(apr_getopt_t *os,
1118           void *baton,
1119           svn_boolean_t do_exclude,
1120           apr_pool_t *pool)
1121 {
1122   struct svndumpfilter_opt_state *opt_state = baton;
1123   struct parse_baton_t *pb;
1124   apr_hash_index_t *hi;
1125   apr_array_header_t *keys;
1126   int i, num_keys;
1127 
1128   if (! opt_state->quiet)
1129     {
1130       apr_pool_t *subpool = svn_pool_create(pool);
1131 
1132       if (opt_state->glob)
1133         {
1134           SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1135                                       do_exclude
1136                                       ? (opt_state->drop_empty_revs
1137                                          || opt_state->drop_all_empty_revs)
1138                                         ? _("Excluding (and dropping empty "
1139                                             "revisions for) prefix patterns:\n")
1140                                         : _("Excluding prefix patterns:\n")
1141                                       : (opt_state->drop_empty_revs
1142                                          || opt_state->drop_all_empty_revs)
1143                                         ? _("Including (and dropping empty "
1144                                             "revisions for) prefix patterns:\n")
1145                                         : _("Including prefix patterns:\n")));
1146         }
1147       else
1148         {
1149           SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1150                                       do_exclude
1151                                       ? (opt_state->drop_empty_revs
1152                                          || opt_state->drop_all_empty_revs)
1153                                         ? _("Excluding (and dropping empty "
1154                                             "revisions for) prefixes:\n")
1155                                         : _("Excluding prefixes:\n")
1156                                       : (opt_state->drop_empty_revs
1157                                          || opt_state->drop_all_empty_revs)
1158                                         ? _("Including (and dropping empty "
1159                                             "revisions for) prefixes:\n")
1160                                         : _("Including prefixes:\n")));
1161         }
1162 
1163       for (i = 0; i < opt_state->prefixes->nelts; i++)
1164         {
1165           svn_pool_clear(subpool);
1166           SVN_ERR(svn_cmdline_fprintf
1167                   (stderr, subpool, "   '%s'\n",
1168                    APR_ARRAY_IDX(opt_state->prefixes, i, const char *)));
1169         }
1170 
1171       SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1172       svn_pool_destroy(subpool);
1173     }
1174 
1175   SVN_ERR(parse_baton_initialize(&pb, opt_state, do_exclude, pool));
1176   SVN_ERR(svn_repos_parse_dumpstream3(pb->in_stream, &filtering_vtable, pb,
1177                                       TRUE, NULL, NULL, pool));
1178 
1179   /* The rest of this is just reporting.  If we aren't reporting, get
1180      outta here. */
1181   if (opt_state->quiet)
1182     return SVN_NO_ERROR;
1183 
1184   SVN_ERR(svn_cmdline_fputs("\n", stderr, pool));
1185 
1186   if (pb->rev_drop_count)
1187     SVN_ERR(svn_cmdline_fprintf(stderr, pool,
1188                                 Q_("Dropped %d revision.\n\n",
1189                                    "Dropped %d revisions.\n\n",
1190                                    pb->rev_drop_count),
1191                                 pb->rev_drop_count));
1192 
1193   if (pb->do_renumber_revs)
1194     {
1195       apr_pool_t *subpool = svn_pool_create(pool);
1196       SVN_ERR(svn_cmdline_fputs(_("Revisions renumbered as follows:\n"),
1197                                 stderr, subpool));
1198 
1199       /* Get the keys of the hash, sort them, then print the hash keys
1200          and values, sorted by keys. */
1201       num_keys = apr_hash_count(pb->renumber_history);
1202       keys = apr_array_make(pool, num_keys + 1, sizeof(svn_revnum_t));
1203       for (hi = apr_hash_first(pool, pb->renumber_history);
1204            hi;
1205            hi = apr_hash_next(hi))
1206         {
1207           const svn_revnum_t *revnum = apr_hash_this_key(hi);
1208 
1209           APR_ARRAY_PUSH(keys, svn_revnum_t) = *revnum;
1210         }
1211       svn_sort__array(keys, svn_sort_compare_revisions);
1212       for (i = 0; i < keys->nelts; i++)
1213         {
1214           svn_revnum_t this_key;
1215           struct revmap_t *this_val;
1216 
1217           svn_pool_clear(subpool);
1218           this_key = APR_ARRAY_IDX(keys, i, svn_revnum_t);
1219           this_val = apr_hash_get(pb->renumber_history, &this_key,
1220                                   sizeof(this_key));
1221           if (this_val->was_dropped)
1222             SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1223                                         _("   %ld => (dropped)\n"),
1224                                         this_key));
1225           else
1226             SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1227                                         "   %ld => %ld\n",
1228                                         this_key, this_val->rev));
1229         }
1230       SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1231       svn_pool_destroy(subpool);
1232     }
1233 
1234   if ((num_keys = apr_hash_count(pb->dropped_nodes)))
1235     {
1236       apr_pool_t *subpool = svn_pool_create(pool);
1237       SVN_ERR(svn_cmdline_fprintf(stderr, subpool,
1238                                   Q_("Dropped %d node:\n",
1239                                      "Dropped %d nodes:\n",
1240                                      num_keys),
1241                                   num_keys));
1242 
1243       /* Get the keys of the hash, sort them, then print the hash keys
1244          and values, sorted by keys. */
1245       keys = apr_array_make(pool, num_keys + 1, sizeof(const char *));
1246       for (hi = apr_hash_first(pool, pb->dropped_nodes);
1247            hi;
1248            hi = apr_hash_next(hi))
1249         {
1250           const char *path = apr_hash_this_key(hi);
1251 
1252           APR_ARRAY_PUSH(keys, const char *) = path;
1253         }
1254       svn_sort__array(keys, svn_sort_compare_paths);
1255       for (i = 0; i < keys->nelts; i++)
1256         {
1257           svn_pool_clear(subpool);
1258           SVN_ERR(svn_cmdline_fprintf
1259                   (stderr, subpool, "   '%s'\n",
1260                    (const char *)APR_ARRAY_IDX(keys, i, const char *)));
1261         }
1262       SVN_ERR(svn_cmdline_fputs("\n", stderr, subpool));
1263       svn_pool_destroy(subpool);
1264     }
1265 
1266   return SVN_NO_ERROR;
1267 }
1268 
1269 /* This implements `exclude' subcommand. */
1270 static svn_error_t *
subcommand_exclude(apr_getopt_t * os,void * baton,apr_pool_t * pool)1271 subcommand_exclude(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1272 {
1273   return do_filter(os, baton, TRUE, pool);
1274 }
1275 
1276 
1277 /* This implements `include` subcommand. */
1278 static svn_error_t *
subcommand_include(apr_getopt_t * os,void * baton,apr_pool_t * pool)1279 subcommand_include(apr_getopt_t *os, void *baton, apr_pool_t *pool)
1280 {
1281   return do_filter(os, baton, FALSE, pool);
1282 }
1283 
1284 
1285 
1286 /** Main. **/
1287 
/*
 * Parse the command line in ARGC/ARGV, select a subcommand and run it,
 * allocating in POOL.
 *
 * On success, leave *EXIT_CODE untouched and return SVN_NO_ERROR. On error,
 * either return an error to be displayed, or set *EXIT_CODE to non-zero and
 * return SVN_NO_ERROR.
 *
 * Usage problems (no arguments, unknown option or subcommand, missing
 * prefixes, option not accepted by the subcommand) take the second route:
 * a message and/or the general help is printed and *EXIT_CODE is set to
 * EXIT_FAILURE.
 */
static svn_error_t *
sub_main(int *exit_code, int argc, const char *argv[], apr_pool_t *pool)
{
  svn_error_t *err;
  apr_status_t apr_err;

  const svn_opt_subcommand_desc3_t *subcommand = NULL;
  struct svndumpfilter_opt_state opt_state;
  apr_getopt_t *os;
  int opt_id;
  apr_array_header_t *received_opts;
  int i;

  /* Check library versions */
  SVN_ERR(check_lib_versions());

  /* Every option code seen is remembered here, so that it can be checked
     against the subcommand once the subcommand is known. */
  received_opts = apr_array_make(pool, SVN_OPT_MAX_OPTIONS, sizeof(int));

  /* Initialize the FS library. */
  SVN_ERR(svn_fs_initialize(pool));

  /* No arguments at all: print the general help and fail. */
  if (argc <= 1)
    {
      SVN_ERR(subcommand_help(NULL, NULL, pool));
      *exit_code = EXIT_FAILURE;
      return SVN_NO_ERROR;
    }

  /* Initialize opt_state. */
  memset(&opt_state, 0, sizeof(opt_state));
  opt_state.start_revision.kind = svn_opt_revision_unspecified;
  opt_state.end_revision.kind = svn_opt_revision_unspecified;

  /* Parse options. */
  SVN_ERR(svn_cmdline__getopt_init(&os, argc, argv, pool));

  /* Presumably allows options and non-option arguments to be mixed on
     the command line -- confirm against the apr_getopt documentation. */
  os->interleave = 1;
  while (1)
    {
      const char *opt_arg;

      /* Parse the next option. */
      apr_err = apr_getopt_long(os, options_table, &opt_id, &opt_arg);
      if (APR_STATUS_IS_EOF(apr_err))
        break;
      else if (apr_err)
        {
          SVN_ERR(subcommand_help(NULL, NULL, pool));
          *exit_code = EXIT_FAILURE;
          return SVN_NO_ERROR;
        }

      /* Stash the option code in an array before parsing it. */
      APR_ARRAY_PUSH(received_opts, int) = opt_id;

      switch (opt_id)
        {
        case 'h':
        case '?':
          opt_state.help = TRUE;
          break;
        case svndumpfilter__version:
          opt_state.version = TRUE;
          break;
        case svndumpfilter__quiet:
          opt_state.quiet = TRUE;
          break;
        case svndumpfilter__glob:
          opt_state.glob = TRUE;
          break;
        case svndumpfilter__drop_empty_revs:
          opt_state.drop_empty_revs = TRUE;
          break;
        case svndumpfilter__drop_all_empty_revs:
          opt_state.drop_all_empty_revs = TRUE;
          break;
        case svndumpfilter__renumber_revs:
          opt_state.renumber_revs = TRUE;
          break;
        case svndumpfilter__preserve_revprops:
          opt_state.preserve_revprops = TRUE;
          break;
        case svndumpfilter__skip_missing_merge_sources:
          opt_state.skip_missing_merge_sources = TRUE;
          break;
        case svndumpfilter__targets:
          /* Only the file name is recorded here; the file itself is read
             further down, once the subcommand is known. */
          SVN_ERR(svn_utf_cstring_to_utf8(&opt_state.targets_file,
                                          opt_arg, pool));
          break;
        default:
          {
            SVN_ERR(subcommand_help(NULL, NULL, pool));
            *exit_code = EXIT_FAILURE;
            return SVN_NO_ERROR;
          }
        }  /* close `switch' */
    }  /* close `while' */

  /* Disallow simultaneous use of both --drop-empty-revs and
     --drop-all-empty-revs. */
  if (opt_state.drop_empty_revs && opt_state.drop_all_empty_revs)
    {
      return svn_error_create(SVN_ERR_CL_MUTUALLY_EXCLUSIVE_ARGS,
                              NULL,
                              _("--drop-empty-revs cannot be used with "
                                "--drop-all-empty-revs"));
    }

  /* If the user asked for help, then the rest of the arguments are
     the names of subcommands to get help on (if any), or else they're
     just typos/mistakes.  Whatever the case, the subcommand to
     actually run is subcommand_help(). */
  if (opt_state.help)
    subcommand = svn_opt_get_canonical_subcommand3(cmd_table, "help");

  /* If we're not running the `help' subcommand, then look for a
     subcommand in the first argument. */
  if (subcommand == NULL)
    {
      if (os->ind >= os->argc)
        {
          /* No non-option argument is left. */
          if (opt_state.version)
            {
              /* Use the "help" subcommand to handle the "--version" option. */
              static const svn_opt_subcommand_desc3_t pseudo_cmd =
                { "--version", subcommand_help, {0}, {""},
                  {svndumpfilter__version,  /* must accept its own option */
                   svndumpfilter__quiet,
                  } };

              subcommand = &pseudo_cmd;
            }
          else
            {
              svn_error_clear(svn_cmdline_fprintf
                              (stderr, pool,
                               _("Subcommand argument required\n")));
              SVN_ERR(subcommand_help(NULL, NULL, pool));
              *exit_code = EXIT_FAILURE;
              return SVN_NO_ERROR;
            }
        }
      else
        {
          const char *first_arg;

          /* Consume the first non-option argument as the subcommand name;
             OS->IND then points at the first prefix, if any. */
          SVN_ERR(svn_utf_cstring_to_utf8(&first_arg, os->argv[os->ind++],
                                          pool));
          subcommand = svn_opt_get_canonical_subcommand3(cmd_table, first_arg);
          if (subcommand == NULL)
            {
              svn_error_clear(
                svn_cmdline_fprintf(stderr, pool,
                                    _("Unknown subcommand: '%s'\n"),
                                    first_arg));
              SVN_ERR(subcommand_help(NULL, NULL, pool));
              *exit_code = EXIT_FAILURE;
              return SVN_NO_ERROR;
            }
        }
    }

  /* If there's a second argument, it's probably [one of] prefixes.
     Every subcommand except `help' requires at least one, so we parse
     them out here and store in opt_state. */

  if (subcommand->cmd_func != subcommand_help)
    {

      opt_state.prefixes = apr_array_make(pool, os->argc - os->ind,
                                          sizeof(const char *));
      for (i = os->ind ; i< os->argc; i++)
        {
          const char *prefix;

          /* Ensure that each prefix is UTF8-encoded, in internal
             style, and absolute. */
          SVN_ERR(svn_utf_cstring_to_utf8(&prefix, os->argv[i], pool));
          SVN_ERR(svn_relpath__make_internal(&prefix, prefix, pool, pool));
          if (prefix[0] != '/')
            prefix = apr_pstrcat(pool, "/", prefix, SVN_VA_NULL);
          APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
        }

      if (opt_state.targets_file)
        {
          svn_stringbuf_t *buffer, *buffer_utf8;
          apr_array_header_t *targets = apr_array_make(pool, 0,
                                                       sizeof(const char *));

          /* We need to convert to UTF-8 now, even before we divide
             the targets into an array, because otherwise we wouldn't
             know what delimiter to use for svn_cstring_split().  */
          SVN_ERR(svn_stringbuf_from_file2(&buffer, opt_state.targets_file,
                                           pool));
          SVN_ERR(svn_utf_stringbuf_to_utf8(&buffer_utf8, buffer, pool));

          targets = apr_array_append(pool,
                         svn_cstring_split(buffer_utf8->data, "\n\r",
                                           TRUE, pool),
                         targets);

          /* NOTE(review): unlike the prefixes given as arguments above,
             the prefixes read from the targets file are only made
             absolute; they are not passed through
             svn_relpath__make_internal().  Confirm whether that
             difference is intended. */
          for (i = 0; i < targets->nelts; i++)
            {
              const char *prefix = APR_ARRAY_IDX(targets, i, const char *);
              if (prefix[0] != '/')
                prefix = apr_pstrcat(pool, "/", prefix, SVN_VA_NULL);
              APR_ARRAY_PUSH(opt_state.prefixes, const char *) = prefix;
            }
        }

      if (apr_is_empty_array(opt_state.prefixes))
        {
          svn_error_clear(svn_cmdline_fprintf
                          (stderr, pool,
                           _("\nError: no prefixes supplied.\n")));
          *exit_code = EXIT_FAILURE;
          return SVN_NO_ERROR;
        }
    }


  /* Check that the subcommand wasn't passed any inappropriate options. */
  for (i = 0; i < received_opts->nelts; i++)
    {
      opt_id = APR_ARRAY_IDX(received_opts, i, int);

      /* All commands implicitly accept --help, so just skip over this
         when we see it. Note that we don't want to include this option
         in their "accepted options" list because it would be awfully
         redundant to display it in every command's help text. */
      if (opt_id == 'h' || opt_id == '?')
        continue;

      if (! svn_opt_subcommand_takes_option4(subcommand, opt_id, NULL))
        {
          const char *optstr;
          const apr_getopt_option_t *badopt =
            svn_opt_get_option_from_code3(opt_id, options_table, subcommand,
                                          pool);
          svn_opt_format_option(&optstr, badopt, FALSE, pool);
          /* A name starting with '-' identifies the "--version" pseudo
             subcommand set up above; print the general help for it
             instead of naming it in the message. */
          if (subcommand->name[0] == '-')
            SVN_ERR(subcommand_help(NULL, NULL, pool));
          else
            svn_error_clear(svn_cmdline_fprintf
                            (stderr, pool,
                             _("Subcommand '%s' doesn't accept option '%s'\n"
                               "Type 'svndumpfilter help %s' for usage.\n"),
                             subcommand->name, optstr, subcommand->name));
          *exit_code = EXIT_FAILURE;
          return SVN_NO_ERROR;
        }
    }

  /* Run the subcommand. */
  err = (*subcommand->cmd_func)(os, &opt_state, pool);
  if (err)
    {
      /* For argument-related problems, suggest using the 'help'
         subcommand. */
      if (err->apr_err == SVN_ERR_CL_INSUFFICIENT_ARGS
          || err->apr_err == SVN_ERR_CL_ARG_PARSING_ERROR)
        {
          err = svn_error_quick_wrap(err,
                                     _("Try 'svndumpfilter help' for more "
                                       "info"));
        }
      return err;
    }

  return SVN_NO_ERROR;
}
1565 
1566 int
main(int argc,const char * argv[])1567 main(int argc, const char *argv[])
1568 {
1569   apr_pool_t *pool;
1570   int exit_code = EXIT_SUCCESS;
1571   svn_error_t *err;
1572 
1573   /* Initialize the app. */
1574   if (svn_cmdline_init("svndumpfilter", stderr) != EXIT_SUCCESS)
1575     return EXIT_FAILURE;
1576 
1577   /* Create our top-level pool.  Use a separate mutexless allocator,
1578    * given this application is single threaded.
1579    */
1580   pool = apr_allocator_owner_get(svn_pool_create_allocator(FALSE));
1581 
1582   err = sub_main(&exit_code, argc, argv, pool);
1583 
1584   /* Flush stdout and report if it fails. It would be flushed on exit anyway
1585      but this makes sure that output is not silently lost if it fails. */
1586   err = svn_error_compose_create(err, svn_cmdline_fflush(stdout));
1587 
1588   if (err)
1589     {
1590       exit_code = EXIT_FAILURE;
1591       svn_cmdline_handle_exit_error(err, NULL, "svndumpfilter: ");
1592     }
1593 
1594   svn_pool_destroy(pool);
1595   return exit_code;
1596 }
1597