xref: /freebsd-14.2/usr.bin/diff/diffreg.c (revision efa8af7c)
1 /*	$OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $	*/
2 
3 /*
4  * Copyright (C) Caldera International Inc.  2001-2002.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code and documentation must retain the above
11  *    copyright notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. All advertising materials mentioning features or use of this software
16  *    must display the following acknowledgement:
17  *	This product includes software developed or owned by Caldera
18  *	International, Inc.
19  * 4. Neither the name of Caldera International, Inc. nor the names of other
20  *    contributors may be used to endorse or promote products derived from
21  *    this software without specific prior written permission.
22  *
23  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
24  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
25  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
26  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
27  * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT,
28  * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
29  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
32  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
33  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  */
36 /*-
37  * Copyright (c) 1991, 1993
38  *	The Regents of the University of California.  All rights reserved.
39  *
40  * Redistribution and use in source and binary forms, with or without
41  * modification, are permitted provided that the following conditions
42  * are met:
43  * 1. Redistributions of source code must retain the above copyright
44  *    notice, this list of conditions and the following disclaimer.
45  * 2. Redistributions in binary form must reproduce the above copyright
46  *    notice, this list of conditions and the following disclaimer in the
47  *    documentation and/or other materials provided with the distribution.
48  * 3. Neither the name of the University nor the names of its contributors
49  *    may be used to endorse or promote products derived from this software
50  *    without specific prior written permission.
51  *
52  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
53  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
55  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
56  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
58  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
59  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
60  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
61  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  *
64  *	@(#)diffreg.c   8.1 (Berkeley) 6/6/93
65  */
66 
67 #include <sys/cdefs.h>
68 __FBSDID("$FreeBSD$");
69 
70 #include <sys/capsicum.h>
71 #include <sys/procdesc.h>
72 #include <sys/stat.h>
73 #include <sys/types.h>
74 #include <sys/event.h>
75 #include <sys/wait.h>
76 
77 #include <capsicum_helpers.h>
78 #include <ctype.h>
79 #include <err.h>
80 #include <errno.h>
81 #include <fcntl.h>
82 #include <paths.h>
83 #include <regex.h>
84 #include <stddef.h>
85 #include <stdint.h>
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <string.h>
89 #include <unistd.h>
90 #include <limits.h>
91 #include <signal.h>
92 
93 #include "diff.h"
94 #include "xmalloc.h"
95 
96 #define _PATH_PR "/usr/bin/pr"
97 
98 /*
99  * diff - compare two files.
100  */
101 
102 /*
103  *	Uses an algorithm due to Harold Stone, which finds
104  *	a pair of longest identical subsequences in the two
105  *	files.
106  *
107  *	The major goal is to generate the match vector J.
108  *	J[i] is the index of the line in file1 corresponding
109  *	to line i file0. J[i] = 0 if there is no
110  *	such line in file1.
111  *
112  *	Lines are hashed so as to work in core. All potential
113  *	matches are located by sorting the lines of each file
114  *	on the hash (called ``value''). In particular, this
115  *	collects the equivalence classes in file1 together.
116  *	Subroutine equiv replaces the value of each line in
117  *	file0 by the index of the first element of its
118  *	matching equivalence in (the reordered) file1.
119  *	To save space equiv squeezes file1 into a single
120  *	array member in which the equivalence classes
121  *	are simply concatenated, except that their first
122  *	members are flagged by changing sign.
123  *
124  *	Next the indices that point into member are unsorted into
125  *	array class according to the original order of file0.
126  *
127  *	The cleverness lies in routine stone. This marches
128  *	through the lines of file0, developing a vector klist
129  *	of "k-candidates". At step i a k-candidate is a matched
130  *	pair of lines x,y (x in file0 y in file1) such that
131  *	there is a common subsequence of length k
132  *	between the first i lines of file0 and the first y
133  *	lines of file1, but there is no such subsequence for
134  *	any smaller y. x is the earliest possible mate to y
135  *	that occurs in such a subsequence.
136  *
137  *	Whenever any of the members of the equivalence class of
138  *	lines in file1 matable to a line in file0 has serial number
139  *	less than the y of some k-candidate, that k-candidate
140  *	with the smallest such y is replaced. The new
141  *	k-candidate is chained (via pred) to the current
142  *	k-1 candidate so that the actual subsequence can
143  *	be recovered. When a member has serial number greater
144  *	that the y of all k-candidates, the klist is extended.
145  *	At the end, the longest subsequence is pulled out
146  *	and placed in the array J by unravel
147  *
148  *	With J in hand, the matches there recorded are
149  *	check'ed against reality to assure that no spurious
150  *	matches have crept in due to hashing. If they have,
151  *	they are broken, and "jackpot" is recorded--a harmless
152  *	matter except that a true match for a spuriously
153  *	mated line may now be unnecessarily reported as a change.
154  *
155  *	Much of the complexity of the program comes simply
156  *	from trying to minimize core utilization and
157  *	maximize the range of doable problems by dynamically
158  *	allocating what is needed and reusing what is not.
159  *	The core requirements for problems larger than somewhat
160  *	are (in words) 2*length(file0) + length(file1) +
161  *	3*(number of k-candidates installed),  typically about
162  *	6n words for files of length n.
163  */
164 
165 struct cand {
166 	int	x;
167 	int	y;
168 	int	pred;
169 };
170 
171 static struct line {
172 	int	serial;
173 	int	value;
174 } *file[2];
175 
176 /*
177  * The following struct is used to record change information when
178  * doing a "context" or "unified" diff.  (see routine "change" to
179  * understand the highly mnemonic field names)
180  */
181 struct context_vec {
182 	int	a;		/* start line in old file */
183 	int	b;		/* end line in old file */
184 	int	c;		/* start line in new file */
185 	int	d;		/* end line in new file */
186 };
187 
188 #define	diff_output	printf
189 static FILE	*opentemp(const char *);
190 static void	 output(char *, FILE *, char *, FILE *, int);
191 static void	 check(FILE *, FILE *, int);
192 static void	 range(int, int, const char *);
193 static void	 uni_range(int, int);
194 static void	 dump_context_vec(FILE *, FILE *, int);
195 static void	 dump_unified_vec(FILE *, FILE *, int);
196 static void	 prepare(int, FILE *, size_t, int);
197 static void	 prune(void);
198 static void	 equiv(struct line *, int, struct line *, int, int *);
199 static void	 unravel(int);
200 static void	 unsort(struct line *, int, int *);
201 static void	 change(char *, FILE *, char *, FILE *, int, int, int, int, int *);
202 static void	 sort(struct line *, int);
203 static void	 print_header(const char *, const char *);
204 static int	 ignoreline(char *);
205 static int	 asciifile(FILE *);
206 static int	 fetch(long *, int, int, FILE *, int, int, int);
207 static int	 newcand(int, int, int);
208 static int	 search(int *, int, int);
209 static int	 skipline(FILE *);
210 static int	 isqrt(int);
211 static int	 stone(int *, int, int *, int *, int);
212 static int	 readhash(FILE *, int);
213 static int	 files_differ(FILE *, FILE *, int);
214 static char	*match_function(const long *, int, FILE *);
215 static char	*preadline(int, size_t, off_t);
216 
217 static int  *J;			/* will be overlaid on class */
218 static int  *class;		/* will be overlaid on file[0] */
219 static int  *klist;		/* will be overlaid on file[0] after class */
220 static int  *member;		/* will be overlaid on file[1] */
221 static int   clen;
222 static int   inifdef;		/* whether or not we are in a #ifdef block */
223 static int   len[2];
224 static int   pref, suff;	/* length of prefix and suffix */
225 static int   slen[2];
226 static int   anychange;
227 static long *ixnew;		/* will be overlaid on file[1] */
228 static long *ixold;		/* will be overlaid on klist */
229 static struct cand *clist;	/* merely a free storage pot for candidates */
230 static int   clistlen;		/* the length of clist */
231 static struct line *sfile[2];	/* shortened by pruning common prefix/suffix */
232 static u_char *chrtran;		/* translation table for case-folding */
233 static struct context_vec *context_vec_start;
234 static struct context_vec *context_vec_end;
235 static struct context_vec *context_vec_ptr;
236 
237 #define FUNCTION_CONTEXT_SIZE	55
238 static char lastbuf[FUNCTION_CONTEXT_SIZE];
239 static int lastline;
240 static int lastmatchline;
241 
242 
243 /*
244  * chrtran points to one of 2 translation tables: cup2low if folding upper to
245  * lower case clow2low if not folding case
246  */
247 static u_char clow2low[256] = {
248 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
249 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
250 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
251 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
252 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
253 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41,
254 	0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c,
255 	0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
256 	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62,
257 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
258 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
259 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
260 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
261 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
262 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
263 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
264 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
265 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
266 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
267 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
268 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
269 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
270 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
271 	0xfd, 0xfe, 0xff
272 };
273 
274 static u_char cup2low[256] = {
275 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
276 	0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15,
277 	0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
278 	0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b,
279 	0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
280 	0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61,
281 	0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
282 	0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
283 	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62,
284 	0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d,
285 	0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
286 	0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83,
287 	0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e,
288 	0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99,
289 	0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4,
290 	0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
291 	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba,
292 	0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5,
293 	0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0,
294 	0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
295 	0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6,
296 	0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,
297 	0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc,
298 	0xfd, 0xfe, 0xff
299 };
300 
301 int
302 diffreg(char *file1, char *file2, int flags, int capsicum)
303 {
304 	FILE *f1, *f2;
305 	int i, rval;
306 	int	ostdout = -1;
307 	int pr_pd, kq;
308 	struct kevent *e;
309 	cap_rights_t rights_ro;
310 
311 	e = NULL;
312 	kq = -1;
313 	f1 = f2 = NULL;
314 	rval = D_SAME;
315 	anychange = 0;
316 	lastline = 0;
317 	lastmatchline = 0;
318 	context_vec_ptr = context_vec_start - 1;
319 	if (flags & D_IGNORECASE)
320 		chrtran = cup2low;
321 	else
322 		chrtran = clow2low;
323 	if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode))
324 		return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2);
325 	if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0)
326 		goto closem;
327 
328 	if (flags & D_EMPTY1)
329 		f1 = fopen(_PATH_DEVNULL, "r");
330 	else {
331 		if (!S_ISREG(stb1.st_mode)) {
332 			if ((f1 = opentemp(file1)) == NULL ||
333 			    fstat(fileno(f1), &stb1) < 0) {
334 				warn("%s", file1);
335 				status |= 2;
336 				goto closem;
337 			}
338 		} else if (strcmp(file1, "-") == 0)
339 			f1 = stdin;
340 		else
341 			f1 = fopen(file1, "r");
342 	}
343 	if (f1 == NULL) {
344 		warn("%s", file1);
345 		status |= 2;
346 		goto closem;
347 	}
348 
349 	if (flags & D_EMPTY2)
350 		f2 = fopen(_PATH_DEVNULL, "r");
351 	else {
352 		if (!S_ISREG(stb2.st_mode)) {
353 			if ((f2 = opentemp(file2)) == NULL ||
354 			    fstat(fileno(f2), &stb2) < 0) {
355 				warn("%s", file2);
356 				status |= 2;
357 				goto closem;
358 			}
359 		} else if (strcmp(file2, "-") == 0)
360 			f2 = stdin;
361 		else
362 			f2 = fopen(file2, "r");
363 	}
364 	if (f2 == NULL) {
365 		warn("%s", file2);
366 		status |= 2;
367 		goto closem;
368 	}
369 
370 	if (lflag) {
371 		/* redirect stdout to pr */
372 		int	 pfd[2];
373 		pid_t	pid;
374 		char	*header;
375 
376 		xasprintf(&header, "%s %s %s", diffargs, file1, file2);
377 		signal(SIGPIPE, SIG_IGN);
378 		fflush(stdout);
379 		rewind(stdout);
380 		pipe(pfd);
381 		switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) {
382 		case -1:
383 			status |= 2;
384 			free(header);
385 			err(2, "No more processes");
386 		case 0:
387 			/* child */
388 			if (pfd[0] != STDIN_FILENO) {
389 				dup2(pfd[0], STDIN_FILENO);
390 				close(pfd[0]);
391 			}
392 			close(pfd[1]);
393 			execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0);
394 			_exit(127);
395 		default:
396 
397 			/* parent */
398 			if (pfd[1] != STDOUT_FILENO) {
399 				ostdout = dup(STDOUT_FILENO);
400 				dup2(pfd[1], STDOUT_FILENO);
401 				close(pfd[1]);
402 			}
403 			close(pfd[0]);
404 			rewind(stdout);
405 			free(header);
406 			kq = kqueue();
407 			if (kq == -1)
408 				err(2, "kqueue");
409 			e = xmalloc(sizeof(struct kevent));
410 			EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0,
411 			    NULL);
412 			if (kevent(kq, e, 1, NULL, 0, NULL) == -1)
413 				err(2, "kevent");
414 		}
415 	}
416 
417 	if (capsicum) {
418 		cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK);
419 		if (cap_rights_limit(fileno(f1), &rights_ro) < 0
420 		    && errno != ENOSYS)
421 			err(2, "unable to limit rights on: %s", file1);
422 		if (cap_rights_limit(fileno(f2), &rights_ro) < 0 &&
423 		    errno != ENOSYS)
424 			err(2, "unable to limit rights on: %s", file2);
425 		if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) {
426 			/* stding has already been limited */
427 			if (caph_limit_stderr() == -1)
428 				err(2, "unable to limit stderr");
429 			if (caph_limit_stdout() == -1)
430 				err(2, "unable to limit stdout");
431 		} else if (caph_limit_stdio() == -1)
432 				err(2, "unable to limit stdio");
433 
434 		caph_cache_catpages();
435 		caph_cache_tzdata();
436 		if (cap_enter() < 0 && errno != ENOSYS)
437 			err(2, "unable to enter capability mode");
438 	}
439 
440 	switch (files_differ(f1, f2, flags)) {
441 	case 0:
442 		goto closem;
443 	case 1:
444 		break;
445 	default:
446 		/* error */
447 		status |= 2;
448 		goto closem;
449 	}
450 
451 	if ((flags & D_FORCEASCII) == 0 &&
452 	    (!asciifile(f1) || !asciifile(f2))) {
453 		rval = D_BINARY;
454 		status |= 1;
455 		goto closem;
456 	}
457 	prepare(0, f1, stb1.st_size, flags);
458 	prepare(1, f2, stb2.st_size, flags);
459 
460 	prune();
461 	sort(sfile[0], slen[0]);
462 	sort(sfile[1], slen[1]);
463 
464 	member = (int *)file[1];
465 	equiv(sfile[0], slen[0], sfile[1], slen[1], member);
466 	member = xreallocarray(member, slen[1] + 2, sizeof(*member));
467 
468 	class = (int *)file[0];
469 	unsort(sfile[0], slen[0], class);
470 	class = xreallocarray(class, slen[0] + 2, sizeof(*class));
471 
472 	klist = xcalloc(slen[0] + 2, sizeof(*klist));
473 	clen = 0;
474 	clistlen = 100;
475 	clist = xcalloc(clistlen, sizeof(*clist));
476 	i = stone(class, slen[0], member, klist, flags);
477 	free(member);
478 	free(class);
479 
480 	J = xreallocarray(J, len[0] + 2, sizeof(*J));
481 	unravel(klist[i]);
482 	free(clist);
483 	free(klist);
484 
485 	ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold));
486 	ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew));
487 	check(f1, f2, flags);
488 	output(file1, f1, file2, f2, flags);
489 	if (ostdout != -1 && e != NULL) {
490 		/* close the pipe to pr and restore stdout */
491 		int wstatus;
492 
493 		fflush(stdout);
494 		if (ostdout != STDOUT_FILENO) {
495 			close(STDOUT_FILENO);
496 			dup2(ostdout, STDOUT_FILENO);
497 			close(ostdout);
498 		}
499 		if (kevent(kq, NULL, 0, e, 1, NULL) == -1)
500 			err(2, "kevent");
501 		wstatus = e[0].data;
502 		close(kq);
503 		if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0)
504 			errx(2, "pr exited abnormally");
505 		else if (WIFSIGNALED(wstatus))
506 			errx(2, "pr killed by signal %d",
507 			    WTERMSIG(wstatus));
508 	}
509 
510 closem:
511 	if (anychange) {
512 		status |= 1;
513 		if (rval == D_SAME)
514 			rval = D_DIFFER;
515 	}
516 	if (f1 != NULL)
517 		fclose(f1);
518 	if (f2 != NULL)
519 		fclose(f2);
520 
521 	return (rval);
522 }
523 
524 /*
525  * Check to see if the given files differ.
526  * Returns 0 if they are the same, 1 if different, and -1 on error.
527  * XXX - could use code from cmp(1) [faster]
528  */
529 static int
530 files_differ(FILE *f1, FILE *f2, int flags)
531 {
532 	char buf1[BUFSIZ], buf2[BUFSIZ];
533 	size_t i, j;
534 
535 	if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size ||
536 	    (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT))
537 		return (1);
538 	for (;;) {
539 		i = fread(buf1, 1, sizeof(buf1), f1);
540 		j = fread(buf2, 1, sizeof(buf2), f2);
541 		if ((!i && ferror(f1)) || (!j && ferror(f2)))
542 			return (-1);
543 		if (i != j)
544 			return (1);
545 		if (i == 0)
546 			return (0);
547 		if (memcmp(buf1, buf2, i) != 0)
548 			return (1);
549 	}
550 }
551 
552 static FILE *
553 opentemp(const char *f)
554 {
555 	char buf[BUFSIZ], tempfile[PATH_MAX];
556 	ssize_t nread;
557 	int ifd, ofd;
558 
559 	if (strcmp(f, "-") == 0)
560 		ifd = STDIN_FILENO;
561 	else if ((ifd = open(f, O_RDONLY, 0644)) < 0)
562 		return (NULL);
563 
564 	(void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile));
565 
566 	if ((ofd = mkstemp(tempfile)) < 0) {
567 		close(ifd);
568 		return (NULL);
569 	}
570 	unlink(tempfile);
571 	while ((nread = read(ifd, buf, BUFSIZ)) > 0) {
572 		if (write(ofd, buf, nread) != nread) {
573 			close(ifd);
574 			close(ofd);
575 			return (NULL);
576 		}
577 	}
578 	close(ifd);
579 	lseek(ofd, (off_t)0, SEEK_SET);
580 	return (fdopen(ofd, "r"));
581 }
582 
583 char *
584 splice(char *dir, char *path)
585 {
586 	char *tail, *buf;
587 	size_t dirlen;
588 
589 	dirlen = strlen(dir);
590 	while (dirlen != 0 && dir[dirlen - 1] == '/')
591 	    dirlen--;
592 	if ((tail = strrchr(path, '/')) == NULL)
593 		tail = path;
594 	else
595 		tail++;
596 	xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail);
597 	return (buf);
598 }
599 
600 static void
601 prepare(int i, FILE *fd, size_t filesize, int flags)
602 {
603 	struct line *p;
604 	int h;
605 	size_t sz, j;
606 
607 	rewind(fd);
608 
609 	sz = MIN(filesize, SIZE_MAX) / 25;
610 	if (sz < 100)
611 		sz = 100;
612 
613 	p = xcalloc(sz + 3, sizeof(*p));
614 	for (j = 0; (h = readhash(fd, flags));) {
615 		if (j == sz) {
616 			sz = sz * 3 / 2;
617 			p = xreallocarray(p, sz + 3, sizeof(*p));
618 		}
619 		p[++j].value = h;
620 	}
621 	len[i] = j;
622 	file[i] = p;
623 }
624 
625 static void
626 prune(void)
627 {
628 	int i, j;
629 
630 	for (pref = 0; pref < len[0] && pref < len[1] &&
631 	    file[0][pref + 1].value == file[1][pref + 1].value;
632 	    pref++)
633 		;
634 	for (suff = 0; suff < len[0] - pref && suff < len[1] - pref &&
635 	    file[0][len[0] - suff].value == file[1][len[1] - suff].value;
636 	    suff++)
637 		;
638 	for (j = 0; j < 2; j++) {
639 		sfile[j] = file[j] + pref;
640 		slen[j] = len[j] - pref - suff;
641 		for (i = 0; i <= slen[j]; i++)
642 			sfile[j][i].serial = i;
643 	}
644 }
645 
646 static void
647 equiv(struct line *a, int n, struct line *b, int m, int *c)
648 {
649 	int i, j;
650 
651 	i = j = 1;
652 	while (i <= n && j <= m) {
653 		if (a[i].value < b[j].value)
654 			a[i++].value = 0;
655 		else if (a[i].value == b[j].value)
656 			a[i++].value = j;
657 		else
658 			j++;
659 	}
660 	while (i <= n)
661 		a[i++].value = 0;
662 	b[m + 1].value = 0;
663 	j = 0;
664 	while (++j <= m) {
665 		c[j] = -b[j].serial;
666 		while (b[j + 1].value == b[j].value) {
667 			j++;
668 			c[j] = b[j].serial;
669 		}
670 	}
671 	c[j] = -1;
672 }
673 
674 /* Code taken from ping.c */
675 static int
676 isqrt(int n)
677 {
678 	int y, x = 1;
679 
680 	if (n == 0)
681 		return (0);
682 
683 	do { /* newton was a stinker */
684 		y = x;
685 		x = n / x;
686 		x += y;
687 		x /= 2;
688 	} while ((x - y) > 1 || (x - y) < -1);
689 
690 	return (x);
691 }
692 
693 static int
694 stone(int *a, int n, int *b, int *c, int flags)
695 {
696 	int i, k, y, j, l;
697 	int oldc, tc, oldl, sq;
698 	u_int numtries, bound;
699 
700 	if (flags & D_MINIMAL)
701 		bound = UINT_MAX;
702 	else {
703 		sq = isqrt(n);
704 		bound = MAX(256, sq);
705 	}
706 
707 	k = 0;
708 	c[0] = newcand(0, 0, 0);
709 	for (i = 1; i <= n; i++) {
710 		j = a[i];
711 		if (j == 0)
712 			continue;
713 		y = -b[j];
714 		oldl = 0;
715 		oldc = c[0];
716 		numtries = 0;
717 		do {
718 			if (y <= clist[oldc].y)
719 				continue;
720 			l = search(c, k, y);
721 			if (l != oldl + 1)
722 				oldc = c[l - 1];
723 			if (l <= k) {
724 				if (clist[c[l]].y <= y)
725 					continue;
726 				tc = c[l];
727 				c[l] = newcand(i, y, oldc);
728 				oldc = tc;
729 				oldl = l;
730 				numtries++;
731 			} else {
732 				c[l] = newcand(i, y, oldc);
733 				k++;
734 				break;
735 			}
736 		} while ((y = b[++j]) > 0 && numtries < bound);
737 	}
738 	return (k);
739 }
740 
741 static int
742 newcand(int x, int y, int pred)
743 {
744 	struct cand *q;
745 
746 	if (clen == clistlen) {
747 		clistlen = clistlen * 11 / 10;
748 		clist = xreallocarray(clist, clistlen, sizeof(*clist));
749 	}
750 	q = clist + clen;
751 	q->x = x;
752 	q->y = y;
753 	q->pred = pred;
754 	return (clen++);
755 }
756 
757 static int
758 search(int *c, int k, int y)
759 {
760 	int i, j, l, t;
761 
762 	if (clist[c[k]].y < y)	/* quick look for typical case */
763 		return (k + 1);
764 	i = 0;
765 	j = k + 1;
766 	for (;;) {
767 		l = (i + j) / 2;
768 		if (l <= i)
769 			break;
770 		t = clist[c[l]].y;
771 		if (t > y)
772 			j = l;
773 		else if (t < y)
774 			i = l;
775 		else
776 			return (l);
777 	}
778 	return (l + 1);
779 }
780 
781 static void
782 unravel(int p)
783 {
784 	struct cand *q;
785 	int i;
786 
787 	for (i = 0; i <= len[0]; i++)
788 		J[i] = i <= pref ? i :
789 		    i > len[0] - suff ? i + len[1] - len[0] : 0;
790 	for (q = clist + p; q->y != 0; q = clist + q->pred)
791 		J[q->x + pref] = q->y + pref;
792 }
793 
794 /*
795  * Check does double duty:
796  *  1.	ferret out any fortuitous correspondences due
797  *	to confounding by hashing (which result in "jackpot")
798  *  2.  collect random access indexes to the two files
799  */
800 static void
801 check(FILE *f1, FILE *f2, int flags)
802 {
803 	int i, j, jackpot, c, d;
804 	long ctold, ctnew;
805 
806 	rewind(f1);
807 	rewind(f2);
808 	j = 1;
809 	ixold[0] = ixnew[0] = 0;
810 	jackpot = 0;
811 	ctold = ctnew = 0;
812 	for (i = 1; i <= len[0]; i++) {
813 		if (J[i] == 0) {
814 			ixold[i] = ctold += skipline(f1);
815 			continue;
816 		}
817 		while (j < J[i]) {
818 			ixnew[j] = ctnew += skipline(f2);
819 			j++;
820 		}
821 		if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) {
822 			for (;;) {
823 				c = getc(f1);
824 				d = getc(f2);
825 				/*
826 				 * GNU diff ignores a missing newline
827 				 * in one file for -b or -w.
828 				 */
829 				if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) {
830 					if (c == EOF && d == '\n') {
831 						ctnew++;
832 						break;
833 					} else if (c == '\n' && d == EOF) {
834 						ctold++;
835 						break;
836 					}
837 				}
838 				ctold++;
839 				ctnew++;
840 				if (flags & D_STRIPCR) {
841 					if (c == '\r') {
842 						if ((c = getc(f1)) == '\n') {
843 							ctnew++;
844 							break;
845 						}
846 					}
847 					if (d == '\r') {
848 						if ((d = getc(f2)) == '\n') {
849 							ctold++;
850 							break;
851 						}
852 					}
853 				}
854 				if ((flags & D_FOLDBLANKS) && isspace(c) &&
855 				    isspace(d)) {
856 					do {
857 						if (c == '\n')
858 							break;
859 						ctold++;
860 					} while (isspace(c = getc(f1)));
861 					do {
862 						if (d == '\n')
863 							break;
864 						ctnew++;
865 					} while (isspace(d = getc(f2)));
866 				} else if ((flags & D_IGNOREBLANKS)) {
867 					while (isspace(c) && c != '\n') {
868 						c = getc(f1);
869 						ctold++;
870 					}
871 					while (isspace(d) && d != '\n') {
872 						d = getc(f2);
873 						ctnew++;
874 					}
875 				}
876 				if (chrtran[c] != chrtran[d]) {
877 					jackpot++;
878 					J[i] = 0;
879 					if (c != '\n' && c != EOF)
880 						ctold += skipline(f1);
881 					if (d != '\n' && c != EOF)
882 						ctnew += skipline(f2);
883 					break;
884 				}
885 				if (c == '\n' || c == EOF)
886 					break;
887 			}
888 		} else {
889 			for (;;) {
890 				ctold++;
891 				ctnew++;
892 				if ((c = getc(f1)) != (d = getc(f2))) {
893 					/* jackpot++; */
894 					J[i] = 0;
895 					if (c != '\n' && c != EOF)
896 						ctold += skipline(f1);
897 					if (d != '\n' && c != EOF)
898 						ctnew += skipline(f2);
899 					break;
900 				}
901 				if (c == '\n' || c == EOF)
902 					break;
903 			}
904 		}
905 		ixold[i] = ctold;
906 		ixnew[j] = ctnew;
907 		j++;
908 	}
909 	for (; j <= len[1]; j++) {
910 		ixnew[j] = ctnew += skipline(f2);
911 	}
912 	/*
913 	 * if (jackpot)
914 	 *	fprintf(stderr, "jackpot\n");
915 	 */
916 }
917 
918 /* shellsort CACM #201 */
919 static void
920 sort(struct line *a, int n)
921 {
922 	struct line *ai, *aim, w;
923 	int j, m = 0, k;
924 
925 	if (n == 0)
926 		return;
927 	for (j = 1; j <= n; j *= 2)
928 		m = 2 * j - 1;
929 	for (m /= 2; m != 0; m /= 2) {
930 		k = n - m;
931 		for (j = 1; j <= k; j++) {
932 			for (ai = &a[j]; ai > a; ai -= m) {
933 				aim = &ai[m];
934 				if (aim < ai)
935 					break;	/* wraparound */
936 				if (aim->value > ai[0].value ||
937 				    (aim->value == ai[0].value &&
938 					aim->serial > ai[0].serial))
939 					break;
940 				w.value = ai[0].value;
941 				ai[0].value = aim->value;
942 				aim->value = w.value;
943 				w.serial = ai[0].serial;
944 				ai[0].serial = aim->serial;
945 				aim->serial = w.serial;
946 			}
947 		}
948 	}
949 }
950 
951 static void
952 unsort(struct line *f, int l, int *b)
953 {
954 	int *a, i;
955 
956 	a = xcalloc(l + 1, sizeof(*a));
957 	for (i = 1; i <= l; i++)
958 		a[f[i].serial] = f[i].value;
959 	for (i = 1; i <= l; i++)
960 		b[i] = a[i];
961 	free(a);
962 }
963 
964 static int
965 skipline(FILE *f)
966 {
967 	int i, c;
968 
969 	for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++)
970 		continue;
971 	return (i);
972 }
973 
974 static void
975 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags)
976 {
977 	int m, i0, i1, j0, j1;
978 
979 	rewind(f1);
980 	rewind(f2);
981 	m = len[0];
982 	J[0] = 0;
983 	J[m + 1] = len[1] + 1;
984 	if (diff_format != D_EDIT) {
985 		for (i0 = 1; i0 <= m; i0 = i1 + 1) {
986 			while (i0 <= m && J[i0] == J[i0 - 1] + 1)
987 				i0++;
988 			j0 = J[i0 - 1] + 1;
989 			i1 = i0 - 1;
990 			while (i1 < m && J[i1 + 1] == 0)
991 				i1++;
992 			j1 = J[i1 + 1] - 1;
993 			J[i1] = j1;
994 			change(file1, f1, file2, f2, i0, i1, j0, j1, &flags);
995 		}
996 	} else {
997 		for (i0 = m; i0 >= 1; i0 = i1 - 1) {
998 			while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0)
999 				i0--;
1000 			j0 = J[i0 + 1] - 1;
1001 			i1 = i0 + 1;
1002 			while (i1 > 1 && J[i1 - 1] == 0)
1003 				i1--;
1004 			j1 = J[i1 - 1] + 1;
1005 			J[i1] = j1;
1006 			change(file1, f1, file2, f2, i1, i0, j1, j0, &flags);
1007 		}
1008 	}
1009 	if (m == 0)
1010 		change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags);
1011 	if (diff_format == D_IFDEF || diff_format == D_GFORMAT) {
1012 		for (;;) {
1013 #define	c i0
1014 			if ((c = getc(f1)) == EOF)
1015 				return;
1016 			diff_output("%c", c);
1017 		}
1018 #undef c
1019 	}
1020 	if (anychange != 0) {
1021 		if (diff_format == D_CONTEXT)
1022 			dump_context_vec(f1, f2, flags);
1023 		else if (diff_format == D_UNIFIED)
1024 			dump_unified_vec(f1, f2, flags);
1025 	}
1026 }
1027 
1028 static void
1029 range(int a, int b, const char *separator)
1030 {
1031 	diff_output("%d", a > b ? b : a);
1032 	if (a < b)
1033 		diff_output("%s%d", separator, b);
1034 }
1035 
1036 static void
1037 uni_range(int a, int b)
1038 {
1039 	if (a < b)
1040 		diff_output("%d,%d", a, b - a + 1);
1041 	else if (a == b)
1042 		diff_output("%d", b);
1043 	else
1044 		diff_output("%d,0", b);
1045 }
1046 
1047 static char *
1048 preadline(int fd, size_t rlen, off_t off)
1049 {
1050 	char *line;
1051 	ssize_t nr;
1052 
1053 	line = xmalloc(rlen + 1);
1054 	if ((nr = pread(fd, line, rlen, off)) < 0)
1055 		err(2, "preadline");
1056 	if (nr > 0 && line[nr-1] == '\n')
1057 		nr--;
1058 	line[nr] = '\0';
1059 	return (line);
1060 }
1061 
1062 static int
1063 ignoreline(char *line)
1064 {
1065 	int ret;
1066 
1067 	ret = regexec(&ignore_re, line, 0, NULL, 0);
1068 	free(line);
1069 	return (ret == 0);	/* if it matched, it should be ignored. */
1070 }
1071 
1072 /*
1073  * Indicate that there is a difference between lines a and b of the from file
1074  * to get to lines c to d of the to file.  If a is greater then b then there
1075  * are no lines in the from file involved and this means that there were
1076  * lines appended (beginning at b).  If c is greater than d then there are
1077  * lines missing from the to file.
1078  */
1079 static void
1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d,
1081     int *pflags)
1082 {
1083 	static size_t max_context = 64;
1084 	long curpos;
1085 	int i, nc;
1086 	const char *walk;
1087 
1088 restart:
1089 	if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) &&
1090 	    a > b && c > d)
1091 		return;
1092 	if (ignore_pats != NULL) {
1093 		char *line;
1094 		/*
1095 		 * All lines in the change, insert, or delete must
1096 		 * match an ignore pattern for the change to be
1097 		 * ignored.
1098 		 */
1099 		if (a <= b) {		/* Changes and deletes. */
1100 			for (i = a; i <= b; i++) {
1101 				line = preadline(fileno(f1),
1102 				    ixold[i] - ixold[i - 1], ixold[i - 1]);
1103 				if (!ignoreline(line))
1104 					goto proceed;
1105 			}
1106 		}
1107 		if (a > b || c <= d) {	/* Changes and inserts. */
1108 			for (i = c; i <= d; i++) {
1109 				line = preadline(fileno(f2),
1110 				    ixnew[i] - ixnew[i - 1], ixnew[i - 1]);
1111 				if (!ignoreline(line))
1112 					goto proceed;
1113 			}
1114 		}
1115 		return;
1116 	}
1117 proceed:
1118 	if (*pflags & D_HEADER) {
1119 		diff_output("%s %s %s\n", diffargs, file1, file2);
1120 		*pflags &= ~D_HEADER;
1121 	}
1122 	if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) {
1123 		/*
1124 		 * Allocate change records as needed.
1125 		 */
1126 		if (context_vec_ptr == context_vec_end - 1) {
1127 			ptrdiff_t offset = context_vec_ptr - context_vec_start;
1128 			max_context <<= 1;
1129 			context_vec_start = xreallocarray(context_vec_start,
1130 			    max_context, sizeof(*context_vec_start));
1131 			context_vec_end = context_vec_start + max_context;
1132 			context_vec_ptr = context_vec_start + offset;
1133 		}
1134 		if (anychange == 0) {
1135 			/*
1136 			 * Print the context/unidiff header first time through.
1137 			 */
1138 			print_header(file1, file2);
1139 			anychange = 1;
1140 		} else if (a > context_vec_ptr->b + (2 * diff_context) + 1 &&
1141 		    c > context_vec_ptr->d + (2 * diff_context) + 1) {
1142 			/*
1143 			 * If this change is more than 'diff_context' lines from the
1144 			 * previous change, dump the record and reset it.
1145 			 */
1146 			if (diff_format == D_CONTEXT)
1147 				dump_context_vec(f1, f2, *pflags);
1148 			else
1149 				dump_unified_vec(f1, f2, *pflags);
1150 		}
1151 		context_vec_ptr++;
1152 		context_vec_ptr->a = a;
1153 		context_vec_ptr->b = b;
1154 		context_vec_ptr->c = c;
1155 		context_vec_ptr->d = d;
1156 		return;
1157 	}
1158 	if (anychange == 0)
1159 		anychange = 1;
1160 	switch (diff_format) {
1161 	case D_BRIEF:
1162 		return;
1163 	case D_NORMAL:
1164 	case D_EDIT:
1165 		range(a, b, ",");
1166 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1167 		if (diff_format == D_NORMAL)
1168 			range(c, d, ",");
1169 		diff_output("\n");
1170 		break;
1171 	case D_REVERSE:
1172 		diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c');
1173 		range(a, b, " ");
1174 		diff_output("\n");
1175 		break;
1176 	case D_NREVERSE:
1177 		if (a > b)
1178 			diff_output("a%d %d\n", b, d - c + 1);
1179 		else {
1180 			diff_output("d%d %d\n", a, b - a + 1);
1181 			if (!(c > d))
1182 				/* add changed lines */
1183 				diff_output("a%d %d\n", b, d - c + 1);
1184 		}
1185 		break;
1186 	}
1187 	if (diff_format == D_GFORMAT) {
1188 		curpos = ftell(f1);
1189 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1190 		nc = ixold[a > b ? b : a - 1] - curpos;
1191 		for (i = 0; i < nc; i++)
1192 			diff_output("%c", getc(f1));
1193 		for (walk = group_format; *walk != '\0'; walk++) {
1194 			if (*walk == '%') {
1195 				walk++;
1196 				switch (*walk) {
1197 				case '<':
1198 					fetch(ixold, a, b, f1, '<', 1, *pflags);
1199 					break;
1200 				case '>':
1201 					fetch(ixnew, c, d, f2, '>', 0, *pflags);
1202 					break;
1203 				default:
1204 					diff_output("%%%c", *walk);
1205 					break;
1206 				}
1207 				continue;
1208 			}
1209 			diff_output("%c", *walk);
1210 		}
1211 	}
1212 	if (diff_format == D_NORMAL || diff_format == D_IFDEF) {
1213 		fetch(ixold, a, b, f1, '<', 1, *pflags);
1214 		if (a <= b && c <= d && diff_format == D_NORMAL)
1215 			diff_output("---\n");
1216 	}
1217 	if (diff_format != D_GFORMAT)
1218 		i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags);
1219 	if (i != 0 && diff_format == D_EDIT) {
1220 		/*
1221 		 * A non-zero return value for D_EDIT indicates that the
1222 		 * last line printed was a bare dot (".") that has been
1223 		 * escaped as ".." to prevent ed(1) from misinterpreting
1224 		 * it.  We have to add a substitute command to change this
1225 		 * back and restart where we left off.
1226 		 */
1227 		diff_output(".\n");
1228 		diff_output("%ds/.//\n", a + i - 1);
1229 		b = a + i - 1;
1230 		a = b + 1;
1231 		c += i;
1232 		goto restart;
1233 	}
1234 	if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d)
1235 		diff_output(".\n");
1236 	if (inifdef) {
1237 		diff_output("#endif /* %s */\n", ifdefname);
1238 		inifdef = 0;
1239 	}
1240 }
1241 
1242 static int
1243 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags)
1244 {
1245 	int i, j, c, lastc, col, nc;
1246 	int	newcol;
1247 
1248 	/*
1249 	 * When doing #ifdef's, copy down to current line
1250 	 * if this is the first file, so that stuff makes it to output.
1251 	 */
1252 	if ((diff_format == D_IFDEF) && oldfile) {
1253 		long curpos = ftell(lb);
1254 		/* print through if append (a>b), else to (nb: 0 vs 1 orig) */
1255 		nc = f[a > b ? b : a - 1] - curpos;
1256 		for (i = 0; i < nc; i++)
1257 			diff_output("%c", getc(lb));
1258 	}
1259 	if (a > b)
1260 		return (0);
1261 	if (diff_format == D_IFDEF) {
1262 		if (inifdef) {
1263 			diff_output("#else /* %s%s */\n",
1264 			    oldfile == 1 ? "!" : "", ifdefname);
1265 		} else {
1266 			if (oldfile)
1267 				diff_output("#ifndef %s\n", ifdefname);
1268 			else
1269 				diff_output("#ifdef %s\n", ifdefname);
1270 		}
1271 		inifdef = 1 + oldfile;
1272 	}
1273 	for (i = a; i <= b; i++) {
1274 		fseek(lb, f[i - 1], SEEK_SET);
1275 		nc = f[i] - f[i - 1];
1276 		if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) &&
1277 		    ch != '\0') {
1278 			diff_output("%c", ch);
1279 			if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT
1280 			    || diff_format == D_UNIFIED))
1281 				diff_output("\t");
1282 			else if (diff_format != D_UNIFIED)
1283 				diff_output(" ");
1284 		}
1285 		col = 0;
1286 		for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) {
1287 			if ((c = getc(lb)) == EOF) {
1288 				if (diff_format == D_EDIT || diff_format == D_REVERSE ||
1289 				    diff_format == D_NREVERSE)
1290 					warnx("No newline at end of file");
1291 				else
1292 					diff_output("\n\\ No newline at end of "
1293 					    "file\n");
1294 				return (0);
1295 			}
1296 			if (c == '\t' && (flags & D_EXPANDTABS)) {
1297 				newcol = ((col/tabsize)+1)*tabsize;
1298 				do {
1299 					diff_output(" ");
1300 				} while (++col < newcol);
1301 			} else {
1302 				if (diff_format == D_EDIT && j == 1 && c == '\n'
1303 				    && lastc == '.') {
1304 					/*
1305 					 * Don't print a bare "." line
1306 					 * since that will confuse ed(1).
1307 					 * Print ".." instead and return,
1308 					 * giving the caller an offset
1309 					 * from which to restart.
1310 					 */
1311 					diff_output(".\n");
1312 					return (i - a + 1);
1313 				}
1314 				diff_output("%c", c);
1315 				col++;
1316 			}
1317 		}
1318 	}
1319 	return (0);
1320 }
1321 
1322 /*
1323  * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
1324  */
1325 static int
1326 readhash(FILE *f, int flags)
1327 {
1328 	int i, t, space;
1329 	int sum;
1330 
1331 	sum = 1;
1332 	space = 0;
1333 	if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) {
1334 		if (flags & D_IGNORECASE)
1335 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1336 				if (flags & D_STRIPCR && t == '\r') {
1337 					t = getc(f);
1338 					if (t == '\n')
1339 						break;
1340 					ungetc(t, f);
1341 				}
1342 				if (t == EOF) {
1343 					if (i == 0)
1344 						return (0);
1345 					break;
1346 				}
1347 				sum = sum * 127 + chrtran[t];
1348 			}
1349 		else
1350 			for (i = 0; (t = getc(f)) != '\n'; i++) {
1351 				if (flags & D_STRIPCR && t == '\r') {
1352 					t = getc(f);
1353 					if (t == '\n')
1354 						break;
1355 					ungetc(t, f);
1356 				}
1357 				if (t == EOF) {
1358 					if (i == 0)
1359 						return (0);
1360 					break;
1361 				}
1362 				sum = sum * 127 + t;
1363 			}
1364 	} else {
1365 		for (i = 0;;) {
1366 			switch (t = getc(f)) {
1367 			case '\r':
1368 			case '\t':
1369 			case '\v':
1370 			case '\f':
1371 			case ' ':
1372 				space++;
1373 				continue;
1374 			default:
1375 				if (space && (flags & D_IGNOREBLANKS) == 0) {
1376 					i++;
1377 					space = 0;
1378 				}
1379 				sum = sum * 127 + chrtran[t];
1380 				i++;
1381 				continue;
1382 			case EOF:
1383 				if (i == 0)
1384 					return (0);
1385 				/* FALLTHROUGH */
1386 			case '\n':
1387 				break;
1388 			}
1389 			break;
1390 		}
1391 	}
1392 	/*
1393 	 * There is a remote possibility that we end up with a zero sum.
1394 	 * Zero is used as an EOF marker, so return 1 instead.
1395 	 */
1396 	return (sum == 0 ? 1 : sum);
1397 }
1398 
1399 static int
1400 asciifile(FILE *f)
1401 {
1402 	unsigned char buf[BUFSIZ];
1403 	size_t cnt;
1404 
1405 	if (f == NULL)
1406 		return (1);
1407 
1408 	rewind(f);
1409 	cnt = fread(buf, 1, sizeof(buf), f);
1410 	return (memchr(buf, '\0', cnt) == NULL);
1411 }
1412 
1413 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0)
1414 
1415 static char *
1416 match_function(const long *f, int pos, FILE *fp)
1417 {
1418 	unsigned char buf[FUNCTION_CONTEXT_SIZE];
1419 	size_t nc;
1420 	int last = lastline;
1421 	const char *state = NULL;
1422 
1423 	lastline = pos;
1424 	while (pos > last) {
1425 		fseek(fp, f[pos - 1], SEEK_SET);
1426 		nc = f[pos] - f[pos - 1];
1427 		if (nc >= sizeof(buf))
1428 			nc = sizeof(buf) - 1;
1429 		nc = fread(buf, 1, nc, fp);
1430 		if (nc > 0) {
1431 			buf[nc] = '\0';
1432 			buf[strcspn(buf, "\n")] = '\0';
1433 			if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') {
1434 				if (begins_with(buf, "private:")) {
1435 					if (!state)
1436 						state = " (private)";
1437 				} else if (begins_with(buf, "protected:")) {
1438 					if (!state)
1439 						state = " (protected)";
1440 				} else if (begins_with(buf, "public:")) {
1441 					if (!state)
1442 						state = " (public)";
1443 				} else {
1444 					strlcpy(lastbuf, buf, sizeof lastbuf);
1445 					if (state)
1446 						strlcat(lastbuf, state,
1447 						    sizeof lastbuf);
1448 					lastmatchline = pos;
1449 					return lastbuf;
1450 				}
1451 			}
1452 		}
1453 		pos--;
1454 	}
1455 	return lastmatchline > 0 ? lastbuf : NULL;
1456 }
1457 
1458 /* dump accumulated "context" diff changes */
1459 static void
1460 dump_context_vec(FILE *f1, FILE *f2, int flags)
1461 {
1462 	struct context_vec *cvp = context_vec_start;
1463 	int lowa, upb, lowc, upd, do_output;
1464 	int a, b, c, d;
1465 	char ch, *f;
1466 
1467 	if (context_vec_start > context_vec_ptr)
1468 		return;
1469 
1470 	b = d = 0;		/* gcc */
1471 	lowa = MAX(1, cvp->a - diff_context);
1472 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1473 	lowc = MAX(1, cvp->c - diff_context);
1474 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1475 
1476 	diff_output("***************");
1477 	if ((flags & D_PROTOTYPE)) {
1478 		f = match_function(ixold, lowa-1, f1);
1479 		if (f != NULL)
1480 			diff_output(" %s", f);
1481 	}
1482 	diff_output("\n*** ");
1483 	range(lowa, upb, ",");
1484 	diff_output(" ****\n");
1485 
1486 	/*
1487 	 * Output changes to the "old" file.  The first loop suppresses
1488 	 * output if there were no changes to the "old" file (we'll see
1489 	 * the "old" lines as context in the "new" list).
1490 	 */
1491 	do_output = 0;
1492 	for (; cvp <= context_vec_ptr; cvp++)
1493 		if (cvp->a <= cvp->b) {
1494 			cvp = context_vec_start;
1495 			do_output++;
1496 			break;
1497 		}
1498 	if (do_output) {
1499 		while (cvp <= context_vec_ptr) {
1500 			a = cvp->a;
1501 			b = cvp->b;
1502 			c = cvp->c;
1503 			d = cvp->d;
1504 
1505 			if (a <= b && c <= d)
1506 				ch = 'c';
1507 			else
1508 				ch = (a <= b) ? 'd' : 'a';
1509 
1510 			if (ch == 'a')
1511 				fetch(ixold, lowa, b, f1, ' ', 0, flags);
1512 			else {
1513 				fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1514 				fetch(ixold, a, b, f1,
1515 				    ch == 'c' ? '!' : '-', 0, flags);
1516 			}
1517 			lowa = b + 1;
1518 			cvp++;
1519 		}
1520 		fetch(ixold, b + 1, upb, f1, ' ', 0, flags);
1521 	}
1522 	/* output changes to the "new" file */
1523 	diff_output("--- ");
1524 	range(lowc, upd, ",");
1525 	diff_output(" ----\n");
1526 
1527 	do_output = 0;
1528 	for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++)
1529 		if (cvp->c <= cvp->d) {
1530 			cvp = context_vec_start;
1531 			do_output++;
1532 			break;
1533 		}
1534 	if (do_output) {
1535 		while (cvp <= context_vec_ptr) {
1536 			a = cvp->a;
1537 			b = cvp->b;
1538 			c = cvp->c;
1539 			d = cvp->d;
1540 
1541 			if (a <= b && c <= d)
1542 				ch = 'c';
1543 			else
1544 				ch = (a <= b) ? 'd' : 'a';
1545 
1546 			if (ch == 'd')
1547 				fetch(ixnew, lowc, d, f2, ' ', 0, flags);
1548 			else {
1549 				fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1550 				fetch(ixnew, c, d, f2,
1551 				    ch == 'c' ? '!' : '+', 0, flags);
1552 			}
1553 			lowc = d + 1;
1554 			cvp++;
1555 		}
1556 		fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1557 	}
1558 	context_vec_ptr = context_vec_start - 1;
1559 }
1560 
1561 /* dump accumulated "unified" diff changes */
1562 static void
1563 dump_unified_vec(FILE *f1, FILE *f2, int flags)
1564 {
1565 	struct context_vec *cvp = context_vec_start;
1566 	int lowa, upb, lowc, upd;
1567 	int a, b, c, d;
1568 	char ch, *f;
1569 
1570 	if (context_vec_start > context_vec_ptr)
1571 		return;
1572 
1573 	b = d = 0;		/* gcc */
1574 	lowa = MAX(1, cvp->a - diff_context);
1575 	upb = MIN(len[0], context_vec_ptr->b + diff_context);
1576 	lowc = MAX(1, cvp->c - diff_context);
1577 	upd = MIN(len[1], context_vec_ptr->d + diff_context);
1578 
1579 	diff_output("@@ -");
1580 	uni_range(lowa, upb);
1581 	diff_output(" +");
1582 	uni_range(lowc, upd);
1583 	diff_output(" @@");
1584 	if ((flags & D_PROTOTYPE)) {
1585 		f = match_function(ixold, lowa-1, f1);
1586 		if (f != NULL)
1587 			diff_output(" %s", f);
1588 	}
1589 	diff_output("\n");
1590 
1591 	/*
1592 	 * Output changes in "unified" diff format--the old and new lines
1593 	 * are printed together.
1594 	 */
1595 	for (; cvp <= context_vec_ptr; cvp++) {
1596 		a = cvp->a;
1597 		b = cvp->b;
1598 		c = cvp->c;
1599 		d = cvp->d;
1600 
1601 		/*
1602 		 * c: both new and old changes
1603 		 * d: only changes in the old file
1604 		 * a: only changes in the new file
1605 		 */
1606 		if (a <= b && c <= d)
1607 			ch = 'c';
1608 		else
1609 			ch = (a <= b) ? 'd' : 'a';
1610 
1611 		switch (ch) {
1612 		case 'c':
1613 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1614 			fetch(ixold, a, b, f1, '-', 0, flags);
1615 			fetch(ixnew, c, d, f2, '+', 0, flags);
1616 			break;
1617 		case 'd':
1618 			fetch(ixold, lowa, a - 1, f1, ' ', 0, flags);
1619 			fetch(ixold, a, b, f1, '-', 0, flags);
1620 			break;
1621 		case 'a':
1622 			fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags);
1623 			fetch(ixnew, c, d, f2, '+', 0, flags);
1624 			break;
1625 		}
1626 		lowa = b + 1;
1627 		lowc = d + 1;
1628 	}
1629 	fetch(ixnew, d + 1, upd, f2, ' ', 0, flags);
1630 
1631 	context_vec_ptr = context_vec_start - 1;
1632 }
1633 
1634 static void
1635 print_header(const char *file1, const char *file2)
1636 {
1637 	const char *time_format;
1638 	char buf1[256];
1639 	char buf2[256];
1640 	char end1[10];
1641 	char end2[10];
1642 	struct tm tm1, tm2, *tm_ptr1, *tm_ptr2;
1643 	int nsec1 = stb1.st_mtim.tv_nsec;
1644 	int nsec2 = stb2.st_mtim.tv_nsec;
1645 
1646 	time_format = "%Y-%m-%d %H:%M:%S";
1647 
1648 	if (cflag)
1649 		time_format = "%c";
1650 	tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1);
1651 	tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2);
1652 	strftime(buf1, 256, time_format, tm_ptr1);
1653 	strftime(buf2, 256, time_format, tm_ptr2);
1654 	if (!cflag) {
1655 		strftime(end1, 10, "%z", tm_ptr1);
1656 		strftime(end2, 10, "%z", tm_ptr2);
1657 		sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1);
1658 		sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2);
1659 	}
1660 	if (label[0] != NULL)
1661 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---",
1662 		    label[0]);
1663 	else
1664 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---",
1665 		    file1, buf1);
1666 	if (label[1] != NULL)
1667 		diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++",
1668 		    label[1]);
1669 	else
1670 		diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++",
1671 		    file2, buf2);
1672 }
1673