1 /* $OpenBSD: diffreg.c,v 1.91 2016/03/01 20:57:35 natano Exp $ */ 2 3 /* 4 * Copyright (C) Caldera International Inc. 2001-2002. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code and documentation must retain the above 11 * copyright notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed or owned by Caldera 18 * International, Inc. 19 * 4. Neither the name of Caldera International, Inc. nor the names of other 20 * contributors may be used to endorse or promote products derived from 21 * this software without specific prior written permission. 22 * 23 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 24 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 25 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 26 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 27 * IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE LIABLE FOR ANY DIRECT, 28 * INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 29 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 32 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 33 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * POSSIBILITY OF SUCH DAMAGE. 35 */ 36 /*- 37 * Copyright (c) 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)diffreg.c 8.1 (Berkeley) 6/6/93 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include <sys/capsicum.h> 71 #include <sys/procdesc.h> 72 #include <sys/stat.h> 73 #include <sys/types.h> 74 #include <sys/event.h> 75 #include <sys/wait.h> 76 77 #include <capsicum_helpers.h> 78 #include <ctype.h> 79 #include <err.h> 80 #include <errno.h> 81 #include <fcntl.h> 82 #include <paths.h> 83 #include <regex.h> 84 #include <stddef.h> 85 #include <stdint.h> 86 #include <stdio.h> 87 #include <stdlib.h> 88 #include <string.h> 89 #include <unistd.h> 90 #include <limits.h> 91 #include <signal.h> 92 93 #include "diff.h" 94 #include "xmalloc.h" 95 96 #define _PATH_PR "/usr/bin/pr" 97 98 /* 99 * diff - compare two files. 100 */ 101 102 /* 103 * Uses an algorithm due to Harold Stone, which finds 104 * a pair of longest identical subsequences in the two 105 * files. 106 * 107 * The major goal is to generate the match vector J. 108 * J[i] is the index of the line in file1 corresponding 109 * to line i file0. J[i] = 0 if there is no 110 * such line in file1. 111 * 112 * Lines are hashed so as to work in core. All potential 113 * matches are located by sorting the lines of each file 114 * on the hash (called ``value''). In particular, this 115 * collects the equivalence classes in file1 together. 116 * Subroutine equiv replaces the value of each line in 117 * file0 by the index of the first element of its 118 * matching equivalence in (the reordered) file1. 119 * To save space equiv squeezes file1 into a single 120 * array member in which the equivalence classes 121 * are simply concatenated, except that their first 122 * members are flagged by changing sign. 123 * 124 * Next the indices that point into member are unsorted into 125 * array class according to the original order of file0. 126 * 127 * The cleverness lies in routine stone. This marches 128 * through the lines of file0, developing a vector klist 129 * of "k-candidates". At step i a k-candidate is a matched 130 * pair of lines x,y (x in file0 y in file1) such that 131 * there is a common subsequence of length k 132 * between the first i lines of file0 and the first y 133 * lines of file1, but there is no such subsequence for 134 * any smaller y. x is the earliest possible mate to y 135 * that occurs in such a subsequence. 136 * 137 * Whenever any of the members of the equivalence class of 138 * lines in file1 matable to a line in file0 has serial number 139 * less than the y of some k-candidate, that k-candidate 140 * with the smallest such y is replaced. The new 141 * k-candidate is chained (via pred) to the current 142 * k-1 candidate so that the actual subsequence can 143 * be recovered. When a member has serial number greater 144 * that the y of all k-candidates, the klist is extended. 145 * At the end, the longest subsequence is pulled out 146 * and placed in the array J by unravel 147 * 148 * With J in hand, the matches there recorded are 149 * check'ed against reality to assure that no spurious 150 * matches have crept in due to hashing. If they have, 151 * they are broken, and "jackpot" is recorded--a harmless 152 * matter except that a true match for a spuriously 153 * mated line may now be unnecessarily reported as a change. 154 * 155 * Much of the complexity of the program comes simply 156 * from trying to minimize core utilization and 157 * maximize the range of doable problems by dynamically 158 * allocating what is needed and reusing what is not. 159 * The core requirements for problems larger than somewhat 160 * are (in words) 2*length(file0) + length(file1) + 161 * 3*(number of k-candidates installed), typically about 162 * 6n words for files of length n. 163 */ 164 165 struct cand { 166 int x; 167 int y; 168 int pred; 169 }; 170 171 static struct line { 172 int serial; 173 int value; 174 } *file[2]; 175 176 /* 177 * The following struct is used to record change information when 178 * doing a "context" or "unified" diff. (see routine "change" to 179 * understand the highly mnemonic field names) 180 */ 181 struct context_vec { 182 int a; /* start line in old file */ 183 int b; /* end line in old file */ 184 int c; /* start line in new file */ 185 int d; /* end line in new file */ 186 }; 187 188 #define diff_output printf 189 static FILE *opentemp(const char *); 190 static void output(char *, FILE *, char *, FILE *, int); 191 static void check(FILE *, FILE *, int); 192 static void range(int, int, const char *); 193 static void uni_range(int, int); 194 static void dump_context_vec(FILE *, FILE *, int); 195 static void dump_unified_vec(FILE *, FILE *, int); 196 static void prepare(int, FILE *, size_t, int); 197 static void prune(void); 198 static void equiv(struct line *, int, struct line *, int, int *); 199 static void unravel(int); 200 static void unsort(struct line *, int, int *); 201 static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); 202 static void sort(struct line *, int); 203 static void print_header(const char *, const char *); 204 static int ignoreline(char *); 205 static int asciifile(FILE *); 206 static int fetch(long *, int, int, FILE *, int, int, int); 207 static int newcand(int, int, int); 208 static int search(int *, int, int); 209 static int skipline(FILE *); 210 static int isqrt(int); 211 static int stone(int *, int, int *, int *, int); 212 static int readhash(FILE *, int); 213 static int files_differ(FILE *, FILE *, int); 214 static char *match_function(const long *, int, FILE *); 215 static char *preadline(int, size_t, off_t); 216 217 static int *J; /* will be overlaid on class */ 218 static int *class; /* will be overlaid on file[0] */ 219 static int *klist; /* will be overlaid on file[0] after class */ 220 static int *member; /* will be overlaid on file[1] */ 221 static int clen; 222 static int inifdef; /* whether or not we are in a #ifdef block */ 223 static int len[2]; 224 static int pref, suff; /* length of prefix and suffix */ 225 static int slen[2]; 226 static int anychange; 227 static long *ixnew; /* will be overlaid on file[1] */ 228 static long *ixold; /* will be overlaid on klist */ 229 static struct cand *clist; /* merely a free storage pot for candidates */ 230 static int clistlen; /* the length of clist */ 231 static struct line *sfile[2]; /* shortened by pruning common prefix/suffix */ 232 static u_char *chrtran; /* translation table for case-folding */ 233 static struct context_vec *context_vec_start; 234 static struct context_vec *context_vec_end; 235 static struct context_vec *context_vec_ptr; 236 237 #define FUNCTION_CONTEXT_SIZE 55 238 static char lastbuf[FUNCTION_CONTEXT_SIZE]; 239 static int lastline; 240 static int lastmatchline; 241 242 243 /* 244 * chrtran points to one of 2 translation tables: cup2low if folding upper to 245 * lower case clow2low if not folding case 246 */ 247 static u_char clow2low[256] = { 248 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 249 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 250 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 251 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 252 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 253 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 254 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4a, 0x4b, 0x4c, 255 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 256 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 257 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 258 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 259 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 260 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 261 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 262 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 263 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 264 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 265 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 266 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 267 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 268 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 269 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 270 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 271 0xfd, 0xfe, 0xff 272 }; 273 274 static u_char cup2low[256] = { 275 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 276 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 277 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 278 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 279 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 280 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x60, 0x61, 281 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 282 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 283 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x60, 0x61, 0x62, 284 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 285 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 286 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 287 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 288 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 289 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 290 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 291 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 292 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 293 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 294 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 295 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 296 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 297 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 298 0xfd, 0xfe, 0xff 299 }; 300 301 int 302 diffreg(char *file1, char *file2, int flags, int capsicum) 303 { 304 FILE *f1, *f2; 305 int i, rval; 306 int ostdout = -1; 307 int pr_pd, kq; 308 struct kevent *e; 309 cap_rights_t rights_ro; 310 311 e = NULL; 312 kq = -1; 313 f1 = f2 = NULL; 314 rval = D_SAME; 315 anychange = 0; 316 lastline = 0; 317 lastmatchline = 0; 318 context_vec_ptr = context_vec_start - 1; 319 if (flags & D_IGNORECASE) 320 chrtran = cup2low; 321 else 322 chrtran = clow2low; 323 if (S_ISDIR(stb1.st_mode) != S_ISDIR(stb2.st_mode)) 324 return (S_ISDIR(stb1.st_mode) ? D_MISMATCH1 : D_MISMATCH2); 325 if (strcmp(file1, "-") == 0 && strcmp(file2, "-") == 0) 326 goto closem; 327 328 if (flags & D_EMPTY1) 329 f1 = fopen(_PATH_DEVNULL, "r"); 330 else { 331 if (!S_ISREG(stb1.st_mode)) { 332 if ((f1 = opentemp(file1)) == NULL || 333 fstat(fileno(f1), &stb1) < 0) { 334 warn("%s", file1); 335 status |= 2; 336 goto closem; 337 } 338 } else if (strcmp(file1, "-") == 0) 339 f1 = stdin; 340 else 341 f1 = fopen(file1, "r"); 342 } 343 if (f1 == NULL) { 344 warn("%s", file1); 345 status |= 2; 346 goto closem; 347 } 348 349 if (flags & D_EMPTY2) 350 f2 = fopen(_PATH_DEVNULL, "r"); 351 else { 352 if (!S_ISREG(stb2.st_mode)) { 353 if ((f2 = opentemp(file2)) == NULL || 354 fstat(fileno(f2), &stb2) < 0) { 355 warn("%s", file2); 356 status |= 2; 357 goto closem; 358 } 359 } else if (strcmp(file2, "-") == 0) 360 f2 = stdin; 361 else 362 f2 = fopen(file2, "r"); 363 } 364 if (f2 == NULL) { 365 warn("%s", file2); 366 status |= 2; 367 goto closem; 368 } 369 370 if (lflag) { 371 /* redirect stdout to pr */ 372 int pfd[2]; 373 pid_t pid; 374 char *header; 375 376 xasprintf(&header, "%s %s %s", diffargs, file1, file2); 377 signal(SIGPIPE, SIG_IGN); 378 fflush(stdout); 379 rewind(stdout); 380 pipe(pfd); 381 switch ((pid = pdfork(&pr_pd, PD_CLOEXEC))) { 382 case -1: 383 status |= 2; 384 free(header); 385 err(2, "No more processes"); 386 case 0: 387 /* child */ 388 if (pfd[0] != STDIN_FILENO) { 389 dup2(pfd[0], STDIN_FILENO); 390 close(pfd[0]); 391 } 392 close(pfd[1]); 393 execl(_PATH_PR, _PATH_PR, "-h", header, (char *)0); 394 _exit(127); 395 default: 396 397 /* parent */ 398 if (pfd[1] != STDOUT_FILENO) { 399 ostdout = dup(STDOUT_FILENO); 400 dup2(pfd[1], STDOUT_FILENO); 401 close(pfd[1]); 402 } 403 close(pfd[0]); 404 rewind(stdout); 405 free(header); 406 kq = kqueue(); 407 if (kq == -1) 408 err(2, "kqueue"); 409 e = xmalloc(sizeof(struct kevent)); 410 EV_SET(e, pr_pd, EVFILT_PROCDESC, EV_ADD, NOTE_EXIT, 0, 411 NULL); 412 if (kevent(kq, e, 1, NULL, 0, NULL) == -1) 413 err(2, "kevent"); 414 } 415 } 416 417 if (capsicum) { 418 cap_rights_init(&rights_ro, CAP_READ, CAP_FSTAT, CAP_SEEK); 419 if (cap_rights_limit(fileno(f1), &rights_ro) < 0 420 && errno != ENOSYS) 421 err(2, "unable to limit rights on: %s", file1); 422 if (cap_rights_limit(fileno(f2), &rights_ro) < 0 && 423 errno != ENOSYS) 424 err(2, "unable to limit rights on: %s", file2); 425 if (fileno(f1) == STDIN_FILENO || fileno(f2) == STDIN_FILENO) { 426 /* stding has already been limited */ 427 if (caph_limit_stderr() == -1) 428 err(2, "unable to limit stderr"); 429 if (caph_limit_stdout() == -1) 430 err(2, "unable to limit stdout"); 431 } else if (caph_limit_stdio() == -1) 432 err(2, "unable to limit stdio"); 433 434 caph_cache_catpages(); 435 caph_cache_tzdata(); 436 if (cap_enter() < 0 && errno != ENOSYS) 437 err(2, "unable to enter capability mode"); 438 } 439 440 switch (files_differ(f1, f2, flags)) { 441 case 0: 442 goto closem; 443 case 1: 444 break; 445 default: 446 /* error */ 447 status |= 2; 448 goto closem; 449 } 450 451 if ((flags & D_FORCEASCII) == 0 && 452 (!asciifile(f1) || !asciifile(f2))) { 453 rval = D_BINARY; 454 status |= 1; 455 goto closem; 456 } 457 prepare(0, f1, stb1.st_size, flags); 458 prepare(1, f2, stb2.st_size, flags); 459 460 prune(); 461 sort(sfile[0], slen[0]); 462 sort(sfile[1], slen[1]); 463 464 member = (int *)file[1]; 465 equiv(sfile[0], slen[0], sfile[1], slen[1], member); 466 member = xreallocarray(member, slen[1] + 2, sizeof(*member)); 467 468 class = (int *)file[0]; 469 unsort(sfile[0], slen[0], class); 470 class = xreallocarray(class, slen[0] + 2, sizeof(*class)); 471 472 klist = xcalloc(slen[0] + 2, sizeof(*klist)); 473 clen = 0; 474 clistlen = 100; 475 clist = xcalloc(clistlen, sizeof(*clist)); 476 i = stone(class, slen[0], member, klist, flags); 477 free(member); 478 free(class); 479 480 J = xreallocarray(J, len[0] + 2, sizeof(*J)); 481 unravel(klist[i]); 482 free(clist); 483 free(klist); 484 485 ixold = xreallocarray(ixold, len[0] + 2, sizeof(*ixold)); 486 ixnew = xreallocarray(ixnew, len[1] + 2, sizeof(*ixnew)); 487 check(f1, f2, flags); 488 output(file1, f1, file2, f2, flags); 489 if (ostdout != -1 && e != NULL) { 490 /* close the pipe to pr and restore stdout */ 491 int wstatus; 492 493 fflush(stdout); 494 if (ostdout != STDOUT_FILENO) { 495 close(STDOUT_FILENO); 496 dup2(ostdout, STDOUT_FILENO); 497 close(ostdout); 498 } 499 if (kevent(kq, NULL, 0, e, 1, NULL) == -1) 500 err(2, "kevent"); 501 wstatus = e[0].data; 502 close(kq); 503 if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) 504 errx(2, "pr exited abnormally"); 505 else if (WIFSIGNALED(wstatus)) 506 errx(2, "pr killed by signal %d", 507 WTERMSIG(wstatus)); 508 } 509 510 closem: 511 if (anychange) { 512 status |= 1; 513 if (rval == D_SAME) 514 rval = D_DIFFER; 515 } 516 if (f1 != NULL) 517 fclose(f1); 518 if (f2 != NULL) 519 fclose(f2); 520 521 return (rval); 522 } 523 524 /* 525 * Check to see if the given files differ. 526 * Returns 0 if they are the same, 1 if different, and -1 on error. 527 * XXX - could use code from cmp(1) [faster] 528 */ 529 static int 530 files_differ(FILE *f1, FILE *f2, int flags) 531 { 532 char buf1[BUFSIZ], buf2[BUFSIZ]; 533 size_t i, j; 534 535 if ((flags & (D_EMPTY1|D_EMPTY2)) || stb1.st_size != stb2.st_size || 536 (stb1.st_mode & S_IFMT) != (stb2.st_mode & S_IFMT)) 537 return (1); 538 for (;;) { 539 i = fread(buf1, 1, sizeof(buf1), f1); 540 j = fread(buf2, 1, sizeof(buf2), f2); 541 if ((!i && ferror(f1)) || (!j && ferror(f2))) 542 return (-1); 543 if (i != j) 544 return (1); 545 if (i == 0) 546 return (0); 547 if (memcmp(buf1, buf2, i) != 0) 548 return (1); 549 } 550 } 551 552 static FILE * 553 opentemp(const char *f) 554 { 555 char buf[BUFSIZ], tempfile[PATH_MAX]; 556 ssize_t nread; 557 int ifd, ofd; 558 559 if (strcmp(f, "-") == 0) 560 ifd = STDIN_FILENO; 561 else if ((ifd = open(f, O_RDONLY, 0644)) < 0) 562 return (NULL); 563 564 (void)strlcpy(tempfile, _PATH_TMP "/diff.XXXXXXXX", sizeof(tempfile)); 565 566 if ((ofd = mkstemp(tempfile)) < 0) { 567 close(ifd); 568 return (NULL); 569 } 570 unlink(tempfile); 571 while ((nread = read(ifd, buf, BUFSIZ)) > 0) { 572 if (write(ofd, buf, nread) != nread) { 573 close(ifd); 574 close(ofd); 575 return (NULL); 576 } 577 } 578 close(ifd); 579 lseek(ofd, (off_t)0, SEEK_SET); 580 return (fdopen(ofd, "r")); 581 } 582 583 char * 584 splice(char *dir, char *path) 585 { 586 char *tail, *buf; 587 size_t dirlen; 588 589 dirlen = strlen(dir); 590 while (dirlen != 0 && dir[dirlen - 1] == '/') 591 dirlen--; 592 if ((tail = strrchr(path, '/')) == NULL) 593 tail = path; 594 else 595 tail++; 596 xasprintf(&buf, "%.*s/%s", (int)dirlen, dir, tail); 597 return (buf); 598 } 599 600 static void 601 prepare(int i, FILE *fd, size_t filesize, int flags) 602 { 603 struct line *p; 604 int h; 605 size_t sz, j; 606 607 rewind(fd); 608 609 sz = MIN(filesize, SIZE_MAX) / 25; 610 if (sz < 100) 611 sz = 100; 612 613 p = xcalloc(sz + 3, sizeof(*p)); 614 for (j = 0; (h = readhash(fd, flags));) { 615 if (j == sz) { 616 sz = sz * 3 / 2; 617 p = xreallocarray(p, sz + 3, sizeof(*p)); 618 } 619 p[++j].value = h; 620 } 621 len[i] = j; 622 file[i] = p; 623 } 624 625 static void 626 prune(void) 627 { 628 int i, j; 629 630 for (pref = 0; pref < len[0] && pref < len[1] && 631 file[0][pref + 1].value == file[1][pref + 1].value; 632 pref++) 633 ; 634 for (suff = 0; suff < len[0] - pref && suff < len[1] - pref && 635 file[0][len[0] - suff].value == file[1][len[1] - suff].value; 636 suff++) 637 ; 638 for (j = 0; j < 2; j++) { 639 sfile[j] = file[j] + pref; 640 slen[j] = len[j] - pref - suff; 641 for (i = 0; i <= slen[j]; i++) 642 sfile[j][i].serial = i; 643 } 644 } 645 646 static void 647 equiv(struct line *a, int n, struct line *b, int m, int *c) 648 { 649 int i, j; 650 651 i = j = 1; 652 while (i <= n && j <= m) { 653 if (a[i].value < b[j].value) 654 a[i++].value = 0; 655 else if (a[i].value == b[j].value) 656 a[i++].value = j; 657 else 658 j++; 659 } 660 while (i <= n) 661 a[i++].value = 0; 662 b[m + 1].value = 0; 663 j = 0; 664 while (++j <= m) { 665 c[j] = -b[j].serial; 666 while (b[j + 1].value == b[j].value) { 667 j++; 668 c[j] = b[j].serial; 669 } 670 } 671 c[j] = -1; 672 } 673 674 /* Code taken from ping.c */ 675 static int 676 isqrt(int n) 677 { 678 int y, x = 1; 679 680 if (n == 0) 681 return (0); 682 683 do { /* newton was a stinker */ 684 y = x; 685 x = n / x; 686 x += y; 687 x /= 2; 688 } while ((x - y) > 1 || (x - y) < -1); 689 690 return (x); 691 } 692 693 static int 694 stone(int *a, int n, int *b, int *c, int flags) 695 { 696 int i, k, y, j, l; 697 int oldc, tc, oldl, sq; 698 u_int numtries, bound; 699 700 if (flags & D_MINIMAL) 701 bound = UINT_MAX; 702 else { 703 sq = isqrt(n); 704 bound = MAX(256, sq); 705 } 706 707 k = 0; 708 c[0] = newcand(0, 0, 0); 709 for (i = 1; i <= n; i++) { 710 j = a[i]; 711 if (j == 0) 712 continue; 713 y = -b[j]; 714 oldl = 0; 715 oldc = c[0]; 716 numtries = 0; 717 do { 718 if (y <= clist[oldc].y) 719 continue; 720 l = search(c, k, y); 721 if (l != oldl + 1) 722 oldc = c[l - 1]; 723 if (l <= k) { 724 if (clist[c[l]].y <= y) 725 continue; 726 tc = c[l]; 727 c[l] = newcand(i, y, oldc); 728 oldc = tc; 729 oldl = l; 730 numtries++; 731 } else { 732 c[l] = newcand(i, y, oldc); 733 k++; 734 break; 735 } 736 } while ((y = b[++j]) > 0 && numtries < bound); 737 } 738 return (k); 739 } 740 741 static int 742 newcand(int x, int y, int pred) 743 { 744 struct cand *q; 745 746 if (clen == clistlen) { 747 clistlen = clistlen * 11 / 10; 748 clist = xreallocarray(clist, clistlen, sizeof(*clist)); 749 } 750 q = clist + clen; 751 q->x = x; 752 q->y = y; 753 q->pred = pred; 754 return (clen++); 755 } 756 757 static int 758 search(int *c, int k, int y) 759 { 760 int i, j, l, t; 761 762 if (clist[c[k]].y < y) /* quick look for typical case */ 763 return (k + 1); 764 i = 0; 765 j = k + 1; 766 for (;;) { 767 l = (i + j) / 2; 768 if (l <= i) 769 break; 770 t = clist[c[l]].y; 771 if (t > y) 772 j = l; 773 else if (t < y) 774 i = l; 775 else 776 return (l); 777 } 778 return (l + 1); 779 } 780 781 static void 782 unravel(int p) 783 { 784 struct cand *q; 785 int i; 786 787 for (i = 0; i <= len[0]; i++) 788 J[i] = i <= pref ? i : 789 i > len[0] - suff ? i + len[1] - len[0] : 0; 790 for (q = clist + p; q->y != 0; q = clist + q->pred) 791 J[q->x + pref] = q->y + pref; 792 } 793 794 /* 795 * Check does double duty: 796 * 1. ferret out any fortuitous correspondences due 797 * to confounding by hashing (which result in "jackpot") 798 * 2. collect random access indexes to the two files 799 */ 800 static void 801 check(FILE *f1, FILE *f2, int flags) 802 { 803 int i, j, jackpot, c, d; 804 long ctold, ctnew; 805 806 rewind(f1); 807 rewind(f2); 808 j = 1; 809 ixold[0] = ixnew[0] = 0; 810 jackpot = 0; 811 ctold = ctnew = 0; 812 for (i = 1; i <= len[0]; i++) { 813 if (J[i] == 0) { 814 ixold[i] = ctold += skipline(f1); 815 continue; 816 } 817 while (j < J[i]) { 818 ixnew[j] = ctnew += skipline(f2); 819 j++; 820 } 821 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS|D_IGNORECASE|D_STRIPCR)) { 822 for (;;) { 823 c = getc(f1); 824 d = getc(f2); 825 /* 826 * GNU diff ignores a missing newline 827 * in one file for -b or -w. 828 */ 829 if (flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) { 830 if (c == EOF && d == '\n') { 831 ctnew++; 832 break; 833 } else if (c == '\n' && d == EOF) { 834 ctold++; 835 break; 836 } 837 } 838 ctold++; 839 ctnew++; 840 if (flags & D_STRIPCR) { 841 if (c == '\r') { 842 if ((c = getc(f1)) == '\n') { 843 ctnew++; 844 break; 845 } 846 } 847 if (d == '\r') { 848 if ((d = getc(f2)) == '\n') { 849 ctold++; 850 break; 851 } 852 } 853 } 854 if ((flags & D_FOLDBLANKS) && isspace(c) && 855 isspace(d)) { 856 do { 857 if (c == '\n') 858 break; 859 ctold++; 860 } while (isspace(c = getc(f1))); 861 do { 862 if (d == '\n') 863 break; 864 ctnew++; 865 } while (isspace(d = getc(f2))); 866 } else if ((flags & D_IGNOREBLANKS)) { 867 while (isspace(c) && c != '\n') { 868 c = getc(f1); 869 ctold++; 870 } 871 while (isspace(d) && d != '\n') { 872 d = getc(f2); 873 ctnew++; 874 } 875 } 876 if (chrtran[c] != chrtran[d]) { 877 jackpot++; 878 J[i] = 0; 879 if (c != '\n' && c != EOF) 880 ctold += skipline(f1); 881 if (d != '\n' && c != EOF) 882 ctnew += skipline(f2); 883 break; 884 } 885 if (c == '\n' || c == EOF) 886 break; 887 } 888 } else { 889 for (;;) { 890 ctold++; 891 ctnew++; 892 if ((c = getc(f1)) != (d = getc(f2))) { 893 /* jackpot++; */ 894 J[i] = 0; 895 if (c != '\n' && c != EOF) 896 ctold += skipline(f1); 897 if (d != '\n' && c != EOF) 898 ctnew += skipline(f2); 899 break; 900 } 901 if (c == '\n' || c == EOF) 902 break; 903 } 904 } 905 ixold[i] = ctold; 906 ixnew[j] = ctnew; 907 j++; 908 } 909 for (; j <= len[1]; j++) { 910 ixnew[j] = ctnew += skipline(f2); 911 } 912 /* 913 * if (jackpot) 914 * fprintf(stderr, "jackpot\n"); 915 */ 916 } 917 918 /* shellsort CACM #201 */ 919 static void 920 sort(struct line *a, int n) 921 { 922 struct line *ai, *aim, w; 923 int j, m = 0, k; 924 925 if (n == 0) 926 return; 927 for (j = 1; j <= n; j *= 2) 928 m = 2 * j - 1; 929 for (m /= 2; m != 0; m /= 2) { 930 k = n - m; 931 for (j = 1; j <= k; j++) { 932 for (ai = &a[j]; ai > a; ai -= m) { 933 aim = &ai[m]; 934 if (aim < ai) 935 break; /* wraparound */ 936 if (aim->value > ai[0].value || 937 (aim->value == ai[0].value && 938 aim->serial > ai[0].serial)) 939 break; 940 w.value = ai[0].value; 941 ai[0].value = aim->value; 942 aim->value = w.value; 943 w.serial = ai[0].serial; 944 ai[0].serial = aim->serial; 945 aim->serial = w.serial; 946 } 947 } 948 } 949 } 950 951 static void 952 unsort(struct line *f, int l, int *b) 953 { 954 int *a, i; 955 956 a = xcalloc(l + 1, sizeof(*a)); 957 for (i = 1; i <= l; i++) 958 a[f[i].serial] = f[i].value; 959 for (i = 1; i <= l; i++) 960 b[i] = a[i]; 961 free(a); 962 } 963 964 static int 965 skipline(FILE *f) 966 { 967 int i, c; 968 969 for (i = 1; (c = getc(f)) != '\n' && c != EOF; i++) 970 continue; 971 return (i); 972 } 973 974 static void 975 output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) 976 { 977 int m, i0, i1, j0, j1; 978 979 rewind(f1); 980 rewind(f2); 981 m = len[0]; 982 J[0] = 0; 983 J[m + 1] = len[1] + 1; 984 if (diff_format != D_EDIT) { 985 for (i0 = 1; i0 <= m; i0 = i1 + 1) { 986 while (i0 <= m && J[i0] == J[i0 - 1] + 1) 987 i0++; 988 j0 = J[i0 - 1] + 1; 989 i1 = i0 - 1; 990 while (i1 < m && J[i1 + 1] == 0) 991 i1++; 992 j1 = J[i1 + 1] - 1; 993 J[i1] = j1; 994 change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); 995 } 996 } else { 997 for (i0 = m; i0 >= 1; i0 = i1 - 1) { 998 while (i0 >= 1 && J[i0] == J[i0 + 1] - 1 && J[i0] != 0) 999 i0--; 1000 j0 = J[i0 + 1] - 1; 1001 i1 = i0 + 1; 1002 while (i1 > 1 && J[i1 - 1] == 0) 1003 i1--; 1004 j1 = J[i1 - 1] + 1; 1005 J[i1] = j1; 1006 change(file1, f1, file2, f2, i1, i0, j1, j0, &flags); 1007 } 1008 } 1009 if (m == 0) 1010 change(file1, f1, file2, f2, 1, 0, 1, len[1], &flags); 1011 if (diff_format == D_IFDEF || diff_format == D_GFORMAT) { 1012 for (;;) { 1013 #define c i0 1014 if ((c = getc(f1)) == EOF) 1015 return; 1016 diff_output("%c", c); 1017 } 1018 #undef c 1019 } 1020 if (anychange != 0) { 1021 if (diff_format == D_CONTEXT) 1022 dump_context_vec(f1, f2, flags); 1023 else if (diff_format == D_UNIFIED) 1024 dump_unified_vec(f1, f2, flags); 1025 } 1026 } 1027 1028 static void 1029 range(int a, int b, const char *separator) 1030 { 1031 diff_output("%d", a > b ? b : a); 1032 if (a < b) 1033 diff_output("%s%d", separator, b); 1034 } 1035 1036 static void 1037 uni_range(int a, int b) 1038 { 1039 if (a < b) 1040 diff_output("%d,%d", a, b - a + 1); 1041 else if (a == b) 1042 diff_output("%d", b); 1043 else 1044 diff_output("%d,0", b); 1045 } 1046 1047 static char * 1048 preadline(int fd, size_t rlen, off_t off) 1049 { 1050 char *line; 1051 ssize_t nr; 1052 1053 line = xmalloc(rlen + 1); 1054 if ((nr = pread(fd, line, rlen, off)) < 0) 1055 err(2, "preadline"); 1056 if (nr > 0 && line[nr-1] == '\n') 1057 nr--; 1058 line[nr] = '\0'; 1059 return (line); 1060 } 1061 1062 static int 1063 ignoreline(char *line) 1064 { 1065 int ret; 1066 1067 ret = regexec(&ignore_re, line, 0, NULL, 0); 1068 free(line); 1069 return (ret == 0); /* if it matched, it should be ignored. */ 1070 } 1071 1072 /* 1073 * Indicate that there is a difference between lines a and b of the from file 1074 * to get to lines c to d of the to file. If a is greater then b then there 1075 * are no lines in the from file involved and this means that there were 1076 * lines appended (beginning at b). If c is greater than d then there are 1077 * lines missing from the to file. 1078 */ 1079 static void 1080 change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, 1081 int *pflags) 1082 { 1083 static size_t max_context = 64; 1084 long curpos; 1085 int i, nc; 1086 const char *walk; 1087 1088 restart: 1089 if ((diff_format != D_IFDEF || diff_format == D_GFORMAT) && 1090 a > b && c > d) 1091 return; 1092 if (ignore_pats != NULL) { 1093 char *line; 1094 /* 1095 * All lines in the change, insert, or delete must 1096 * match an ignore pattern for the change to be 1097 * ignored. 1098 */ 1099 if (a <= b) { /* Changes and deletes. */ 1100 for (i = a; i <= b; i++) { 1101 line = preadline(fileno(f1), 1102 ixold[i] - ixold[i - 1], ixold[i - 1]); 1103 if (!ignoreline(line)) 1104 goto proceed; 1105 } 1106 } 1107 if (a > b || c <= d) { /* Changes and inserts. */ 1108 for (i = c; i <= d; i++) { 1109 line = preadline(fileno(f2), 1110 ixnew[i] - ixnew[i - 1], ixnew[i - 1]); 1111 if (!ignoreline(line)) 1112 goto proceed; 1113 } 1114 } 1115 return; 1116 } 1117 proceed: 1118 if (*pflags & D_HEADER) { 1119 diff_output("%s %s %s\n", diffargs, file1, file2); 1120 *pflags &= ~D_HEADER; 1121 } 1122 if (diff_format == D_CONTEXT || diff_format == D_UNIFIED) { 1123 /* 1124 * Allocate change records as needed. 1125 */ 1126 if (context_vec_ptr == context_vec_end - 1) { 1127 ptrdiff_t offset = context_vec_ptr - context_vec_start; 1128 max_context <<= 1; 1129 context_vec_start = xreallocarray(context_vec_start, 1130 max_context, sizeof(*context_vec_start)); 1131 context_vec_end = context_vec_start + max_context; 1132 context_vec_ptr = context_vec_start + offset; 1133 } 1134 if (anychange == 0) { 1135 /* 1136 * Print the context/unidiff header first time through. 1137 */ 1138 print_header(file1, file2); 1139 anychange = 1; 1140 } else if (a > context_vec_ptr->b + (2 * diff_context) + 1 && 1141 c > context_vec_ptr->d + (2 * diff_context) + 1) { 1142 /* 1143 * If this change is more than 'diff_context' lines from the 1144 * previous change, dump the record and reset it. 1145 */ 1146 if (diff_format == D_CONTEXT) 1147 dump_context_vec(f1, f2, *pflags); 1148 else 1149 dump_unified_vec(f1, f2, *pflags); 1150 } 1151 context_vec_ptr++; 1152 context_vec_ptr->a = a; 1153 context_vec_ptr->b = b; 1154 context_vec_ptr->c = c; 1155 context_vec_ptr->d = d; 1156 return; 1157 } 1158 if (anychange == 0) 1159 anychange = 1; 1160 switch (diff_format) { 1161 case D_BRIEF: 1162 return; 1163 case D_NORMAL: 1164 case D_EDIT: 1165 range(a, b, ","); 1166 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1167 if (diff_format == D_NORMAL) 1168 range(c, d, ","); 1169 diff_output("\n"); 1170 break; 1171 case D_REVERSE: 1172 diff_output("%c", a > b ? 'a' : c > d ? 'd' : 'c'); 1173 range(a, b, " "); 1174 diff_output("\n"); 1175 break; 1176 case D_NREVERSE: 1177 if (a > b) 1178 diff_output("a%d %d\n", b, d - c + 1); 1179 else { 1180 diff_output("d%d %d\n", a, b - a + 1); 1181 if (!(c > d)) 1182 /* add changed lines */ 1183 diff_output("a%d %d\n", b, d - c + 1); 1184 } 1185 break; 1186 } 1187 if (diff_format == D_GFORMAT) { 1188 curpos = ftell(f1); 1189 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1190 nc = ixold[a > b ? b : a - 1] - curpos; 1191 for (i = 0; i < nc; i++) 1192 diff_output("%c", getc(f1)); 1193 for (walk = group_format; *walk != '\0'; walk++) { 1194 if (*walk == '%') { 1195 walk++; 1196 switch (*walk) { 1197 case '<': 1198 fetch(ixold, a, b, f1, '<', 1, *pflags); 1199 break; 1200 case '>': 1201 fetch(ixnew, c, d, f2, '>', 0, *pflags); 1202 break; 1203 default: 1204 diff_output("%%%c", *walk); 1205 break; 1206 } 1207 continue; 1208 } 1209 diff_output("%c", *walk); 1210 } 1211 } 1212 if (diff_format == D_NORMAL || diff_format == D_IFDEF) { 1213 fetch(ixold, a, b, f1, '<', 1, *pflags); 1214 if (a <= b && c <= d && diff_format == D_NORMAL) 1215 diff_output("---\n"); 1216 } 1217 if (diff_format != D_GFORMAT) 1218 i = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); 1219 if (i != 0 && diff_format == D_EDIT) { 1220 /* 1221 * A non-zero return value for D_EDIT indicates that the 1222 * last line printed was a bare dot (".") that has been 1223 * escaped as ".." to prevent ed(1) from misinterpreting 1224 * it. We have to add a substitute command to change this 1225 * back and restart where we left off. 1226 */ 1227 diff_output(".\n"); 1228 diff_output("%ds/.//\n", a + i - 1); 1229 b = a + i - 1; 1230 a = b + 1; 1231 c += i; 1232 goto restart; 1233 } 1234 if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) 1235 diff_output(".\n"); 1236 if (inifdef) { 1237 diff_output("#endif /* %s */\n", ifdefname); 1238 inifdef = 0; 1239 } 1240 } 1241 1242 static int 1243 fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) 1244 { 1245 int i, j, c, lastc, col, nc; 1246 int newcol; 1247 1248 /* 1249 * When doing #ifdef's, copy down to current line 1250 * if this is the first file, so that stuff makes it to output. 1251 */ 1252 if ((diff_format == D_IFDEF) && oldfile) { 1253 long curpos = ftell(lb); 1254 /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ 1255 nc = f[a > b ? b : a - 1] - curpos; 1256 for (i = 0; i < nc; i++) 1257 diff_output("%c", getc(lb)); 1258 } 1259 if (a > b) 1260 return (0); 1261 if (diff_format == D_IFDEF) { 1262 if (inifdef) { 1263 diff_output("#else /* %s%s */\n", 1264 oldfile == 1 ? "!" : "", ifdefname); 1265 } else { 1266 if (oldfile) 1267 diff_output("#ifndef %s\n", ifdefname); 1268 else 1269 diff_output("#ifdef %s\n", ifdefname); 1270 } 1271 inifdef = 1 + oldfile; 1272 } 1273 for (i = a; i <= b; i++) { 1274 fseek(lb, f[i - 1], SEEK_SET); 1275 nc = f[i] - f[i - 1]; 1276 if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) && 1277 ch != '\0') { 1278 diff_output("%c", ch); 1279 if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT 1280 || diff_format == D_UNIFIED)) 1281 diff_output("\t"); 1282 else if (diff_format != D_UNIFIED) 1283 diff_output(" "); 1284 } 1285 col = 0; 1286 for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { 1287 if ((c = getc(lb)) == EOF) { 1288 if (diff_format == D_EDIT || diff_format == D_REVERSE || 1289 diff_format == D_NREVERSE) 1290 warnx("No newline at end of file"); 1291 else 1292 diff_output("\n\\ No newline at end of " 1293 "file\n"); 1294 return (0); 1295 } 1296 if (c == '\t' && (flags & D_EXPANDTABS)) { 1297 newcol = ((col/tabsize)+1)*tabsize; 1298 do { 1299 diff_output(" "); 1300 } while (++col < newcol); 1301 } else { 1302 if (diff_format == D_EDIT && j == 1 && c == '\n' 1303 && lastc == '.') { 1304 /* 1305 * Don't print a bare "." line 1306 * since that will confuse ed(1). 1307 * Print ".." instead and return, 1308 * giving the caller an offset 1309 * from which to restart. 1310 */ 1311 diff_output(".\n"); 1312 return (i - a + 1); 1313 } 1314 diff_output("%c", c); 1315 col++; 1316 } 1317 } 1318 } 1319 return (0); 1320 } 1321 1322 /* 1323 * Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578. 1324 */ 1325 static int 1326 readhash(FILE *f, int flags) 1327 { 1328 int i, t, space; 1329 int sum; 1330 1331 sum = 1; 1332 space = 0; 1333 if ((flags & (D_FOLDBLANKS|D_IGNOREBLANKS)) == 0) { 1334 if (flags & D_IGNORECASE) 1335 for (i = 0; (t = getc(f)) != '\n'; i++) { 1336 if (flags & D_STRIPCR && t == '\r') { 1337 t = getc(f); 1338 if (t == '\n') 1339 break; 1340 ungetc(t, f); 1341 } 1342 if (t == EOF) { 1343 if (i == 0) 1344 return (0); 1345 break; 1346 } 1347 sum = sum * 127 + chrtran[t]; 1348 } 1349 else 1350 for (i = 0; (t = getc(f)) != '\n'; i++) { 1351 if (flags & D_STRIPCR && t == '\r') { 1352 t = getc(f); 1353 if (t == '\n') 1354 break; 1355 ungetc(t, f); 1356 } 1357 if (t == EOF) { 1358 if (i == 0) 1359 return (0); 1360 break; 1361 } 1362 sum = sum * 127 + t; 1363 } 1364 } else { 1365 for (i = 0;;) { 1366 switch (t = getc(f)) { 1367 case '\r': 1368 case '\t': 1369 case '\v': 1370 case '\f': 1371 case ' ': 1372 space++; 1373 continue; 1374 default: 1375 if (space && (flags & D_IGNOREBLANKS) == 0) { 1376 i++; 1377 space = 0; 1378 } 1379 sum = sum * 127 + chrtran[t]; 1380 i++; 1381 continue; 1382 case EOF: 1383 if (i == 0) 1384 return (0); 1385 /* FALLTHROUGH */ 1386 case '\n': 1387 break; 1388 } 1389 break; 1390 } 1391 } 1392 /* 1393 * There is a remote possibility that we end up with a zero sum. 1394 * Zero is used as an EOF marker, so return 1 instead. 1395 */ 1396 return (sum == 0 ? 1 : sum); 1397 } 1398 1399 static int 1400 asciifile(FILE *f) 1401 { 1402 unsigned char buf[BUFSIZ]; 1403 size_t cnt; 1404 1405 if (f == NULL) 1406 return (1); 1407 1408 rewind(f); 1409 cnt = fread(buf, 1, sizeof(buf), f); 1410 return (memchr(buf, '\0', cnt) == NULL); 1411 } 1412 1413 #define begins_with(s, pre) (strncmp(s, pre, sizeof(pre)-1) == 0) 1414 1415 static char * 1416 match_function(const long *f, int pos, FILE *fp) 1417 { 1418 unsigned char buf[FUNCTION_CONTEXT_SIZE]; 1419 size_t nc; 1420 int last = lastline; 1421 const char *state = NULL; 1422 1423 lastline = pos; 1424 while (pos > last) { 1425 fseek(fp, f[pos - 1], SEEK_SET); 1426 nc = f[pos] - f[pos - 1]; 1427 if (nc >= sizeof(buf)) 1428 nc = sizeof(buf) - 1; 1429 nc = fread(buf, 1, nc, fp); 1430 if (nc > 0) { 1431 buf[nc] = '\0'; 1432 buf[strcspn(buf, "\n")] = '\0'; 1433 if (isalpha(buf[0]) || buf[0] == '_' || buf[0] == '$') { 1434 if (begins_with(buf, "private:")) { 1435 if (!state) 1436 state = " (private)"; 1437 } else if (begins_with(buf, "protected:")) { 1438 if (!state) 1439 state = " (protected)"; 1440 } else if (begins_with(buf, "public:")) { 1441 if (!state) 1442 state = " (public)"; 1443 } else { 1444 strlcpy(lastbuf, buf, sizeof lastbuf); 1445 if (state) 1446 strlcat(lastbuf, state, 1447 sizeof lastbuf); 1448 lastmatchline = pos; 1449 return lastbuf; 1450 } 1451 } 1452 } 1453 pos--; 1454 } 1455 return lastmatchline > 0 ? lastbuf : NULL; 1456 } 1457 1458 /* dump accumulated "context" diff changes */ 1459 static void 1460 dump_context_vec(FILE *f1, FILE *f2, int flags) 1461 { 1462 struct context_vec *cvp = context_vec_start; 1463 int lowa, upb, lowc, upd, do_output; 1464 int a, b, c, d; 1465 char ch, *f; 1466 1467 if (context_vec_start > context_vec_ptr) 1468 return; 1469 1470 b = d = 0; /* gcc */ 1471 lowa = MAX(1, cvp->a - diff_context); 1472 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1473 lowc = MAX(1, cvp->c - diff_context); 1474 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1475 1476 diff_output("***************"); 1477 if ((flags & D_PROTOTYPE)) { 1478 f = match_function(ixold, lowa-1, f1); 1479 if (f != NULL) 1480 diff_output(" %s", f); 1481 } 1482 diff_output("\n*** "); 1483 range(lowa, upb, ","); 1484 diff_output(" ****\n"); 1485 1486 /* 1487 * Output changes to the "old" file. The first loop suppresses 1488 * output if there were no changes to the "old" file (we'll see 1489 * the "old" lines as context in the "new" list). 1490 */ 1491 do_output = 0; 1492 for (; cvp <= context_vec_ptr; cvp++) 1493 if (cvp->a <= cvp->b) { 1494 cvp = context_vec_start; 1495 do_output++; 1496 break; 1497 } 1498 if (do_output) { 1499 while (cvp <= context_vec_ptr) { 1500 a = cvp->a; 1501 b = cvp->b; 1502 c = cvp->c; 1503 d = cvp->d; 1504 1505 if (a <= b && c <= d) 1506 ch = 'c'; 1507 else 1508 ch = (a <= b) ? 'd' : 'a'; 1509 1510 if (ch == 'a') 1511 fetch(ixold, lowa, b, f1, ' ', 0, flags); 1512 else { 1513 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1514 fetch(ixold, a, b, f1, 1515 ch == 'c' ? '!' : '-', 0, flags); 1516 } 1517 lowa = b + 1; 1518 cvp++; 1519 } 1520 fetch(ixold, b + 1, upb, f1, ' ', 0, flags); 1521 } 1522 /* output changes to the "new" file */ 1523 diff_output("--- "); 1524 range(lowc, upd, ","); 1525 diff_output(" ----\n"); 1526 1527 do_output = 0; 1528 for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) 1529 if (cvp->c <= cvp->d) { 1530 cvp = context_vec_start; 1531 do_output++; 1532 break; 1533 } 1534 if (do_output) { 1535 while (cvp <= context_vec_ptr) { 1536 a = cvp->a; 1537 b = cvp->b; 1538 c = cvp->c; 1539 d = cvp->d; 1540 1541 if (a <= b && c <= d) 1542 ch = 'c'; 1543 else 1544 ch = (a <= b) ? 'd' : 'a'; 1545 1546 if (ch == 'd') 1547 fetch(ixnew, lowc, d, f2, ' ', 0, flags); 1548 else { 1549 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1550 fetch(ixnew, c, d, f2, 1551 ch == 'c' ? '!' : '+', 0, flags); 1552 } 1553 lowc = d + 1; 1554 cvp++; 1555 } 1556 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1557 } 1558 context_vec_ptr = context_vec_start - 1; 1559 } 1560 1561 /* dump accumulated "unified" diff changes */ 1562 static void 1563 dump_unified_vec(FILE *f1, FILE *f2, int flags) 1564 { 1565 struct context_vec *cvp = context_vec_start; 1566 int lowa, upb, lowc, upd; 1567 int a, b, c, d; 1568 char ch, *f; 1569 1570 if (context_vec_start > context_vec_ptr) 1571 return; 1572 1573 b = d = 0; /* gcc */ 1574 lowa = MAX(1, cvp->a - diff_context); 1575 upb = MIN(len[0], context_vec_ptr->b + diff_context); 1576 lowc = MAX(1, cvp->c - diff_context); 1577 upd = MIN(len[1], context_vec_ptr->d + diff_context); 1578 1579 diff_output("@@ -"); 1580 uni_range(lowa, upb); 1581 diff_output(" +"); 1582 uni_range(lowc, upd); 1583 diff_output(" @@"); 1584 if ((flags & D_PROTOTYPE)) { 1585 f = match_function(ixold, lowa-1, f1); 1586 if (f != NULL) 1587 diff_output(" %s", f); 1588 } 1589 diff_output("\n"); 1590 1591 /* 1592 * Output changes in "unified" diff format--the old and new lines 1593 * are printed together. 1594 */ 1595 for (; cvp <= context_vec_ptr; cvp++) { 1596 a = cvp->a; 1597 b = cvp->b; 1598 c = cvp->c; 1599 d = cvp->d; 1600 1601 /* 1602 * c: both new and old changes 1603 * d: only changes in the old file 1604 * a: only changes in the new file 1605 */ 1606 if (a <= b && c <= d) 1607 ch = 'c'; 1608 else 1609 ch = (a <= b) ? 'd' : 'a'; 1610 1611 switch (ch) { 1612 case 'c': 1613 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1614 fetch(ixold, a, b, f1, '-', 0, flags); 1615 fetch(ixnew, c, d, f2, '+', 0, flags); 1616 break; 1617 case 'd': 1618 fetch(ixold, lowa, a - 1, f1, ' ', 0, flags); 1619 fetch(ixold, a, b, f1, '-', 0, flags); 1620 break; 1621 case 'a': 1622 fetch(ixnew, lowc, c - 1, f2, ' ', 0, flags); 1623 fetch(ixnew, c, d, f2, '+', 0, flags); 1624 break; 1625 } 1626 lowa = b + 1; 1627 lowc = d + 1; 1628 } 1629 fetch(ixnew, d + 1, upd, f2, ' ', 0, flags); 1630 1631 context_vec_ptr = context_vec_start - 1; 1632 } 1633 1634 static void 1635 print_header(const char *file1, const char *file2) 1636 { 1637 const char *time_format; 1638 char buf1[256]; 1639 char buf2[256]; 1640 char end1[10]; 1641 char end2[10]; 1642 struct tm tm1, tm2, *tm_ptr1, *tm_ptr2; 1643 int nsec1 = stb1.st_mtim.tv_nsec; 1644 int nsec2 = stb2.st_mtim.tv_nsec; 1645 1646 time_format = "%Y-%m-%d %H:%M:%S"; 1647 1648 if (cflag) 1649 time_format = "%c"; 1650 tm_ptr1 = localtime_r(&stb1.st_mtime, &tm1); 1651 tm_ptr2 = localtime_r(&stb2.st_mtime, &tm2); 1652 strftime(buf1, 256, time_format, tm_ptr1); 1653 strftime(buf2, 256, time_format, tm_ptr2); 1654 if (!cflag) { 1655 strftime(end1, 10, "%z", tm_ptr1); 1656 strftime(end2, 10, "%z", tm_ptr2); 1657 sprintf(buf1, "%s.%.9d %s", buf1, nsec1, end1); 1658 sprintf(buf2, "%s.%.9d %s", buf2, nsec2, end2); 1659 } 1660 if (label[0] != NULL) 1661 diff_output("%s %s\n", diff_format == D_CONTEXT ? "***" : "---", 1662 label[0]); 1663 else 1664 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "***" : "---", 1665 file1, buf1); 1666 if (label[1] != NULL) 1667 diff_output("%s %s\n", diff_format == D_CONTEXT ? "---" : "+++", 1668 label[1]); 1669 else 1670 diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", 1671 file2, buf2); 1672 } 1673