1 /*
2 * Copyright (C) 2003 Sean Chittenden <[email protected]>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <sys/types.h>
31 #include <sys/param.h>
32
33 #include <ctype.h>
34 #include <err.h>
35 #include <errno.h>
36 #include <stdbool.h>
37 #include <stdlib.h>
38 #include <stdint.h>
39 #include <stdio.h>
40 #include <string.h>
41 #include <unistd.h>
42
43 #include "randomize_fd.h"
44
45 static struct rand_node *rand_root;
46 static struct rand_node *rand_tail;
47
48 static struct rand_node *
rand_node_allocate(void)49 rand_node_allocate(void)
50 {
51 struct rand_node *n;
52
53 n = (struct rand_node *)malloc(sizeof(struct rand_node));
54 if (n == NULL)
55 err(1, "malloc");
56
57 n->len = 0;
58 n->cp = NULL;
59 n->next = NULL;
60 return(n);
61 }
62
63 static void
rand_node_free(struct rand_node * n)64 rand_node_free(struct rand_node *n)
65 {
66 if (n != NULL) {
67 if (n->cp != NULL)
68 free(n->cp);
69
70 free(n);
71 }
72 }
73
74 static void
rand_node_free_rec(struct rand_node * n)75 rand_node_free_rec(struct rand_node *n)
76 {
77 if (n != NULL) {
78 if (n->next != NULL)
79 rand_node_free_rec(n->next);
80
81 rand_node_free(n);
82 }
83 }
84
85 static void
rand_node_append(struct rand_node * n)86 rand_node_append(struct rand_node *n)
87 {
88 if (rand_root == NULL)
89 rand_root = rand_tail = n;
90 else {
91 rand_tail->next = n;
92 rand_tail = n;
93 }
94 }
95
96 int
randomize_fd(int fd,int type,int unique,double denom)97 randomize_fd(int fd, int type, int unique, double denom)
98 {
99 u_char *buf;
100 u_int slen;
101 u_long i, j, numnode, selected;
102 struct rand_node *n, *prev;
103 int bufleft, eof, fndstr, ret;
104 size_t bufc, buflen;
105 ssize_t len;
106
107 rand_root = rand_tail = NULL;
108 bufc = i = 0;
109 bufleft = eof = fndstr = numnode = 0;
110
111 if (type == RANDOM_TYPE_UNSET)
112 type = RANDOM_TYPE_LINES;
113
114 buflen = sizeof(u_char) * MAXBSIZE;
115 buf = (u_char *)malloc(buflen);
116 if (buf == NULL)
117 err(1, "malloc");
118
119 while (!eof) {
120 /* Check to see if we have bits in the buffer */
121 if (bufleft == 0) {
122 len = read(fd, buf, buflen);
123 if (len == -1)
124 err(1, "read");
125 else if (len == 0) {
126 eof++;
127 break;
128 } else if ((size_t)len < buflen)
129 buflen = (size_t)len;
130
131 bufleft = (int)len;
132 }
133
134 /* Look for a newline */
135 for (i = bufc; i <= buflen && bufleft >= 0; i++, bufleft--) {
136 if (i == buflen) {
137 if (fndstr) {
138 if (!eof) {
139 memmove(buf, &buf[bufc], i - bufc);
140 i -= bufc;
141 bufc = 0;
142 len = read(fd, &buf[i], buflen - i);
143 if (len == -1)
144 err(1, "read");
145 else if (len == 0) {
146 eof++;
147 break;
148 } else if (len < (ssize_t)(buflen - i))
149 buflen = i + (size_t)len;
150
151 bufleft = (int)len;
152 fndstr = 0;
153 }
154 } else {
155 buflen *= 2;
156 buf = (u_char *)realloc(buf, buflen);
157 if (buf == NULL)
158 err(1, "realloc");
159
160 if (!eof) {
161 len = read(fd, &buf[i], buflen - i);
162 if (len == -1)
163 err(1, "read");
164 else if (len == 0) {
165 eof++;
166 break;
167 } else if (len < (ssize_t)(buflen - i))
168 buflen = i + (size_t)len;
169
170 bufleft = (int)len;
171 }
172
173 }
174 }
175
176 if ((type == RANDOM_TYPE_LINES && buf[i] == '\n') ||
177 (type == RANDOM_TYPE_WORDS && isspace(buf[i])) ||
178 (eof && i == buflen - 1)) {
179 make_token:
180 if (numnode == UINT32_MAX - 1) {
181 errno = EFBIG;
182 err(1, "too many delimiters");
183 }
184 numnode++;
185 n = rand_node_allocate();
186 if (-1 != (int)i) {
187 slen = i - (u_long)bufc;
188 n->len = slen + 2;
189 n->cp = (u_char *)malloc(slen + 2);
190 if (n->cp == NULL)
191 err(1, "malloc");
192
193 memmove(n->cp, &buf[bufc], slen);
194 n->cp[slen] = buf[i];
195 n->cp[slen + 1] = '\0';
196 bufc = i + 1;
197 }
198 rand_node_append(n);
199 fndstr = 1;
200 }
201 }
202 }
203
204 /* Necessary evil to compensate for files that don't end with a newline */
205 if (bufc != i) {
206 i--;
207 goto make_token;
208 }
209
210 (void)close(fd);
211
212 free(buf);
213
214 for (i = numnode; i > 0; i--) {
215 selected = arc4random_uniform(numnode);
216
217 for (j = 0, prev = n = rand_root; n != NULL; j++, prev = n, n = n->next) {
218 if (j == selected) {
219 if (n->cp == NULL)
220 break;
221
222 if (random_uniform_denom(denom)) {
223 ret = printf("%.*s",
224 (int)n->len - 1, n->cp);
225 if (ret < 0)
226 err(1, "printf");
227 }
228 if (unique) {
229 if (n == rand_root)
230 rand_root = n->next;
231 if (n == rand_tail)
232 rand_tail = prev;
233
234 prev->next = n->next;
235 rand_node_free(n);
236 numnode--;
237 }
238 break;
239 }
240 }
241 }
242
243 fflush(stdout);
244
245 if (!unique)
246 rand_node_free_rec(rand_root);
247
248 return(0);
249 }
250