1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1998-2004 Dag-Erling Smørgrav
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer
12 * in this position and unchanged.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. The name of the author may not be used to endorse or promote products
17 * derived from this software without specific prior written permission
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35
36 #include <netinet/in.h>
37
38 #include <errno.h>
39 #include <ctype.h>
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <string.h>
43
44 #include "fetch.h"
45 #include "common.h"
46
/*
 * Library-wide state.  None of these objects is static, so all have
 * external linkage.  Everything starts out zero-initialised except
 * fetchRestartCalls, which is explicitly initialised to 1.
 */
/* NOTE(review): presumably the caller-installed authentication hook -- confirm against fetch.h. */
auth_t fetchAuthMethod;
/* NOTE(review): presumably the code recorded by the most recent *_seterr()/fetch_syserr() call -- confirm in common.c. */
int fetchLastErrCode;
/* Buffer of MAXERRSTRING chars; presumably holds the message matching fetchLastErrCode -- confirm in common.c. */
char fetchLastErrString[MAXERRSTRING];
/* NOTE(review): units and meaning of zero are not visible in this file -- confirm in fetch(3). */
int fetchTimeout;
/* Enabled (1) by default; NOTE(review): presumably controls restarting interrupted calls -- confirm. */
int fetchRestartCalls = 1;
/* NOTE(review): presumably gates DEBUGF() output -- confirm in common.h. */
int fetchDebug;
53
54
55 /*** Local data **************************************************************/
56
/*
 * Error codes and messages for URL parser errors.
 *
 * Each url_errlist entry pairs one of the URL_* codes defined here with
 * the FETCH_URL category and a human-readable message.  The final entry
 * (code -1, FETCH_UNKNOWN) is presumably the catch-all that terminates
 * the lookup performed through url_seterr() -- confirm in common.c.
 */
#define URL_MALFORMED 1
#define URL_BAD_SCHEME 2
#define URL_BAD_PORT 3
static struct fetcherr url_errlist[] = {
{ URL_MALFORMED, FETCH_URL, "Malformed URL" },
{ URL_BAD_SCHEME, FETCH_URL, "Invalid URL scheme" },
{ URL_BAD_PORT, FETCH_URL, "Invalid server port" },
{ -1, FETCH_UNKNOWN, "Unknown parser error" }
};
69
70
71 /*** Public API **************************************************************/
72
73 /*
74 * Select the appropriate protocol for the URL scheme, and return a
75 * read-only stream connected to the document referenced by the URL.
76 * Also fill out the struct url_stat.
77 */
78 FILE *
fetchXGet(struct url * URL,struct url_stat * us,const char * flags)79 fetchXGet(struct url *URL, struct url_stat *us, const char *flags)
80 {
81
82 if (us != NULL) {
83 us->size = -1;
84 us->atime = us->mtime = 0;
85 }
86 if (strcmp(URL->scheme, SCHEME_FILE) == 0)
87 return (fetchXGetFile(URL, us, flags));
88 else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
89 return (fetchXGetFTP(URL, us, flags));
90 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
91 return (fetchXGetHTTP(URL, us, flags));
92 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
93 return (fetchXGetHTTP(URL, us, flags));
94 url_seterr(URL_BAD_SCHEME);
95 return (NULL);
96 }
97
/*
 * Same as fetchXGet(), for callers that do not want document statistics:
 * the struct url_stat argument is passed as NULL.
 */
FILE *
fetchGet(struct url *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGet(URL, no_stat, flags));
}
107
108 /*
109 * Select the appropriate protocol for the URL scheme, and return a
110 * write-only stream connected to the document referenced by the URL.
111 */
112 FILE *
fetchPut(struct url * URL,const char * flags)113 fetchPut(struct url *URL, const char *flags)
114 {
115
116 if (strcmp(URL->scheme, SCHEME_FILE) == 0)
117 return (fetchPutFile(URL, flags));
118 else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
119 return (fetchPutFTP(URL, flags));
120 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
121 return (fetchPutHTTP(URL, flags));
122 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
123 return (fetchPutHTTP(URL, flags));
124 url_seterr(URL_BAD_SCHEME);
125 return (NULL);
126 }
127
128 /*
129 * Select the appropriate protocol for the URL scheme, and return the
130 * size of the document referenced by the URL if it exists.
131 */
132 int
fetchStat(struct url * URL,struct url_stat * us,const char * flags)133 fetchStat(struct url *URL, struct url_stat *us, const char *flags)
134 {
135
136 if (us != NULL) {
137 us->size = -1;
138 us->atime = us->mtime = 0;
139 }
140 if (strcmp(URL->scheme, SCHEME_FILE) == 0)
141 return (fetchStatFile(URL, us, flags));
142 else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
143 return (fetchStatFTP(URL, us, flags));
144 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
145 return (fetchStatHTTP(URL, us, flags));
146 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
147 return (fetchStatHTTP(URL, us, flags));
148 url_seterr(URL_BAD_SCHEME);
149 return (-1);
150 }
151
152 /*
153 * Select the appropriate protocol for the URL scheme, and return a
154 * list of files in the directory pointed to by the URL.
155 */
156 struct url_ent *
fetchList(struct url * URL,const char * flags)157 fetchList(struct url *URL, const char *flags)
158 {
159
160 if (strcmp(URL->scheme, SCHEME_FILE) == 0)
161 return (fetchListFile(URL, flags));
162 else if (strcmp(URL->scheme, SCHEME_FTP) == 0)
163 return (fetchListFTP(URL, flags));
164 else if (strcmp(URL->scheme, SCHEME_HTTP) == 0)
165 return (fetchListHTTP(URL, flags));
166 else if (strcmp(URL->scheme, SCHEME_HTTPS) == 0)
167 return (fetchListHTTP(URL, flags));
168 url_seterr(URL_BAD_SCHEME);
169 return (NULL);
170 }
171
/*
 * String front end to fetchXGet(): parse the URL text, and on success
 * run fetchXGet() on the result.  The temporary struct url is released
 * before returning.  Returns NULL if the URL cannot be parsed.
 */
FILE *
fetchXGetURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchXGet(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
189
/*
 * String front end for callers that do not want document statistics:
 * forwards to fetchXGetURL() with a NULL struct url_stat.
 */
FILE *
fetchGetURL(const char *URL, const char *flags)
{
	struct url_stat *no_stat = NULL;

	return (fetchXGetURL(URL, no_stat, flags));
}
198
/*
 * String front end to fetchPut(): parse the URL text, and on success
 * run fetchPut() on the result.  The temporary struct url is released
 * before returning.  Returns NULL if the URL cannot be parsed.
 */
FILE *
fetchPutURL(const char *URL, const char *flags)
{
	struct url *parsed;
	FILE *stream = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		stream = fetchPut(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (stream);
}
216
/*
 * String front end to fetchStat(): parse the URL text, and on success
 * run fetchStat() on the result.  The temporary struct url is released
 * before returning.  Returns -1 if the URL cannot be parsed; in that
 * case *us is left untouched.
 */
int
fetchStatURL(const char *URL, struct url_stat *us, const char *flags)
{
	struct url *parsed;
	int rc = -1;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		rc = fetchStat(parsed, us, flags);
		fetchFreeURL(parsed);
	}
	return (rc);
}
234
/*
 * String front end to fetchList(): parse the URL text, and on success
 * run fetchList() on the result.  The temporary struct url is released
 * before returning.  Returns NULL if the URL cannot be parsed.
 */
struct url_ent *
fetchListURL(const char *URL, const char *flags)
{
	struct url *parsed;
	struct url_ent *entries = NULL;

	parsed = fetchParseURL(URL);
	if (parsed != NULL) {
		entries = fetchList(parsed, flags);
		fetchFreeURL(parsed);
	}
	return (entries);
}
252
253 /*
254 * Make a URL
255 */
256 struct url *
fetchMakeURL(const char * scheme,const char * host,int port,const char * doc,const char * user,const char * pwd)257 fetchMakeURL(const char *scheme, const char *host, int port, const char *doc,
258 const char *user, const char *pwd)
259 {
260 struct url *u;
261
262 if (!scheme || (!host && !doc)) {
263 url_seterr(URL_MALFORMED);
264 return (NULL);
265 }
266
267 if (port < 0 || port > 65535) {
268 url_seterr(URL_BAD_PORT);
269 return (NULL);
270 }
271
272 /* allocate struct url */
273 if ((u = calloc(1, sizeof(*u))) == NULL) {
274 fetch_syserr();
275 return (NULL);
276 }
277 u->netrcfd = -1;
278
279 if ((u->doc = strdup(doc ? doc : "/")) == NULL) {
280 fetch_syserr();
281 free(u);
282 return (NULL);
283 }
284
285 #define seturl(x) snprintf(u->x, sizeof(u->x), "%s", x)
286 seturl(scheme);
287 seturl(host);
288 seturl(user);
289 seturl(pwd);
290 #undef seturl
291 u->port = port;
292
293 return (u);
294 }
295
/*
 * Map a hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F') to its
 * numeric value 0..15.  Any other character yields -1.
 */
static int
fetch_hexval(char ch)
{
	int value = -1;

	if ('0' <= ch && ch <= '9')
		value = ch - '0';
	else if ('a' <= ch && ch <= 'f')
		value = 10 + (ch - 'a');
	else if ('A' <= ch && ch <= 'F')
		value = 10 + (ch - 'A');
	return (value);
}
311
/*
 * Copy a percent-encoded URL component from src to dst, decoding %XX
 * escapes on the way.  Decoding stops at the end of the string or at the
 * first '@' or ':' separator, and a pointer to that stopping character
 * is returned.
 *
 * At most dlen bytes are written.  NULL is returned if the decoded text
 * does not fit, or if a '%' is not followed by two hex digits, or if the
 * escape is %00.  dst is never NUL-terminated here; that is left to the
 * caller.
 */
static const char *
fetch_pctdecode(char *dst, const char *src, size_t dlen)
{
	const char *in = src;
	size_t room = dlen;
	int hi, lo;
	char out;

	while (*in != '\0' && *in != '@' && *in != ':') {
		if (*in != '%') {
			out = *in;
			in += 1;
		} else {
			/*
			 * Look at the second digit only once the first is
			 * known to be valid, so we never read past a
			 * terminator that directly follows the '%'.
			 */
			hi = fetch_hexval(in[1]);
			if (hi < 0)
				return (NULL);
			lo = fetch_hexval(in[2]);
			if (lo < 0 || (hi == 0 && lo == 0))
				return (NULL);
			out = hi << 4 | lo;
			in += 3;
		}
		if (room == 0)
			return (NULL);
		room--;
		*dst++ = out;
	}
	return (in);
}
343
344 /*
345 * Split an URL into components. URL syntax is:
346 * [method:/][/[user[:pwd]@]host[:port]/][document]
347 * This almost, but not quite, RFC1738 URL syntax.
348 */
349 struct url *
fetchParseURL(const char * URL)350 fetchParseURL(const char *URL)
351 {
352 char *doc;
353 const char *p, *q;
354 struct url *u;
355 int i, n;
356
357 /* allocate struct url */
358 if ((u = calloc(1, sizeof(*u))) == NULL) {
359 fetch_syserr();
360 return (NULL);
361 }
362 u->netrcfd = -1;
363
364 /* scheme name */
365 if ((p = strstr(URL, ":/"))) {
366 if (p - URL > URL_SCHEMELEN)
367 goto ouch;
368 for (i = 0; URL + i < p; i++)
369 u->scheme[i] = tolower((unsigned char)URL[i]);
370 URL = ++p;
371 /*
372 * Only one slash: no host, leave slash as part of document
373 * Two slashes: host follows, strip slashes
374 */
375 if (URL[1] == '/')
376 URL = (p += 2);
377 } else {
378 p = URL;
379 }
380 if (!*URL || *URL == '/' || *URL == '.' ||
381 (u->scheme[0] == '\0' &&
382 strchr(URL, '/') == NULL && strchr(URL, ':') == NULL))
383 goto nohost;
384
385 p = strpbrk(URL, "/@");
386 if (p && *p == '@') {
387 /* username */
388 q = fetch_pctdecode(u->user, URL, URL_USERLEN);
389 if (q == NULL)
390 goto ouch;
391
392 /* password */
393 if (*q == ':') {
394 q = fetch_pctdecode(u->pwd, q + 1, URL_PWDLEN);
395 if (q == NULL)
396 goto ouch;
397 }
398 p++;
399 } else {
400 p = URL;
401 }
402
403 /* hostname */
404 if (*p == '[') {
405 q = p + 1 + strspn(p + 1, ":0123456789ABCDEFabcdef");
406 if (*q++ != ']')
407 goto ouch;
408 } else {
409 /* valid characters in a DNS name */
410 q = p + strspn(p, "-." "0123456789"
411 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "_"
412 "abcdefghijklmnopqrstuvwxyz");
413 }
414 if ((*q != '\0' && *q != '/' && *q != ':') || q - p > MAXHOSTNAMELEN)
415 goto ouch;
416 for (i = 0; p + i < q; i++)
417 u->host[i] = tolower((unsigned char)p[i]);
418 u->host[i] = '\0';
419 p = q;
420
421 /* port */
422 if (*p == ':') {
423 for (n = 0, q = ++p; *q && (*q != '/'); q++) {
424 if (*q >= '0' && *q <= '9' && n < INT_MAX / 10) {
425 n = n * 10 + (*q - '0');
426 } else {
427 /* invalid port */
428 url_seterr(URL_BAD_PORT);
429 goto ouch;
430 }
431 }
432 if (n < 1 || n > IPPORT_MAX)
433 goto ouch;
434 u->port = n;
435 p = q;
436 }
437
438 nohost:
439 /* document */
440 if (!*p)
441 p = "/";
442
443 if (strcmp(u->scheme, SCHEME_HTTP) == 0 ||
444 strcmp(u->scheme, SCHEME_HTTPS) == 0) {
445 const char hexnums[] = "0123456789abcdef";
446
447 /* percent-escape whitespace. */
448 if ((doc = malloc(strlen(p) * 3 + 1)) == NULL) {
449 fetch_syserr();
450 goto ouch;
451 }
452 u->doc = doc;
453 while (*p != '\0') {
454 if (!isspace((unsigned char)*p)) {
455 *doc++ = *p++;
456 } else {
457 *doc++ = '%';
458 *doc++ = hexnums[((unsigned int)*p) >> 4];
459 *doc++ = hexnums[((unsigned int)*p) & 0xf];
460 p++;
461 }
462 }
463 *doc = '\0';
464 } else if ((u->doc = strdup(p)) == NULL) {
465 fetch_syserr();
466 goto ouch;
467 }
468
469 DEBUGF("scheme: \"%s\"\n"
470 "user: \"%s\"\n"
471 "password: \"%s\"\n"
472 "host: \"%s\"\n"
473 "port: \"%d\"\n"
474 "document: \"%s\"\n",
475 u->scheme, u->user, u->pwd,
476 u->host, u->port, u->doc);
477
478 return (u);
479
480 ouch:
481 free(u);
482 return (NULL);
483 }
484
485 /*
486 * Free a URL
487 */
488 void
fetchFreeURL(struct url * u)489 fetchFreeURL(struct url *u)
490 {
491 free(u->doc);
492 free(u);
493 }
494