|
1 /* |
|
2 * Copyright (c) 2017 Sunil Nimmagadda <sunil@openbsd.org> |
|
3 * |
|
4 * Permission to use, copy, modify, and distribute this software for any |
|
5 * purpose with or without fee is hereby granted, provided that the above |
|
6 * copyright notice and this permission notice appear in all copies. |
|
7 * |
|
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
|
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
|
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
|
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
|
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
|
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
|
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
|
15 */ |
|
16 |
|
17 /*- |
|
18 * Copyright (c) 1997 The NetBSD Foundation, Inc. |
|
19 * All rights reserved. |
|
20 * |
|
21 * This code is derived from software contributed to The NetBSD Foundation |
|
22 * by Jason Thorpe and Luke Mewburn. |
|
23 * |
|
24 * Redistribution and use in source and binary forms, with or without |
|
25 * modification, are permitted provided that the following conditions |
|
26 * are met: |
|
27 * 1. Redistributions of source code must retain the above copyright |
|
28 * notice, this list of conditions and the following disclaimer. |
|
29 * 2. Redistributions in binary form must reproduce the above copyright |
|
30 * notice, this list of conditions and the following disclaimer in the |
|
31 * documentation and/or other materials provided with the distribution. |
|
32 * |
|
33 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
|
34 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
|
35 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
|
36 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
|
37 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
|
38 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
|
39 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
|
40 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
|
41 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
|
42 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
|
43 * POSSIBILITY OF SUCH DAMAGE. |
|
44 */ |
|
45 #include <sys/types.h> |
|
46 |
|
47 #include <netinet/in.h> |
|
48 #include <resolv.h> |
|
49 |
|
50 #include <ctype.h> |
|
51 #include <err.h> |
|
52 #include <stdio.h> |
|
53 #include <stdlib.h> |
|
54 #include <string.h> |
|
55 #include <strings.h> |
|
56 |
|
57 #include "ftp.h" |
|
58 #include "xmalloc.h" |
|
59 |
|
/* Size in bytes of the buffer that receives the base64-encoded userinfo. */
#define BASICAUTH_LEN	1024

/* Helpers private to this file; each one is defined further down. */
static void	authority_parse(const char *str, char **host, char **port,
		    char **basic_auth);
static int	ipv6_parse(const char *str, char **host, char **port);
static int	unsafe_char(const char *c0);
|
65 |
|
/*
 * Scheme prefixes and their default ports.  Both tables are indexed by
 * the same scheme number; "file:" has no default port.  The HTTPS entries
 * exist only when NOSSL is not defined.
 */
const char *scheme_str[] = {
	"http:",
	"ftp:",
	"file:",
#ifndef NOSSL
	"https:",
#endif /* NOSSL */
};

const char *port_str[] = {
	"80",
	"21",
	NULL,
#ifndef NOSSL
	"443",
#endif /* NOSSL */
};
|
73 |
|
74 int |
|
75 url_scheme_lookup(const char *str) |
|
76 { |
|
77 size_t i; |
|
78 |
|
79 #ifdef NOSSL |
|
80 if (strncasecmp(str, "https:", 6) == 0) |
|
81 errx(1, "No HTTPS support."); |
|
82 #endif /* NOSSL */ |
|
83 |
|
84 for (i = 0; i < nitems(scheme_str); i++) |
|
85 if (strncasecmp(str, scheme_str[i], strlen(scheme_str[i])) == 0) |
|
86 return i; |
|
87 |
|
88 return -1; |
|
89 } |
|
90 |
|
/*
 * Split a bracketed IPv6 authority of the form "[addr]" or "[addr]:port".
 *
 * The first character of str is skipped unconditionally (the caller only
 * passes strings that start with '[').  Although str is declared const it
 * must point to writable storage: the closing ']' is overwritten with
 * '\0' while parsing.
 *
 * Returns 0 on success.  *host and *port receive copies made with
 * xstrdup() when the corresponding part is non-empty and are left
 * untouched otherwise.  On failure a warning is printed and 1 is
 * returned; if the port separator is invalid, *host is freed and reset to
 * NULL so the caller is never left holding a dangling pointer.
 */
static int
ipv6_parse(const char *str, char **host, char **port)
{
	char	*p;

	if ((p = strchr(str, ']')) == NULL) {
		warnx("%s: invalid IPv6 address: %s", __func__, str);
		return 1;
	}

	/* Terminate the address and step past the bracket. */
	*p++ = '\0';
	if (strlen(str + 1) > 0)
		*host = xstrdup(str + 1);

	/* Nothing after ']': no port given. */
	if (*p == '\0')
		return 0;

	if (*p != ':') {
		/* Report the trailing text including the offending byte. */
		warnx("%s: invalid port: %s", __func__, p);
		free(*host);
		*host = NULL;
		return 1;
	}
	p++;

	if (strlen(p) > 0)
		*port = xstrdup(p);

	return 0;
}
|
119 |
|
120 static void |
|
121 authority_parse(const char *str, char **host, char **port, char **basic_auth) |
|
122 { |
|
123 char *p; |
|
124 |
|
125 if ((p = strchr(str, '@')) != NULL) { |
|
126 *basic_auth = xcalloc(1, BASICAUTH_LEN); |
|
127 if (b64_ntop((unsigned char *)str, p - str, |
|
128 *basic_auth, BASICAUTH_LEN) == -1) |
|
129 errx(1, "base64 encode failed"); |
|
130 |
|
131 str = ++p; |
|
132 } |
|
133 |
|
134 if ((p = strchr(str, ':')) != NULL) { |
|
135 *p++ = '\0'; |
|
136 if (strlen(p) > 0) |
|
137 *port = xstrdup(p); |
|
138 } |
|
139 |
|
140 if (strlen(str) > 0) |
|
141 *host = xstrdup(str); |
|
142 } |
|
143 |
|
/*
 * Like url_parse(), but never returns NULL: the program exits with
 * status 1 when str cannot be parsed.
 */
struct url *
xurl_parse(const char *str)
{
	struct url	*parsed = url_parse(str);

	if (parsed == NULL)
		exit(1);

	return parsed;
}
|
154 |
|
155 struct url * |
|
156 url_parse(const char *str) |
|
157 { |
|
158 struct url *url; |
|
159 const char *p, *q; |
|
160 char *basic_auth, *host, *port, *path, *s; |
|
161 size_t len; |
|
162 int ip_literal, scheme; |
|
163 |
|
164 p = str; |
|
165 ip_literal = 0; |
|
166 host = port = path = basic_auth = NULL; |
|
167 while (isblank((unsigned char)*p)) |
|
168 p++; |
|
169 |
|
170 if ((q = strchr(p, ':')) == NULL) { |
|
171 warnx("%s: scheme missing: %s", __func__, str); |
|
172 return NULL; |
|
173 } |
|
174 |
|
175 if ((scheme = url_scheme_lookup(p)) == -1) { |
|
176 warnx("%s: invalid scheme: %s", __func__, p); |
|
177 return NULL; |
|
178 } |
|
179 |
|
180 p = ++q; |
|
181 if (strncmp(p, "//", 2) != 0) { |
|
182 if (scheme == S_FILE) |
|
183 goto done; |
|
184 else { |
|
185 warnx("%s: invalid url: %s", __func__, str); |
|
186 return NULL; |
|
187 } |
|
188 } |
|
189 |
|
190 p += 2; |
|
191 |
|
192 /* |
|
193 * quirk to parse file:// which isn't valid but required for |
|
194 * backwards compatibility. |
|
195 */ |
|
196 if (scheme == S_FILE) { |
|
197 q = (*p == '/') ? p : p - 1; |
|
198 goto done; |
|
199 } |
|
200 |
|
201 len = strlen(p); |
|
202 /* Authority terminated by a '/' if present */ |
|
203 if ((q = strchr(p, '/')) != NULL) |
|
204 len = q - p; |
|
205 |
|
206 s = xstrndup(p, len); |
|
207 if (*p == '[') { |
|
208 if (ipv6_parse(s, &host, &port) != 0) { |
|
209 free(s); |
|
210 return NULL; |
|
211 } |
|
212 ip_literal = 1; |
|
213 } else |
|
214 authority_parse(s, &host, &port, &basic_auth); |
|
215 |
|
216 free(s); |
|
217 if (port == NULL && scheme != S_FILE) |
|
218 port = xstrdup(port_str[scheme]); |
|
219 |
|
220 done: |
|
221 if (q != NULL) |
|
222 path = xstrdup(q); |
|
223 |
|
224 if (io_debug) { |
|
225 fprintf(stderr, |
|
226 "scheme: %s\nhost: %s\nport: %s\npath: %s\n", |
|
227 scheme_str[scheme], host, port, path); |
|
228 } |
|
229 |
|
230 url = xcalloc(1, sizeof *url); |
|
231 url->scheme = scheme; |
|
232 url->host = host; |
|
233 url->port = port; |
|
234 url->path = path; |
|
235 url->basic_auth = basic_auth; |
|
236 url->ip_literal = ip_literal; |
|
237 return url; |
|
238 } |
|
239 |
|
240 void |
|
241 url_free(struct url *url) |
|
242 { |
|
243 if (url == NULL) |
|
244 return; |
|
245 |
|
246 free(url->host); |
|
247 free(url->port); |
|
248 free(url->path); |
|
249 freezero(url->basic_auth, BASICAUTH_LEN); |
|
250 free(url); |
|
251 } |
|
252 |
|
253 void |
|
254 url_connect(struct url *url, int timeout) |
|
255 { |
|
256 switch (url->scheme) { |
|
257 case S_HTTP: |
|
258 case S_HTTPS: |
|
259 http_connect(url, timeout); |
|
260 break; |
|
261 case S_FTP: |
|
262 if (ftp_proxy) |
|
263 http_connect(url, timeout); |
|
264 else |
|
265 ftp_connect(url, timeout); |
|
266 break; |
|
267 } |
|
268 } |
|
269 |
|
270 struct url * |
|
271 url_request(struct url *url, off_t *offset, off_t *sz) |
|
272 { |
|
273 switch (url->scheme) { |
|
274 case S_HTTP: |
|
275 case S_HTTPS: |
|
276 return http_get(url, offset, sz); |
|
277 case S_FTP: |
|
278 if (ftp_proxy) |
|
279 return http_get(url, offset, sz); |
|
280 |
|
281 return ftp_get(url, offset, sz); |
|
282 case S_FILE: |
|
283 return file_get(url, offset, sz); |
|
284 } |
|
285 |
|
286 return NULL; |
|
287 } |
|
288 |
|
289 void |
|
290 url_save(struct url *url, FILE *dst_fp, off_t *offset) |
|
291 { |
|
292 switch (url->scheme) { |
|
293 case S_HTTP: |
|
294 case S_HTTPS: |
|
295 http_save(url, dst_fp, offset); |
|
296 break; |
|
297 case S_FTP: |
|
298 if (ftp_proxy) |
|
299 http_save(url, dst_fp, offset); |
|
300 else |
|
301 ftp_save(url, dst_fp, offset); |
|
302 break; |
|
303 case S_FILE: |
|
304 file_save(url, dst_fp, offset); |
|
305 break; |
|
306 } |
|
307 } |
|
308 |
|
309 void |
|
310 url_close(struct url *url) |
|
311 { |
|
312 switch (url->scheme) { |
|
313 case S_HTTP: |
|
314 case S_HTTPS: |
|
315 http_close(url); |
|
316 break; |
|
317 case S_FTP: |
|
318 if (ftp_proxy) |
|
319 http_close(url); |
|
320 else |
|
321 ftp_close(url); |
|
322 break; |
|
323 } |
|
324 } |
|
325 |
|
326 char * |
|
327 url_str(struct url *url) |
|
328 { |
|
329 char *host, *str; |
|
330 int custom_port; |
|
331 |
|
332 custom_port = strcmp(url->port, port_str[url->scheme]) ? 1 : 0; |
|
333 if (url->ip_literal) |
|
334 xasprintf(&host, "[%s]", url->host); |
|
335 else |
|
336 host = xstrdup(url->host); |
|
337 |
|
338 xasprintf(&str, "%s//%s%s%s%s", |
|
339 scheme_str[url->scheme], |
|
340 host, |
|
341 custom_port ? ":" : "", |
|
342 custom_port ? url->port : "", |
|
343 url->path ? url->path : "/"); |
|
344 |
|
345 free(host); |
|
346 return str; |
|
347 } |
|
348 |
|
349 const char * |
|
350 url_scheme_str(int scheme) |
|
351 { |
|
352 return scheme_str[scheme]; |
|
353 } |
|
354 |
|
355 const char * |
|
356 url_port_str(int scheme) |
|
357 { |
|
358 return port_str[scheme]; |
|
359 } |
|
360 |
|
/*
 * Percent-encode path, per RFC1738.
 *
 * Every byte for which unsafe_char() is true is written as '%' followed
 * by two lower-case hexadecimal digits; every other byte is copied as is.
 * Returns a NUL-terminated string in storage obtained from xmalloc().
 */
char *
url_encode(const char *path)
{
	static const char	 hexdigits[] = "0123456789abcdef";
	const char		*src;
	char			*encoded, *dst;
	size_t			 need;
	unsigned char		 byte;

	/* Pass 1: each unsafe byte grows from one output byte to three. */
	need = strlen(path);
	for (src = path; *src != '\0'; src++)
		if (unsafe_char(src))
			need += 2;

	encoded = dst = xmalloc(need + 1);	/* One more for '\0'. */

	/* Pass 2: copy safe bytes, expand unsafe ones. */
	for (src = path; *src != '\0'; src++) {
		if (!unsafe_char(src)) {
			*dst++ = *src;
			continue;
		}

		byte = (unsigned char)*src;
		*dst++ = '%';
		*dst++ = hexdigits[byte >> 4];
		*dst++ = hexdigits[byte & 0x0f];
	}
	*dst = '\0';

	return encoded;
}
|
399 |
|
/*
 * Decide whether the byte at c0 needs encoding, per RFC1738.
 *
 * Returns 1 for:
 *  - control characters and bytes outside US-ASCII,
 *  - any character listed in unsafe_chars below,
 *  - a '%' that is not followed by two hexadecimal digits.
 * Returns 0 otherwise.
 *
 * c0 must point into a NUL-terminated string: when it points at '%',
 * up to two following bytes are examined.
 */
static int
unsafe_char(const char *c0)
{
	static const char	 unsafe_chars[] = " <>\"#{}|\\^~[]`";
	const unsigned char	*c = (const unsigned char *)c0;

	/*
	 * No corresponding graphic US-ASCII:
	 * control characters and octets above 0x7f.
	 */
	if (iscntrl(c[0]) || c[0] > 0x7f)
		return 1;

	/* Characters that are always unsafe. */
	if (strchr(unsafe_chars, c[0]) != NULL)
		return 1;

	/*
	 * '%' is unsafe unless it introduces a two-digit escape.  The
	 * second digit is only looked at when the first one is valid, so
	 * the terminating NUL is never stepped over.
	 */
	if (c[0] == '%' && !(isxdigit(c[1]) && isxdigit(c[2])))
		return 1;

	return 0;
}