url.c
changeset 0 1d0ce1ebbc72
equal deleted inserted replaced
-1:000000000000 0:1d0ce1ebbc72
       
     1 /*
       
     2  * Copyright (c) 2017 Sunil Nimmagadda <sunil@openbsd.org>
       
     3  *
       
     4  * Permission to use, copy, modify, and distribute this software for any
       
     5  * purpose with or without fee is hereby granted, provided that the above
       
     6  * copyright notice and this permission notice appear in all copies.
       
     7  *
       
     8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
       
     9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
       
    10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
       
    11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
       
    12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
       
    13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
       
    14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
       
    15  */
       
    16 
       
    17 /*-
       
    18  * Copyright (c) 1997 The NetBSD Foundation, Inc.
       
    19  * All rights reserved.
       
    20  *
       
    21  * This code is derived from software contributed to The NetBSD Foundation
       
    22  * by Jason Thorpe and Luke Mewburn.
       
    23  *
       
    24  * Redistribution and use in source and binary forms, with or without
       
    25  * modification, are permitted provided that the following conditions
       
    26  * are met:
       
    27  * 1. Redistributions of source code must retain the above copyright
       
    28  *    notice, this list of conditions and the following disclaimer.
       
    29  * 2. Redistributions in binary form must reproduce the above copyright
       
    30  *    notice, this list of conditions and the following disclaimer in the
       
    31  *    documentation and/or other materials provided with the distribution.
       
    32  *
       
    33  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
       
    34  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
       
    35  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
       
    36  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
       
    37  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    38  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    39  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    40  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    41  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    42  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    43  * POSSIBILITY OF SUCH DAMAGE.
       
    44  */
       
    45 #include <sys/types.h>
       
    46 
       
    47 #include <netinet/in.h>
       
    48 #include <resolv.h>
       
    49 
       
    50 #include <ctype.h>
       
    51 #include <err.h>
       
    52 #include <stdio.h>
       
    53 #include <stdlib.h>
       
    54 #include <string.h>
       
    55 #include <strings.h>
       
    56 
       
    57 #include "ftp.h"
       
    58 #include "xmalloc.h"
       
    59 
       
    60 #define BASICAUTH_LEN	1024
       
    61 
       
    62 static void	authority_parse(const char *, char **, char **, char **);
       
    63 static int	ipv6_parse(const char *, char **, char **);
       
    64 static int	unsafe_char(const char *);
       
    65 
       
    66 #ifndef NOSSL
       
    67 const char	*scheme_str[] = { "http:", "ftp:", "file:", "https:" };
       
    68 const char	*port_str[] = { "80", "21", NULL, "443" };
       
    69 #else
       
    70 const char	*scheme_str[] = { "http:", "ftp:", "file:" };
       
    71 const char	*port_str[] = { "80", "21", NULL };
       
    72 #endif /* NOSSL */
       
    73 
       
    74 int
       
    75 url_scheme_lookup(const char *str)
       
    76 {
       
    77 	size_t	i;
       
    78 
       
    79 #ifdef NOSSL
       
    80 	if (strncasecmp(str, "https:", 6) == 0)
       
    81 		errx(1, "No HTTPS support.");
       
    82 #endif /* NOSSL */
       
    83 
       
    84 	for (i = 0; i < nitems(scheme_str); i++)
       
    85 		if (strncasecmp(str, scheme_str[i], strlen(scheme_str[i])) == 0)
       
    86 			return i;
       
    87 
       
    88 	return -1;
       
    89 }
       
    90 
       
    91 static int
       
    92 ipv6_parse(const char *str, char **host, char **port)
       
    93 {
       
    94 	char	*p;
       
    95 
       
    96 	if ((p = strchr(str, ']')) == NULL) {
       
    97 		warnx("%s: invalid IPv6 address: %s", __func__, str);
       
    98 		return 1;
       
    99 	}
       
   100 
       
   101 	*p++ = '\0';
       
   102 	if (strlen(str + 1) > 0)
       
   103 		*host = xstrdup(str + 1);
       
   104 
       
   105 	if (*p == '\0')
       
   106 		return 0;
       
   107 
       
   108 	if (*p++ != ':') {
       
   109 		warnx("%s: invalid port: %s", __func__, p);
       
   110 		free(*host);
       
   111 		return 1;
       
   112 	}
       
   113 
       
   114 	if (strlen(p) > 0)
       
   115 		*port = xstrdup(p);
       
   116 
       
   117 	return 0;
       
   118 }
       
   119 
       
   120 static void
       
   121 authority_parse(const char *str, char **host, char **port, char **basic_auth)
       
   122 {
       
   123 	char	*p;
       
   124 
       
   125 	if ((p = strchr(str, '@')) != NULL) {
       
   126 		*basic_auth = xcalloc(1, BASICAUTH_LEN);
       
   127 		if (b64_ntop((unsigned char *)str, p - str,
       
   128 		    *basic_auth, BASICAUTH_LEN) == -1)
       
   129 			errx(1, "base64 encode failed");
       
   130 
       
   131 		str = ++p;
       
   132 	}
       
   133 
       
   134 	if ((p = strchr(str, ':')) != NULL) {
       
   135 		*p++ = '\0';
       
   136 		if (strlen(p) > 0)
       
   137 			*port = xstrdup(p);
       
   138 	}
       
   139 
       
   140 	if (strlen(str) > 0)
       
   141 		*host = xstrdup(str);
       
   142 }
       
   143 
       
   144 struct url *
       
   145 xurl_parse(const char *str)
       
   146 {
       
   147 	struct url	*url;
       
   148 
       
   149 	if ((url = url_parse(str)) == NULL)
       
   150 		exit(1);
       
   151 
       
   152 	return url;
       
   153 }
       
   154 
       
   155 struct url *
       
   156 url_parse(const char *str)
       
   157 {
       
   158 	struct url	*url;
       
   159 	const char	*p, *q;
       
   160 	char		*basic_auth, *host, *port, *path, *s;
       
   161 	size_t		 len;
       
   162 	int		 ip_literal, scheme;
       
   163 
       
   164 	p = str;
       
   165 	ip_literal = 0;
       
   166 	host = port = path = basic_auth = NULL;
       
   167 	while (isblank((unsigned char)*p))
       
   168 		p++;
       
   169 
       
   170 	if ((q = strchr(p, ':')) == NULL) {
       
   171 		warnx("%s: scheme missing: %s", __func__, str);
       
   172 		return NULL;
       
   173 	}
       
   174 
       
   175 	if ((scheme = url_scheme_lookup(p)) == -1) {
       
   176 		warnx("%s: invalid scheme: %s", __func__, p);
       
   177 		return NULL;
       
   178 	}
       
   179 
       
   180 	p = ++q;
       
   181 	if (strncmp(p, "//", 2) != 0) {
       
   182 		if (scheme == S_FILE)
       
   183 			goto done;
       
   184 		else {
       
   185 			warnx("%s: invalid url: %s", __func__, str);
       
   186 			return NULL;
       
   187 		}
       
   188 	}
       
   189 
       
   190 	p += 2;
       
   191 
       
   192 	/*
       
   193 	 * quirk to parse file:// which isn't valid but required for
       
   194 	 * backwards compatibility.
       
   195 	 */
       
   196 	if (scheme == S_FILE) {
       
   197 		q = (*p == '/') ? p : p - 1;
       
   198 		goto done;
       
   199 	}
       
   200 
       
   201 	len = strlen(p);
       
   202 	/* Authority terminated by a '/' if present */
       
   203 	if ((q = strchr(p, '/')) != NULL)
       
   204 		len = q - p;
       
   205 
       
   206 	s = xstrndup(p, len);
       
   207 	if (*p == '[') {
       
   208 		if (ipv6_parse(s, &host, &port) != 0) {
       
   209 			free(s);
       
   210 			return NULL;
       
   211 		}
       
   212 		ip_literal = 1;
       
   213 	} else
       
   214 		authority_parse(s, &host, &port, &basic_auth);
       
   215 
       
   216 	free(s);
       
   217 	if (port == NULL && scheme != S_FILE)
       
   218 		port = xstrdup(port_str[scheme]);
       
   219 
       
   220  done:
       
   221 	if (q != NULL)
       
   222 		path = xstrdup(q);
       
   223 
       
   224 	if (io_debug) {
       
   225 		fprintf(stderr,
       
   226 		    "scheme: %s\nhost: %s\nport: %s\npath: %s\n",
       
   227 		    scheme_str[scheme], host, port, path);
       
   228 	}
       
   229 
       
   230 	url = xcalloc(1, sizeof *url);
       
   231 	url->scheme = scheme;
       
   232 	url->host = host;
       
   233 	url->port = port;
       
   234 	url->path = path;
       
   235 	url->basic_auth = basic_auth;
       
   236 	url->ip_literal = ip_literal;
       
   237 	return url;
       
   238 }
       
   239 
       
   240 void
       
   241 url_free(struct url *url)
       
   242 {
       
   243 	if (url == NULL)
       
   244 		return;
       
   245 
       
   246 	free(url->host);
       
   247 	free(url->port);
       
   248 	free(url->path);
       
   249 	freezero(url->basic_auth, BASICAUTH_LEN);
       
   250 	free(url);
       
   251 }
       
   252 
       
   253 void
       
   254 url_connect(struct url *url, int timeout)
       
   255 {
       
   256 	switch (url->scheme) {
       
   257 	case S_HTTP:
       
   258 	case S_HTTPS:
       
   259 		http_connect(url, timeout);
       
   260 		break;
       
   261 	case S_FTP:
       
   262 		if (ftp_proxy)
       
   263 			http_connect(url, timeout);
       
   264 		else
       
   265 			ftp_connect(url, timeout);
       
   266 		break;
       
   267 	}
       
   268 }
       
   269 
       
   270 struct url *
       
   271 url_request(struct url *url, off_t *offset, off_t *sz)
       
   272 {
       
   273 	switch (url->scheme) {
       
   274 	case S_HTTP:
       
   275 	case S_HTTPS:
       
   276 		return http_get(url, offset, sz);
       
   277 	case S_FTP:
       
   278 		if (ftp_proxy)
       
   279 			return http_get(url, offset, sz);
       
   280 
       
   281 		return ftp_get(url, offset, sz);
       
   282 	case S_FILE:
       
   283 		return file_get(url, offset, sz);
       
   284 	}
       
   285 
       
   286 	return NULL;
       
   287 }
       
   288 
       
   289 void
       
   290 url_save(struct url *url, FILE *dst_fp, off_t *offset)
       
   291 {
       
   292 	switch (url->scheme) {
       
   293 	case S_HTTP:
       
   294 	case S_HTTPS:
       
   295 		http_save(url, dst_fp, offset);
       
   296 		break;
       
   297 	case S_FTP:
       
   298 		if (ftp_proxy)
       
   299 			http_save(url, dst_fp, offset);
       
   300 		else
       
   301 			ftp_save(url, dst_fp, offset);
       
   302 		break;
       
   303 	case S_FILE:
       
   304 		file_save(url, dst_fp, offset);
       
   305 		break;
       
   306 	}
       
   307 }
       
   308 
       
   309 void
       
   310 url_close(struct url *url)
       
   311 {
       
   312 	switch (url->scheme) {
       
   313 	case S_HTTP:
       
   314 	case S_HTTPS:
       
   315 		http_close(url);
       
   316 		break;
       
   317 	case S_FTP:
       
   318 		if (ftp_proxy)
       
   319 			http_close(url);
       
   320 		else
       
   321 			ftp_close(url);
       
   322 		break;
       
   323 	}
       
   324 }
       
   325 
       
   326 char *
       
   327 url_str(struct url *url)
       
   328 {
       
   329 	char	*host, *str;
       
   330 	int	 custom_port;
       
   331 
       
   332 	custom_port = strcmp(url->port, port_str[url->scheme]) ? 1 : 0;
       
   333 	if (url->ip_literal)
       
   334 		xasprintf(&host, "[%s]", url->host);
       
   335 	else
       
   336 		host = xstrdup(url->host);
       
   337 
       
   338 	xasprintf(&str, "%s//%s%s%s%s",
       
   339 	    scheme_str[url->scheme],
       
   340 	    host,
       
   341 	    custom_port ? ":" : "",
       
   342 	    custom_port ? url->port : "",
       
   343 	    url->path ? url->path : "/");
       
   344 
       
   345 	free(host);
       
   346 	return str;
       
   347 }
       
   348 
       
   349 const char *
       
   350 url_scheme_str(int scheme)
       
   351 {
       
   352 	return scheme_str[scheme];
       
   353 }
       
   354 
       
   355 const char *
       
   356 url_port_str(int scheme)
       
   357 {
       
   358 	return port_str[scheme];
       
   359 }
       
   360 
       
   361 /*
       
   362  * Encode given URL, per RFC1738.
       
   363  * Allocate and return string to the caller.
       
   364  */
       
   365 char *
       
   366 url_encode(const char *path)
       
   367 {
       
   368 	size_t i, length, new_length;
       
   369 	char *epath, *epathp;
       
   370 
       
   371 	length = new_length = strlen(path);
       
   372 
       
   373 	/*
       
   374 	 * First pass:
       
   375 	 * Count unsafe characters, and determine length of the
       
   376 	 * final URL.
       
   377 	 */
       
   378 	for (i = 0; i < length; i++)
       
   379 		if (unsafe_char(path + i))
       
   380 			new_length += 2;
       
   381 
       
   382 	epath = epathp = xmalloc(new_length + 1);	/* One more for '\0'. */
       
   383 
       
   384 	/*
       
   385 	 * Second pass:
       
   386 	 * Encode, and copy final URL.
       
   387 	 */
       
   388 	for (i = 0; i < length; i++)
       
   389 		if (unsafe_char(path + i)) {
       
   390 			snprintf(epathp, 4, "%%" "%02x",
       
   391 			    (unsigned char)path[i]);
       
   392 			epathp += 3;
       
   393 		} else
       
   394 			*(epathp++) = path[i];
       
   395 
       
   396 	*epathp = '\0';
       
   397 	return epath;
       
   398 }
       
   399 
       
   400 /*
       
   401  * Determine whether the character needs encoding, per RFC1738:
       
   402  * 	- No corresponding graphic US-ASCII.
       
   403  * 	- Unsafe characters.
       
   404  */
       
   405 static int
       
   406 unsafe_char(const char *c0)
       
   407 {
       
   408 	const char *unsafe_chars = " <>\"#{}|\\^~[]`";
       
   409 	const unsigned char *c = (const unsigned char *)c0;
       
   410 
       
   411 	/*
       
   412 	 * No corresponding graphic US-ASCII.
       
   413 	 * Control characters and octets not used in US-ASCII.
       
   414 	 */
       
   415 	return (iscntrl(*c) || !isascii(*c) ||
       
   416 
       
   417 	    /*
       
   418 	     * Unsafe characters.
       
   419 	     * '%' is also unsafe, if is not followed by two
       
   420 	     * hexadecimal digits.
       
   421 	     */
       
   422 	    strchr(unsafe_chars, *c) != NULL ||
       
   423 	    (*c == '%' && (!isxdigit(*++c) || !isxdigit(*++c))));
       
   424 }