url.c
changeset 0 1d0ce1ebbc72
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/url.c	Tue Dec 06 13:51:55 2022 +0000
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2017 Sunil Nimmagadda <sunil@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*-
+ * Copyright (c) 1997 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by Jason Thorpe and Luke Mewburn.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/types.h>
+
+#include <netinet/in.h>
+#include <resolv.h>
+
+#include <ctype.h>
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include "ftp.h"
+#include "xmalloc.h"
+
+#define BASICAUTH_LEN	1024
+
+static void	authority_parse(const char *, char **, char **, char **);
+static int	ipv6_parse(const char *, char **, char **);
+static int	unsafe_char(const char *);
+
+#ifndef NOSSL
+const char	*scheme_str[] = { "http:", "ftp:", "file:", "https:" };
+const char	*port_str[] = { "80", "21", NULL, "443" };
+#else
+const char	*scheme_str[] = { "http:", "ftp:", "file:" };
+const char	*port_str[] = { "80", "21", NULL };
+#endif /* NOSSL */
+
+int
+url_scheme_lookup(const char *str)
+{
+	size_t	i;
+
+#ifdef NOSSL
+	if (strncasecmp(str, "https:", 6) == 0)
+		errx(1, "No HTTPS support.");
+#endif /* NOSSL */
+
+	for (i = 0; i < nitems(scheme_str); i++)
+		if (strncasecmp(str, scheme_str[i], strlen(scheme_str[i])) == 0)
+			return i;
+
+	return -1;
+}
+
+static int
+ipv6_parse(const char *str, char **host, char **port)
+{
+	char	*p;
+
+	if ((p = strchr(str, ']')) == NULL) {
+		warnx("%s: invalid IPv6 address: %s", __func__, str);
+		return 1;
+	}
+
+	*p++ = '\0';
+	if (strlen(str + 1) > 0)
+		*host = xstrdup(str + 1);
+
+	if (*p == '\0')
+		return 0;
+
+	if (*p++ != ':') {
+		warnx("%s: invalid port: %s", __func__, p);
+		free(*host);
+		return 1;
+	}
+
+	if (strlen(p) > 0)
+		*port = xstrdup(p);
+
+	return 0;
+}
+
+static void
+authority_parse(const char *str, char **host, char **port, char **basic_auth)
+{
+	char	*p;
+
+	if ((p = strchr(str, '@')) != NULL) {
+		*basic_auth = xcalloc(1, BASICAUTH_LEN);
+		if (b64_ntop((unsigned char *)str, p - str,
+		    *basic_auth, BASICAUTH_LEN) == -1)
+			errx(1, "base64 encode failed");
+
+		str = ++p;
+	}
+
+	if ((p = strchr(str, ':')) != NULL) {
+		*p++ = '\0';
+		if (strlen(p) > 0)
+			*port = xstrdup(p);
+	}
+
+	if (strlen(str) > 0)
+		*host = xstrdup(str);
+}
+
+struct url *
+xurl_parse(const char *str)
+{
+	struct url	*url;
+
+	if ((url = url_parse(str)) == NULL)
+		exit(1);
+
+	return url;
+}
+
+struct url *
+url_parse(const char *str)
+{
+	struct url	*url;
+	const char	*p, *q;
+	char		*basic_auth, *host, *port, *path, *s;
+	size_t		 len;
+	int		 ip_literal, scheme;
+
+	p = str;
+	ip_literal = 0;
+	host = port = path = basic_auth = NULL;
+	while (isblank((unsigned char)*p))
+		p++;
+
+	if ((q = strchr(p, ':')) == NULL) {
+		warnx("%s: scheme missing: %s", __func__, str);
+		return NULL;
+	}
+
+	if ((scheme = url_scheme_lookup(p)) == -1) {
+		warnx("%s: invalid scheme: %s", __func__, p);
+		return NULL;
+	}
+
+	p = ++q;
+	if (strncmp(p, "//", 2) != 0) {
+		if (scheme == S_FILE)
+			goto done;
+		else {
+			warnx("%s: invalid url: %s", __func__, str);
+			return NULL;
+		}
+	}
+
+	p += 2;
+
+	/*
+	 * quirk to parse file:// which isn't valid but required for
+	 * backwards compatibility.
+	 */
+	if (scheme == S_FILE) {
+		q = (*p == '/') ? p : p - 1;
+		goto done;
+	}
+
+	len = strlen(p);
+	/* Authority terminated by a '/' if present */
+	if ((q = strchr(p, '/')) != NULL)
+		len = q - p;
+
+	s = xstrndup(p, len);
+	if (*p == '[') {
+		if (ipv6_parse(s, &host, &port) != 0) {
+			free(s);
+			return NULL;
+		}
+		ip_literal = 1;
+	} else
+		authority_parse(s, &host, &port, &basic_auth);
+
+	free(s);
+	if (port == NULL && scheme != S_FILE)
+		port = xstrdup(port_str[scheme]);
+
+ done:
+	if (q != NULL)
+		path = xstrdup(q);
+
+	if (io_debug) {
+		fprintf(stderr,
+		    "scheme: %s\nhost: %s\nport: %s\npath: %s\n",
+		    scheme_str[scheme], host, port, path);
+	}
+
+	url = xcalloc(1, sizeof *url);
+	url->scheme = scheme;
+	url->host = host;
+	url->port = port;
+	url->path = path;
+	url->basic_auth = basic_auth;
+	url->ip_literal = ip_literal;
+	return url;
+}
+
+void
+url_free(struct url *url)
+{
+	if (url == NULL)
+		return;
+
+	free(url->host);
+	free(url->port);
+	free(url->path);
+	freezero(url->basic_auth, BASICAUTH_LEN);
+	free(url);
+}
+
+void
+url_connect(struct url *url, int timeout)
+{
+	switch (url->scheme) {
+	case S_HTTP:
+	case S_HTTPS:
+		http_connect(url, timeout);
+		break;
+	case S_FTP:
+		if (ftp_proxy)
+			http_connect(url, timeout);
+		else
+			ftp_connect(url, timeout);
+		break;
+	}
+}
+
+struct url *
+url_request(struct url *url, off_t *offset, off_t *sz)
+{
+	switch (url->scheme) {
+	case S_HTTP:
+	case S_HTTPS:
+		return http_get(url, offset, sz);
+	case S_FTP:
+		if (ftp_proxy)
+			return http_get(url, offset, sz);
+
+		return ftp_get(url, offset, sz);
+	case S_FILE:
+		return file_get(url, offset, sz);
+	}
+
+	return NULL;
+}
+
+void
+url_save(struct url *url, FILE *dst_fp, off_t *offset)
+{
+	switch (url->scheme) {
+	case S_HTTP:
+	case S_HTTPS:
+		http_save(url, dst_fp, offset);
+		break;
+	case S_FTP:
+		if (ftp_proxy)
+			http_save(url, dst_fp, offset);
+		else
+			ftp_save(url, dst_fp, offset);
+		break;
+	case S_FILE:
+		file_save(url, dst_fp, offset);
+		break;
+	}
+}
+
+void
+url_close(struct url *url)
+{
+	switch (url->scheme) {
+	case S_HTTP:
+	case S_HTTPS:
+		http_close(url);
+		break;
+	case S_FTP:
+		if (ftp_proxy)
+			http_close(url);
+		else
+			ftp_close(url);
+		break;
+	}
+}
+
+char *
+url_str(struct url *url)
+{
+	char	*host, *str;
+	int	 custom_port;
+
+	custom_port = strcmp(url->port, port_str[url->scheme]) ? 1 : 0;
+	if (url->ip_literal)
+		xasprintf(&host, "[%s]", url->host);
+	else
+		host = xstrdup(url->host);
+
+	xasprintf(&str, "%s//%s%s%s%s",
+	    scheme_str[url->scheme],
+	    host,
+	    custom_port ? ":" : "",
+	    custom_port ? url->port : "",
+	    url->path ? url->path : "/");
+
+	free(host);
+	return str;
+}
+
+const char *
+url_scheme_str(int scheme)
+{
+	return scheme_str[scheme];
+}
+
+const char *
+url_port_str(int scheme)
+{
+	return port_str[scheme];
+}
+
+/*
+ * Encode given URL, per RFC1738.
+ * Allocate and return string to the caller.
+ */
+char *
+url_encode(const char *path)
+{
+	size_t i, length, new_length;
+	char *epath, *epathp;
+
+	length = new_length = strlen(path);
+
+	/*
+	 * First pass:
+	 * Count unsafe characters, and determine length of the
+	 * final URL.
+	 */
+	for (i = 0; i < length; i++)
+		if (unsafe_char(path + i))
+			new_length += 2;
+
+	epath = epathp = xmalloc(new_length + 1);	/* One more for '\0'. */
+
+	/*
+	 * Second pass:
+	 * Encode, and copy final URL.
+	 */
+	for (i = 0; i < length; i++)
+		if (unsafe_char(path + i)) {
+			snprintf(epathp, 4, "%%" "%02x",
+			    (unsigned char)path[i]);
+			epathp += 3;
+		} else
+			*(epathp++) = path[i];
+
+	*epathp = '\0';
+	return epath;
+}
+
+/*
+ * Determine whether the character needs encoding, per RFC1738:
+ * 	- No corresponding graphic US-ASCII.
+ * 	- Unsafe characters.
+ */
+static int
+unsafe_char(const char *c0)
+{
+	const char *unsafe_chars = " <>\"#{}|\\^~[]`";
+	const unsigned char *c = (const unsigned char *)c0;
+
+	/*
+	 * No corresponding graphic US-ASCII.
+	 * Control characters and octets not used in US-ASCII.
+	 */
+	return (iscntrl(*c) || !isascii(*c) ||
+
+	    /*
+	     * Unsafe characters.
+	     * '%' is also unsafe, if is not followed by two
+	     * hexadecimal digits.
+	     */
+	    strchr(unsafe_chars, *c) != NULL ||
+	    (*c == '%' && (!isxdigit(*++c) || !isxdigit(*++c))));
+}