600 lines
14 KiB
C
600 lines
14 KiB
C
//
|
|
// Copyright 2020 Staysail Systems, Inc. <info@staysail.tech>
|
|
// Copyright 2018 Capitar IT Group BV <info@capitar.com>
|
|
//
|
|
// This software is supplied under the terms of the MIT License, a
|
|
// copy of which should be located in the distribution where this
|
|
// file was obtained (LICENSE.txt). A copy of the license may also be
|
|
// found online at https://opensource.org/licenses/MIT.
|
|
//
|
|
|
|
#include "core/nng_impl.h"
|
|
|
|
#include <ctype.h>
|
|
#include <stdbool.h>
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
|
|
#include "url.h"
|
|
|
|
static uint8_t
|
|
url_hex_val(char c)
|
|
{
|
|
if ((c >= '0') && (c <= '9')) {
|
|
return (c - '0');
|
|
}
|
|
if ((c >= 'A') && (c <= 'F')) {
|
|
return ((c - 'A') + 10);
|
|
}
|
|
if ((c >= 'a') && (c <= 'f')) {
|
|
return ((c - 'a') + 10);
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
// This returns either 0, or NNG_EINVAL, if the supplied input string
|
|
// is malformed UTF-8. We consider UTF-8 malformed when the sequence
|
|
// is an invalid code point, not the shortest possible code point, or
|
|
// incomplete.
|
|
static int
|
|
url_utf8_validate(void *arg)
|
|
{
|
|
uint8_t *s = arg;
|
|
uint32_t v, minv;
|
|
int nb;
|
|
|
|
while (*s) {
|
|
if ((s[0] & 0x80u) == 0) {
|
|
s++;
|
|
continue;
|
|
}
|
|
if ((s[0] & 0xe0u) == 0xc0) {
|
|
// 0x80 thru 0x7ff
|
|
v = (s[0] & 0x1fu);
|
|
minv = 0x80;
|
|
nb = 1;
|
|
} else if ((s[0] & 0xf0u) == 0xe0) {
|
|
v = (s[0] & 0xfu);
|
|
minv = 0x800;
|
|
nb = 2;
|
|
} else if ((s[0] & 0xf8u) == 0xf0) {
|
|
v = (s[0] & 0x7u);
|
|
minv = 0x10000;
|
|
nb = 3;
|
|
} else {
|
|
// invalid byte, either continuation, or too many
|
|
// leading 1 bits.
|
|
return (NNG_EINVAL);
|
|
}
|
|
s++;
|
|
for (int i = 0; i < nb; i++) {
|
|
if ((s[0] & 0xc0u) != 0x80) {
|
|
return (NNG_EINVAL); // not continuation
|
|
}
|
|
s++;
|
|
v <<= 6u;
|
|
v += s[0] & 0x3fu;
|
|
}
|
|
if (v < minv) {
|
|
return (NNG_EINVAL);
|
|
}
|
|
if ((v >= 0xd800) && (v <= 0xdfff)) {
|
|
return (NNG_EINVAL);
|
|
}
|
|
if (v > 0x10ffff) {
|
|
return (NNG_EINVAL);
|
|
}
|
|
}
|
|
return (0);
|
|
}
|
|
|
|
size_t
|
|
nni_url_decode(uint8_t *out, const char *in, size_t max_len)
|
|
{
|
|
size_t len;
|
|
uint8_t c;
|
|
|
|
len = 0;
|
|
while ((c = (uint8_t) *in) != '\0') {
|
|
if (len >= max_len) {
|
|
return ((size_t) -1);
|
|
}
|
|
if (c == '%') {
|
|
in++;
|
|
if ((!isxdigit(in[0])) || (!isxdigit(in[1]))) {
|
|
return ((size_t) -1);
|
|
}
|
|
out[len] = url_hex_val(*in++);
|
|
out[len] <<= 4u;
|
|
out[len] += url_hex_val(*in++);
|
|
len++;
|
|
} else {
|
|
out[len++] = c;
|
|
in++;
|
|
}
|
|
}
|
|
return (len);
|
|
}
|
|
|
|
static int
|
|
url_canonify_uri(char **outp, const char *in)
|
|
{
|
|
char * out;
|
|
size_t src, dst, len;
|
|
uint8_t c;
|
|
int rv;
|
|
bool skip;
|
|
|
|
// We know that the transform is strictly "reducing".
|
|
if ((out = nni_strdup(in)) == NULL) {
|
|
return (NNG_ENOMEM);
|
|
}
|
|
len = strlen(out);
|
|
|
|
// First pass, convert '%xx' for safe characters to unescaped forms.
|
|
src = dst = 0;
|
|
while ((c = out[src]) != 0) {
|
|
if (c == '%') {
|
|
if ((!isxdigit(out[src + 1])) ||
|
|
(!isxdigit(out[src + 2]))) {
|
|
nni_free(out, len);
|
|
return (NNG_EINVAL);
|
|
}
|
|
c = url_hex_val(out[src + 1]);
|
|
c *= 16;
|
|
c += url_hex_val(out[src + 2]);
|
|
// If it's a safe character, decode, otherwise leave
|
|
// it alone. We also decode valid high-bytes for
|
|
// UTF-8, which will let us validate them and use
|
|
// those characters in file names later.
|
|
if (((c >= 'A') && (c <= 'Z')) ||
|
|
((c >= 'a') && (c <= 'z')) ||
|
|
((c >= '0') && (c <= '9')) || (c == '.') ||
|
|
(c == '~') || (c == '_') || (c == '-') ||
|
|
(c >= 0x80)) {
|
|
out[dst++] = (char) c;
|
|
} else {
|
|
out[dst++] = '%';
|
|
out[dst++] = toupper((uint8_t) out[src + 1]);
|
|
out[dst++] = toupper((uint8_t) out[src + 2]);
|
|
}
|
|
src += 3;
|
|
continue;
|
|
} else {
|
|
out[dst++] = out[src++];
|
|
}
|
|
}
|
|
out[dst] = 0;
|
|
|
|
// Second pass, eliminate redundant //.
|
|
src = dst = 0;
|
|
skip = false;
|
|
while ((c = out[src]) != 0) {
|
|
if ((c == '/') && (!skip)) {
|
|
out[dst++] = '/';
|
|
while (out[src] == '/') {
|
|
src++;
|
|
}
|
|
continue;
|
|
}
|
|
if ((c == '?') || (c == '#')) {
|
|
skip = true;
|
|
}
|
|
out[dst++] = (char) c;
|
|
src++;
|
|
}
|
|
out[dst] = 0;
|
|
|
|
// Second pass, reduce /. and /.. elements, but only in the path.
|
|
src = dst = 0;
|
|
skip = false;
|
|
while ((c = out[src]) != 0) {
|
|
if ((c == '/') && (!skip)) {
|
|
if ((strncmp(out + src, "/..", 3) == 0) &&
|
|
(out[src + 3] == 0 || out[src + 3] == '#' ||
|
|
out[src + 3] == '?' || out[src + 3] == '/')) {
|
|
|
|
if (dst > 0) {
|
|
do {
|
|
dst--;
|
|
} while ((dst) && (out[dst] != '/'));
|
|
}
|
|
src += 3;
|
|
continue;
|
|
}
|
|
if ((strncmp(out + src, "/.", 2) == 0) &&
|
|
(out[src + 2] == 0 || out[src + 2] == '#' ||
|
|
out[src + 2] == '?' || out[src + 2] == '/')) {
|
|
src += 2; // just skip over it
|
|
continue;
|
|
}
|
|
out[dst++] = '/';
|
|
src++;
|
|
} else {
|
|
if ((c == '?') || (c == '#')) {
|
|
skip = true;
|
|
}
|
|
out[dst++] = (char) c;
|
|
src++;
|
|
}
|
|
}
|
|
out[dst] = 0;
|
|
|
|
// Finally lets make sure that the results are valid UTF-8.
|
|
// This guards against using UTF-8 redundancy to break security.
|
|
if ((rv = url_utf8_validate(out)) != 0) {
|
|
nni_free(out, len);
|
|
return (rv);
|
|
}
|
|
|
|
*outp = nni_strdup(out);
|
|
nni_free(out, len);
|
|
return (*outp == NULL ? NNG_ENOMEM : 0);
|
|
}
|
|
|
|
static struct {
|
|
const char *scheme;
|
|
const char *port;
|
|
} nni_url_default_ports[] = {
|
|
// This list is not exhaustive, but likely covers the main ones we
|
|
// care about. Feel free to add additional ones as use cases arise.
|
|
// Note also that we don't use "default" ports for SP protocols
|
|
// that have no "default" port, like tcp:// or tls+tcp://.
|
|
// clang-format off
|
|
{ "git", "9418" },
|
|
{ "gopher", "70" },
|
|
{ "http", "80" },
|
|
{ "https", "443" },
|
|
{ "ssh", "22" },
|
|
{ "telnet", "23" },
|
|
{ "ws", "80" },
|
|
{ "wss", "443" },
|
|
{ NULL, NULL },
|
|
// clang-format on
|
|
};
|
|
|
|
const char *
|
|
nni_url_default_port(const char *scheme)
|
|
{
|
|
const char *s;
|
|
|
|
for (int i = 0; (s = nni_url_default_ports[i].scheme) != NULL; i++) {
|
|
size_t l = strlen(s);
|
|
if (strncmp(s, scheme, strlen(s)) != 0) {
|
|
continue;
|
|
}
|
|
// It can have a suffix of either "4" or "6" to restrict
|
|
// the address family. This is an NNG extension.
|
|
switch (scheme[l]) {
|
|
case '\0':
|
|
return (nni_url_default_ports[i].port);
|
|
case '4':
|
|
case '6':
|
|
if (scheme[l + 1] == '\0') {
|
|
return (nni_url_default_ports[i].port);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
return ("");
|
|
}
|
|
|
|
// URLs usually follow the following format:
|
|
//
|
|
// scheme:[//[userinfo@]host][/]path[?query][#fragment]
|
|
//
|
|
// There are other URL formats, for example mailto: but these are
|
|
// generally not used with nanomsg transports. Golang calls these
|
|
//
|
|
// scheme:opaque[?query][#fragment]
|
|
//
|
|
// Nanomsg URLs are always of the first form, we always require a
|
|
// scheme with a leading //, such as http:// or tcp://. So our parser
|
|
// is a bit more restricted, but sufficient for our needs.
|
|
int
|
|
nni_url_parse(nni_url **urlp, const char *raw)
|
|
{
|
|
nni_url * url;
|
|
size_t len;
|
|
const char *s;
|
|
char c;
|
|
int rv;
|
|
|
|
if ((url = NNI_ALLOC_STRUCT(url)) == NULL) {
|
|
return (NNG_ENOMEM);
|
|
}
|
|
|
|
if ((url->u_rawurl = nni_strdup(raw)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
|
|
// Grab the scheme.
|
|
s = raw;
|
|
for (len = 0; (c = s[len]) != ':'; len++) {
|
|
if (c == 0) {
|
|
break;
|
|
}
|
|
}
|
|
if (strncmp(s + len, "://", 3) != 0) {
|
|
rv = NNG_EINVAL;
|
|
goto error;
|
|
}
|
|
|
|
if ((url->u_scheme = nni_alloc(len + 1)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
for (size_t i = 0; i < len; i++) {
|
|
url->u_scheme[i] = (char) tolower(s[i]);
|
|
}
|
|
url->u_scheme[len] = '\0';
|
|
s += len + 3; // strlen("://")
|
|
|
|
// For compatibility reasons, we treat ipc:// and inproc:// paths
|
|
// specially. These names URLs have a path name (ipc) or arbitrary
|
|
// string (inproc) and don't include anything like a host. Note that
|
|
// in the case of path names, it is incumbent upon the application to
|
|
// ensure that valid and safe path names are used. Note also that
|
|
// path names are not canonicalized, which means that the address and
|
|
// URL properties for relative paths won't be portable to other
|
|
// processes unless they are in the same directory. When in doubt,
|
|
// we recommend using absolute paths, such as ipc:///var/run/socket.
|
|
|
|
if ((strcmp(url->u_scheme, "ipc") == 0) ||
|
|
(strcmp(url->u_scheme, "unix") == 0) ||
|
|
(strcmp(url->u_scheme, "abstract") == 0) ||
|
|
(strcmp(url->u_scheme, "inproc") == 0)) {
|
|
if ((url->u_path = nni_strdup(s)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
*urlp = url;
|
|
return (0);
|
|
}
|
|
|
|
// Look for host part (including colon). Will be terminated by
|
|
// a path, or NUL. May also include an "@", separating a user
|
|
// field.
|
|
for (len = 0; (c = s[len]) != '/'; len++) {
|
|
if ((c == '\0') || (c == '#') || (c == '?')) {
|
|
break;
|
|
}
|
|
if (c == '@') {
|
|
// This is a username.
|
|
if (url->u_userinfo != NULL) { // we already have one
|
|
rv = NNG_EINVAL;
|
|
goto error;
|
|
}
|
|
if ((url->u_userinfo = nni_alloc(len + 1)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
memcpy(url->u_userinfo, s, len);
|
|
url->u_userinfo[len] = '\0';
|
|
s += len + 1; // skip past user@ ...
|
|
len = 0;
|
|
}
|
|
}
|
|
|
|
// If the hostname part is just '*', skip over it. (We treat it
|
|
// as an empty host for legacy nanomsg compatibility. This may be
|
|
// non-RFC compliant, but we're really only interested in parsing
|
|
// nanomsg URLs.)
|
|
if (((len == 1) && (s[0] == '*')) ||
|
|
((len > 1) && (strncmp(s, "*:", 2) == 0))) {
|
|
s++;
|
|
len--;
|
|
}
|
|
|
|
if ((url->u_host = nni_alloc(len + 1)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
// Copy the host portion, but make it lower case (hostnames are
|
|
// case insensitive).
|
|
for (size_t i = 0; i < len; i++) {
|
|
url->u_host[i] = (char) tolower(s[i]);
|
|
}
|
|
url->u_host[len] = '\0';
|
|
s += len;
|
|
|
|
if ((rv = url_canonify_uri(&url->u_requri, s)) != 0) {
|
|
goto error;
|
|
}
|
|
|
|
s = url->u_requri;
|
|
for (len = 0; (c = s[len]) != '\0'; len++) {
|
|
if ((c == '?') || (c == '#')) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
if ((url->u_path = nni_alloc(len + 1)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
memcpy(url->u_path, s, len);
|
|
url->u_path[len] = '\0';
|
|
|
|
s += len;
|
|
|
|
// Look for query info portion.
|
|
if (s[0] == '?') {
|
|
s++;
|
|
for (len = 0; (c = s[len]) != '\0'; len++) {
|
|
if (c == '#') {
|
|
break;
|
|
}
|
|
}
|
|
if ((url->u_query = nni_alloc(len + 1)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
memcpy(url->u_query, s, len);
|
|
url->u_query[len] = '\0';
|
|
s += len;
|
|
}
|
|
|
|
// Look for fragment. Will always be last, so we just use
|
|
// strdup.
|
|
if (s[0] == '#') {
|
|
if ((url->u_fragment = nni_strdup(s + 1)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
}
|
|
|
|
// Now go back to the host portion, and look for a separate
|
|
// port We also yank off the "[" part for IPv6 addresses.
|
|
s = url->u_host;
|
|
if (s[0] == '[') {
|
|
s++;
|
|
for (len = 0; s[len] != ']'; len++) {
|
|
if (s[len] == '\0') {
|
|
rv = NNG_EINVAL;
|
|
goto error;
|
|
}
|
|
}
|
|
if ((s[len + 1] != ':') && (s[len + 1] != '\0')) {
|
|
rv = NNG_EINVAL;
|
|
goto error;
|
|
}
|
|
} else {
|
|
for (len = 0; s[len] != ':'; len++) {
|
|
if (s[len] == '\0') {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if ((url->u_hostname = nni_alloc(len + 1)) == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
memcpy(url->u_hostname, s, len);
|
|
url->u_hostname[len] = '\0';
|
|
s += len;
|
|
|
|
if (s[0] == ']') {
|
|
s++; // skip over ']', only used with IPv6 addresses
|
|
}
|
|
if (s[0] == ':') {
|
|
// If a colon was present, but no port value present, then
|
|
// that is an error.
|
|
if (s[1] == '\0') {
|
|
rv = NNG_EINVAL;
|
|
goto error;
|
|
}
|
|
url->u_port = nni_strdup(s + 1);
|
|
} else {
|
|
url->u_port = nni_strdup(nni_url_default_port(url->u_scheme));
|
|
}
|
|
if (url->u_port == NULL) {
|
|
rv = NNG_ENOMEM;
|
|
goto error;
|
|
}
|
|
|
|
*urlp = url;
|
|
return (0);
|
|
|
|
error:
|
|
nni_url_free(url);
|
|
return (rv);
|
|
}
|
|
|
|
void
|
|
nni_url_free(nni_url *url)
|
|
{
|
|
if (url != NULL) {
|
|
nni_strfree(url->u_rawurl);
|
|
nni_strfree(url->u_scheme);
|
|
nni_strfree(url->u_userinfo);
|
|
nni_strfree(url->u_host);
|
|
nni_strfree(url->u_hostname);
|
|
nni_strfree(url->u_port);
|
|
nni_strfree(url->u_path);
|
|
nni_strfree(url->u_query);
|
|
nni_strfree(url->u_fragment);
|
|
nni_strfree(url->u_requri);
|
|
NNI_FREE_STRUCT(url);
|
|
}
|
|
}
|
|
|
|
int
|
|
nni_url_asprintf(char **str, const nni_url *url)
|
|
{
|
|
const char *scheme = url->u_scheme;
|
|
const char *port = url->u_port;
|
|
const char *host = url->u_hostname;
|
|
const char *hostob = "";
|
|
const char *hostcb = "";
|
|
|
|
if ((strcmp(scheme, "ipc") == 0) || (strcmp(scheme, "inproc") == 0) ||
|
|
(strcmp(scheme, "unix") == 0) ||
|
|
(strcmp(scheme, "ipc+abstract") == 0) ||
|
|
(strcmp(scheme, "unix+abstract") == 0)) {
|
|
return (nni_asprintf(str, "%s://%s", scheme, url->u_path));
|
|
}
|
|
|
|
if (port != NULL) {
|
|
if ((strlen(port) == 0) ||
|
|
(strcmp(nni_url_default_port(scheme), port) == 0)) {
|
|
port = NULL;
|
|
}
|
|
}
|
|
if (strcmp(host, "*") == 0) {
|
|
host = "";
|
|
}
|
|
if (strchr(host, ':') != 0) {
|
|
hostob = "[";
|
|
hostcb = "]";
|
|
}
|
|
return (nni_asprintf(str, "%s://%s%s%s%s%s%s", scheme, hostob, host,
|
|
hostcb, port != NULL ? ":" : "", port != NULL ? port : "",
|
|
url->u_requri != NULL ? url->u_requri : ""));
|
|
}
|
|
|
|
// nni_url_asprintf_port is like nni_url_asprintf, but includes a port
|
|
// override. If non-zero, this port number replaces the port number
|
|
// in the port string.
|
|
int
|
|
nni_url_asprintf_port(char **str, const nni_url *url, int port)
|
|
{
|
|
char portstr[16];
|
|
nni_url myurl = *url;
|
|
|
|
if (port > 0) {
|
|
(void) snprintf(portstr, sizeof(portstr), "%d", port);
|
|
myurl.u_port = portstr;
|
|
}
|
|
return (nni_url_asprintf(str, &myurl));
|
|
}
|
|
|
|
#define URL_COPYSTR(d, s) ((s != NULL) && ((d = nni_strdup(s)) == NULL))
|
|
|
|
int
|
|
nni_url_clone(nni_url **dstp, const nni_url *src)
|
|
{
|
|
nni_url *dst;
|
|
|
|
if ((dst = NNI_ALLOC_STRUCT(dst)) == NULL) {
|
|
return (NNG_ENOMEM);
|
|
}
|
|
if (URL_COPYSTR(dst->u_rawurl, src->u_rawurl) ||
|
|
URL_COPYSTR(dst->u_scheme, src->u_scheme) ||
|
|
URL_COPYSTR(dst->u_userinfo, src->u_userinfo) ||
|
|
URL_COPYSTR(dst->u_host, src->u_host) ||
|
|
URL_COPYSTR(dst->u_hostname, src->u_hostname) ||
|
|
URL_COPYSTR(dst->u_port, src->u_port) ||
|
|
URL_COPYSTR(dst->u_requri, src->u_requri) ||
|
|
URL_COPYSTR(dst->u_path, src->u_path) ||
|
|
URL_COPYSTR(dst->u_query, src->u_query) ||
|
|
URL_COPYSTR(dst->u_fragment, src->u_fragment)) {
|
|
nni_url_free(dst);
|
|
return (NNG_ENOMEM);
|
|
}
|
|
*dstp = dst;
|
|
return (0);
|
|
}
|
|
|
|
#undef URL_COPYSTR
|