git/url.c
Jeff King 00b6c178c3 use strbuf_complete to conditionally append slash
When working with paths in strbufs, we frequently want to
ensure that a directory contains a trailing slash before
appending to it. We can shorten this code (and make the
intent more obvious) by calling strbuf_complete.

Most of these cases are trivially identical conversions, but
there are two things to note:

  - in a few cases we did not check that the strbuf is
    non-empty (which would lead to an out-of-bounds memory
    access). These were generally not triggerable in
    practice, either from earlier assertions, or typically
    because we would have just fed the strbuf to opendir(),
    which would choke on an empty path.

  - in a few cases we indexed the buffer with "original_len"
    or similar, rather than the current sb->len, and it is
    not immediately obvious from the diff that they are the
    same. In all of these cases, I manually verified that
    the strbuf does not change between the assignment and
    the strbuf_complete call.

This does not convert cases which look like:

  if (sb->len && !is_dir_sep(sb->buf[sb->len - 1]))
	  strbuf_addch(sb, '/');

as those are obviously semantically different. Some of these
cases arguably should be doing that, but that is out of
scope for this change, which aims purely for cleanup with no
behavior change (and at least it will make such sites easier
to find and examine in the future, as we can grep for
strbuf_complete).

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-10-05 11:08:06 -07:00

132 lines
2.7 KiB
C

#include "cache.h"
#include "url.h"
/*
 * Report whether "ch" may appear in a URL scheme.
 *
 * STD66 (RFC3986) defines a scheme as '[A-Za-z][A-Za-z0-9+.-]*'.  This
 * check is slightly looser and accepts '[A-Za-z0-9][A-Za-z0-9+.-]*',
 * because an earlier version of the check used '[A-Za-z0-9]+' and
 * remote helpers relying on that must keep working.
 *
 * "first_flag" is non-zero when "ch" is the first character of the
 * scheme, where '+', '-' and '.' are not allowed.  Returns 1 when the
 * character is acceptable and 0 otherwise.
 */
int is_urlschemechar(int first_flag, int ch)
{
	/* Letters and digits are fine in any position. */
	if (ch > 0 && isalnum(ch))
		return 1;

	/* Nothing else may start a scheme. */
	if (first_flag)
		return 0;

	return ch == '+' || ch == '-' || ch == '.';
}
/*
 * Report whether "url" looks like "<scheme>://...".
 *
 * The scheme must be non-empty and consist only of characters accepted
 * by is_urlschemechar(); it must be followed directly by "://".
 * Returns 1 on a match and 0 otherwise, including when "url" is NULL.
 */
int is_url(const char *url)
{
	const char *p = url;

	if (!p)
		return 0;

	/* Is the "scheme" part reasonable? */
	if (!is_urlschemechar(1, *p))
		return 0;
	for (p++; *p && *p != ':'; p++) {
		if (!is_urlschemechar(0, *p))
			return 0;
	}

	/* The scheme is over; what follows must be colon-slash-slash. */
	return !strncmp(p, "://", 3);
}
/*
 * Decode the two hexadecimal digits at "q" (either case) into a byte.
 *
 * Returns the value 0..255, or -1 as soon as a character that is not a
 * hex digit is seen.  A NUL is not a hex digit, so a string that ends
 * early is rejected without reading past its terminator.
 */
static int url_decode_char(const char *q)
{
	unsigned char decoded = 0;
	int digits_left = 2;

	while (digits_left--) {
		unsigned char digit = *q++;
		unsigned char nibble;

		if ('0' <= digit && digit <= '9')
			nibble = digit - '0';
		else if ('a' <= digit && digit <= 'f')
			nibble = digit - 'a' + 10;
		else if ('A' <= digit && digit <= 'F')
			nibble = digit - 'A' + 10;
		else
			return -1;

		decoded = (decoded << 4) | nibble;
	}
	return decoded;
}
/*
 * Percent-decode the text at *query, appending the result to "out".
 *
 * query:       in/out cursor; on return *query points just past the
 *              consumed input (past the terminator when one from
 *              "stop_at" was hit).
 * len:         number of input bytes that may be consumed.  A negative
 *              value means "no limit": decoding then stops only at a
 *              NUL or at a "stop_at" character.
 * stop_at:     optional set of terminator characters; the terminator
 *              is consumed but not copied to "out".  May be NULL.
 * out:         buffer receiving the decoded bytes; anything already in
 *              it is kept as a prefix.
 * decode_plus: when non-zero, '+' is decoded to a space.
 *
 * A '%' that is not followed by two hex digits is copied literally.
 * Returns the result of strbuf_detach(out, NULL).
 *
 * NOTE(review): "%00" decodes to a NUL byte inside "out", so callers
 * that treat the returned pointer as a C string will see it truncated
 * there -- confirm whether that is intended.
 */
static char *url_decode_internal(const char **query, int len,
				 const char *stop_at, struct strbuf *out,
				 int decode_plus)
{
	const char *q = *query;

	while (len) {
		unsigned char c = *q;

		if (!c)
			break;
		if (stop_at && strchr(stop_at, c)) {
			q++;
			len--;
			break;
		}

		/*
		 * Only try to decode an escape when both hex digits lie
		 * inside the input.  With a bounded length of 1 or 2 left,
		 * looking at q[1] and q[2] would read past the end, and
		 * "len -= 3" would make len negative, turning the rest of
		 * the loop into an unbounded scan.  Such a truncated escape
		 * falls through and is copied literally.
		 */
		if (c == '%' && (len < 0 || len >= 3)) {
			int val = url_decode_char(q + 1);
			if (0 <= val) {
				strbuf_addch(out, val);
				q += 3;
				len -= 3;
				continue;
			}
		}

		if (decode_plus && c == '+')
			strbuf_addch(out, ' ');
		else
			strbuf_addch(out, c);
		q++;
		len--;
	}
	*query = q;
	return strbuf_detach(out, NULL);
}
/*
 * Percent-decode the NUL-terminated string "url".
 *
 * Convenience wrapper: measures the string with strlen() and returns
 * whatever url_decode_mem() returns for that length.
 */
char *url_decode(const char *url)
{
	int url_len = strlen(url);

	return url_decode_mem(url, url_len);
}
char *url_decode_mem(const char *url, int len)
{
struct strbuf out = STRBUF_INIT;
const char *colon = memchr(url, ':', len);
/* Skip protocol part if present */
if (colon && url < colon) {
strbuf_add(&out, url, colon - url);
len -= colon - url;
url = colon;
}
return url_decode_internal(&url, len, NULL, &out, 0);
}
char *url_decode_parameter_name(const char **query)
{
struct strbuf out = STRBUF_INIT;
return url_decode_internal(query, -1, "&=", &out, 1);
}
char *url_decode_parameter_value(const char **query)
{
struct strbuf out = STRBUF_INIT;
return url_decode_internal(query, -1, "&", &out, 1);
}
/*
 * Append "url" to "buf", then hand the buffer to strbuf_complete() so
 * that it ends with a '/'.
 */
void end_url_with_slash(struct strbuf *buf, const char *url)
{
	strbuf_addstr(buf, url);

	/* Adding the trailing slash is left entirely to strbuf_complete(). */
	strbuf_complete(buf, '/');
}
void str_end_url_with_slash(const char *url, char **dest) {
struct strbuf buf = STRBUF_INIT;
end_url_with_slash(&buf, url);
free(*dest);
*dest = strbuf_detach(&buf, NULL);
}