git/path.c

547 lines
12 KiB
C
Raw Normal View History

/*
* I'm tired of doing "vsnprintf()" etc just to open a
* file, so here's a "return static buffer with printf"
* interface for paths.
*
* It's obviously not thread-safe. Sue me. But it's quite
* useful for doing things like
*
* f = open(mkpath("%s/%s.git", base, name), O_RDONLY);
*
* which is what it's designed for.
*/
#include "cache.h"
static char bad_path[] = "/bad-path/";
static char *get_pathname(void)
{
static char pathname_array[4][PATH_MAX];
static int index;
return pathname_array[3 & ++index];
}
static char *cleanup_path(char *path)
{
/* Clean it up */
if (!memcmp(path, "./", 2)) {
path += 2;
while (*path == '/')
path++;
}
return path;
}
char *mksnpath(char *buf, size_t n, const char *fmt, ...)
{
va_list args;
unsigned len;
va_start(args, fmt);
len = vsnprintf(buf, n, fmt, args);
va_end(args);
if (len >= n) {
strlcpy(buf, bad_path, n);
return buf;
}
return cleanup_path(buf);
}
static char *git_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
{
const char *git_dir = get_git_dir();
size_t len;
len = strlen(git_dir);
if (n < len + 1)
goto bad;
memcpy(buf, git_dir, len);
if (len && !is_dir_sep(git_dir[len-1]))
buf[len++] = '/';
len += vsnprintf(buf + len, n - len, fmt, args);
if (len >= n)
goto bad;
return cleanup_path(buf);
bad:
strlcpy(buf, bad_path, n);
return buf;
}
char *git_snpath(char *buf, size_t n, const char *fmt, ...)
{
va_list args;
va_start(args, fmt);
(void)git_vsnpath(buf, n, fmt, args);
va_end(args);
return buf;
}
char *git_pathdup(const char *fmt, ...)
{
char path[PATH_MAX];
va_list args;
va_start(args, fmt);
(void)git_vsnpath(path, sizeof(path), fmt, args);
va_end(args);
return xstrdup(path);
}
char *mkpath(const char *fmt, ...)
{
va_list args;
unsigned len;
char *pathname = get_pathname();
va_start(args, fmt);
len = vsnprintf(pathname, PATH_MAX, fmt, args);
va_end(args);
if (len >= PATH_MAX)
return bad_path;
return cleanup_path(pathname);
}
char *git_path(const char *fmt, ...)
{
const char *git_dir = get_git_dir();
char *pathname = get_pathname();
va_list args;
unsigned len;
len = strlen(git_dir);
if (len > PATH_MAX-100)
return bad_path;
memcpy(pathname, git_dir, len);
if (len && git_dir[len-1] != '/')
pathname[len++] = '/';
va_start(args, fmt);
len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
va_end(args);
if (len >= PATH_MAX)
return bad_path;
return cleanup_path(pathname);
}
/* git_mkstemp() - create tmp file honoring TMPDIR variable */
int git_mkstemp(char *path, size_t len, const char *template)
{
const char *tmp;
size_t n;
tmp = getenv("TMPDIR");
if (!tmp)
tmp = "/tmp";
n = snprintf(path, len, "%s/%s", tmp, template);
if (len <= n) {
errno = ENAMETOOLONG;
return -1;
}
return mkstemp(path);
}
int validate_headref(const char *path)
{
struct stat st;
char *buf, buffer[256];
unsigned char sha1[20];
int fd;
ssize_t len;
if (lstat(path, &st) < 0)
return -1;
/* Make sure it is a "refs/.." symlink */
if (S_ISLNK(st.st_mode)) {
len = readlink(path, buffer, sizeof(buffer)-1);
if (len >= 5 && !memcmp("refs/", buffer, 5))
return 0;
return -1;
}
/*
* Anything else, just open it and try to see if it is a symbolic ref.
*/
fd = open(path, O_RDONLY);
if (fd < 0)
return -1;
len = read_in_full(fd, buffer, sizeof(buffer)-1);
close(fd);
/*
* Is it a symbolic ref?
*/
if (len < 4)
return -1;
if (!memcmp("ref:", buffer, 4)) {
buf = buffer + 4;
len -= 4;
while (len && isspace(*buf))
buf++, len--;
if (len >= 5 && !memcmp("refs/", buf, 5))
return 0;
}
/*
* Is this a detached HEAD?
*/
if (!get_sha1_hex(buffer, sha1))
return 0;
return -1;
}
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
static char *user_path(char *buf, char *path, int sz)
{
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
struct passwd *pw;
char *slash;
int len, baselen;
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
if (!path || path[0] != '~')
return NULL;
path++;
slash = strchr(path, '/');
if (path[0] == '/' || !path[0]) {
pw = getpwuid(getuid());
}
else {
if (slash) {
*slash = 0;
pw = getpwnam(path);
*slash = '/';
}
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
else
pw = getpwnam(path);
}
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
if (!pw || !pw->pw_dir || sz <= strlen(pw->pw_dir))
return NULL;
baselen = strlen(pw->pw_dir);
memcpy(buf, pw->pw_dir, baselen);
while ((1 < baselen) && (buf[baselen-1] == '/')) {
buf[baselen-1] = 0;
baselen--;
}
if (slash && slash[1]) {
len = strlen(slash);
if (sz <= baselen + len)
return NULL;
memcpy(buf + baselen, slash, len + 1);
}
return buf;
}
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
/*
* First, one directory to try is determined by the following algorithm.
*
* (0) If "strict" is given, the path is used as given and no DWIM is
* done. Otherwise:
* (1) "~/path" to mean path under the running user's home directory;
* (2) "~user/path" to mean path under named user's home directory;
* (3) "relative/path" to mean cwd relative directory; or
* (4) "/absolute/path" to mean absolute directory.
*
* Unless "strict" is given, we try access() for existence of "%s.git/.git",
* "%s/.git", "%s.git", "%s" in this order. The first one that exists is
* what we try.
*
* Second, we try chdir() to that. Upon failure, we return NULL.
*
* Then, we try if the current directory is a valid git repository.
* Upon failure, we return NULL.
*
* If all goes well, we return the directory we used to chdir() (but
* before ~user is expanded), avoiding getcwd() resolving symbolic
* links. User relative paths are also returned as they are given,
* except DWIM suffixing.
*/
char *enter_repo(char *path, int strict)
{
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
static char used_path[PATH_MAX];
static char validated_path[PATH_MAX];
if (!path)
return NULL;
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
if (!strict) {
static const char *suffix[] = {
".git/.git", "/.git", ".git", "", NULL,
};
int len = strlen(path);
int i;
while ((1 < len) && (path[len-1] == '/')) {
path[len-1] = 0;
len--;
}
if (PATH_MAX <= len)
return NULL;
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
if (path[0] == '~') {
if (!user_path(used_path, path, PATH_MAX))
return NULL;
strcpy(validated_path, path);
path = used_path;
}
else if (PATH_MAX - 10 < len)
return NULL;
else {
path = strcpy(used_path, path);
strcpy(validated_path, path);
}
len = strlen(path);
for (i = 0; suffix[i]; i++) {
strcpy(path + len, suffix[i]);
if (!access(path, F_OK)) {
strcat(validated_path, suffix[i]);
break;
}
}
if (!suffix[i] || chdir(path))
return NULL;
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
path = validated_path;
}
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
else if (chdir(path))
return NULL;
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
if (access("objects", X_OK) == 0 && access("refs", X_OK) == 0 &&
validate_headref("HEAD") == 0) {
setenv(GIT_DIR_ENVIRONMENT, ".", 1);
check_repository_format();
[PATCH] daemon.c and path.enter_repo(): revamp path validation. The whitelist of git-daemon is checked against return value from enter_repo(), and enter_repo() used to return the value obtained from getcwd() to avoid directory aliasing issues as discussed earier (mid October 2005). Unfortunately, it did not go well as we hoped. For example, /pub on a kernel.org public machine is a symlink to its real mountpoint, and it is understandable that the administrator does not want to adjust the whitelist every time /pub needs to point at a different partition for storage allcation or whatever reasons. Being able to keep using /pub/scm as the whitelist is a desirable property. So this version of enter_repo() reports what it used to chdir() and validate, but does not use getcwd() to canonicalize the directory name. When it sees a user relative path ~user/path, it internally resolves it to try chdir() there, but it still reports ~user/path (possibly after appending .git if allowed to do so, in which case it would report ~user/path.git). What this means is that if a whitelist wants to allow a user relative path, it needs to say "~" (for all users) or list user home directories like "~alice" "~bob". And no, you cannot say /home if the advertised way to access user home directories are ~alice,~bob, etc. The whole point of this is to avoid unnecessary aliasing issues. Anyway, because of this, daemon needs to do a bit more work to guard itself. Namely, it needs to make sure that the accessor does not try to exploit its leading path match rule by inserting /../ in the middle or hanging /.. at the end. I resurrected the belts and suspender paranoia code HPA did for this purpose. This check cannot be done in the enter_repo() unconditionally, because there are valid callers of enter_repo() that want to honor /../; authorized users coming over ssh to run send-pack and fetch-pack should be allowed to do so. Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-12-03 17:45:57 +08:00
return path;
}
return NULL;
}
int adjust_shared_perm(const char *path)
{
struct stat st;
int mode;
if (!shared_repository)
return 0;
if (lstat(path, &st) < 0)
return -1;
mode = st.st_mode;
if (shared_repository) {
int tweak = shared_repository;
if (!(mode & S_IWUSR))
tweak &= ~0222;
mode |= tweak;
} else {
/* Preserve old PERM_UMASK behaviour */
if (mode & S_IWUSR)
mode |= S_IWGRP;
}
if (S_ISDIR(mode)) {
mode |= FORCE_DIR_SET_GID;
/* Copy read bits to execute bits */
mode |= (shared_repository & 0444) >> 2;
}
if ((mode & st.st_mode) != mode && chmod(path, mode) < 0)
return -2;
return 0;
}
Make git_dir a path relative to work_tree in setup_work_tree() Once we find the absolute paths for git_dir and work_tree, we can make git_dir a relative path since we know pwd will be work_tree. This should save the kernel some time traversing the path to work_tree all the time if git_dir is inside work_tree. Daniel's patch didn't apply for me as-is, so I recreated it with some differences, and here are the numbers from ten runs each. There is some IO for me - probably due to more-or-less random flushing of the journal - so the variation is bigger than I'd like, but whatever: Before: real 0m8.135s real 0m7.933s real 0m8.080s real 0m7.954s real 0m7.949s real 0m8.112s real 0m7.934s real 0m8.059s real 0m7.979s real 0m8.038s After: real 0m7.685s real 0m7.968s real 0m7.703s real 0m7.850s real 0m7.995s real 0m7.817s real 0m7.963s real 0m7.955s real 0m7.848s real 0m7.969s Now, going by "best of ten" (on the assumption that the longer numbers are all due to IO), I'm saying a 7.933s -> 7.685s reduction, and it does seem to be outside of the noise (ie the "after" case never broke 8s, while the "before" case did so half the time). So looks like about 3% to me. Doing it for a slightly smaller test-case (just the "arch" subdirectory) gets more stable numbers probably due to not filling the journal with metadata updates, so we have: Before: real 0m1.633s real 0m1.633s real 0m1.633s real 0m1.632s real 0m1.632s real 0m1.630s real 0m1.634s real 0m1.631s real 0m1.632s real 0m1.632s After: real 0m1.610s real 0m1.609s real 0m1.610s real 0m1.608s real 0m1.607s real 0m1.610s real 0m1.609s real 0m1.611s real 0m1.608s real 0m1.611s where I'ld just take the averages and say 1.632 vs 1.610, which is just over 1% peformance improvement. So it's not in the noise, but it's not as big as I initially thought and measured. (That said, it obviously depends on how deep the working directory path is too, and whether it is behind NFS or something else that might need to cause more work to look up). Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-20 03:34:06 +08:00
const char *make_relative_path(const char *abs, const char *base)
{
static char buf[PATH_MAX + 1];
int baselen;
if (!base)
return abs;
baselen = strlen(base);
if (prefixcmp(abs, base))
return abs;
if (abs[baselen] == '/')
baselen++;
else if (base[baselen - 1] != '/')
return abs;
strcpy(buf, abs + baselen);
return buf;
}
/*
* path = absolute path
* buf = buffer of at least max(2, strlen(path)+1) bytes
* It is okay if buf == path, but they should not overlap otherwise.
*
* Performs the following normalizations on path, storing the result in buf:
* - Removes trailing slashes.
* - Removes empty components.
* - Removes "." components.
* - Removes ".." components, and the components the precede them.
* "" and paths that contain only slashes are normalized to "/".
* Returns the length of the output.
*
* Note that this function is purely textual. It does not follow symlinks,
* verify the existence of the path, or make any system calls.
*/
int normalize_absolute_path(char *buf, const char *path)
{
const char *comp_start = path, *comp_end = path;
char *dst = buf;
int comp_len;
assert(buf);
assert(path);
while (*comp_start) {
assert(*comp_start == '/');
while (*++comp_end && *comp_end != '/')
; /* nothing */
comp_len = comp_end - comp_start;
if (!strncmp("/", comp_start, comp_len) ||
!strncmp("/.", comp_start, comp_len))
goto next;
if (!strncmp("/..", comp_start, comp_len)) {
while (dst > buf && *--dst != '/')
; /* nothing */
goto next;
}
memmove(dst, comp_start, comp_len);
dst += comp_len;
next:
comp_start = comp_end;
}
if (dst == buf)
*dst++ = '/';
*dst = '\0';
return dst - buf;
}
int normalize_path_copy(char *dst, const char *src)
{
char *dst0;
if (has_dos_drive_prefix(src)) {
*dst++ = *src++;
*dst++ = *src++;
}
dst0 = dst;
if (is_dir_sep(*src)) {
*dst++ = '/';
while (is_dir_sep(*src))
src++;
}
for (;;) {
char c = *src;
/*
* A path component that begins with . could be
* special:
* (1) "." and ends -- ignore and terminate.
* (2) "./" -- ignore them, eat slash and continue.
* (3) ".." and ends -- strip one and terminate.
* (4) "../" -- strip one, eat slash and continue.
*/
if (c == '.') {
if (!src[1]) {
/* (1) */
src++;
} else if (is_dir_sep(src[1])) {
/* (2) */
src += 2;
while (is_dir_sep(*src))
src++;
continue;
} else if (src[1] == '.') {
if (!src[2]) {
/* (3) */
src += 2;
goto up_one;
} else if (is_dir_sep(src[2])) {
/* (4) */
src += 3;
while (is_dir_sep(*src))
src++;
goto up_one;
}
}
}
/* copy up to the next '/', and eat all '/' */
while ((c = *src++) != '\0' && !is_dir_sep(c))
*dst++ = c;
if (is_dir_sep(c)) {
*dst++ = '/';
while (is_dir_sep(c))
c = *src++;
src--;
} else if (!c)
break;
continue;
up_one:
/*
* dst0..dst is prefix portion, and dst[-1] is '/';
* go up one level.
*/
dst -= 2; /* go past trailing '/' if any */
if (dst < dst0)
return -1;
while (1) {
if (dst <= dst0)
break;
c = *dst--;
if (c == '/') { /* MinGW: cannot be '\\' anymore */
dst += 2;
break;
}
}
}
*dst = '\0';
return 0;
}
/*
* path = Canonical absolute path
* prefix_list = Colon-separated list of absolute paths
*
* Determines, for each path in prefix_list, whether the "prefix" really
* is an ancestor directory of path. Returns the length of the longest
* ancestor directory, excluding any trailing slashes, or -1 if no prefix
* is an ancestor. (Note that this means 0 is returned if prefix_list is
* "/".) "/foo" is not considered an ancestor of "/foobar". Directories
* are not considered to be their own ancestors. path must be in a
* canonical form: empty components, or "." or ".." components are not
* allowed. prefix_list may be null, which is like "".
*/
int longest_ancestor_length(const char *path, const char *prefix_list)
{
char buf[PATH_MAX+1];
const char *ceil, *colon;
int len, max_len = -1;
if (prefix_list == NULL || !strcmp(path, "/"))
return -1;
for (colon = ceil = prefix_list; *colon; ceil = colon+1) {
for (colon = ceil; *colon && *colon != PATH_SEP; colon++);
len = colon - ceil;
if (len == 0 || len > PATH_MAX || !is_absolute_path(ceil))
continue;
strlcpy(buf, ceil, len+1);
if (normalize_path_copy(buf, buf) < 0)
continue;
len = strlen(buf);
if (len > 0 && buf[len-1] == '/')
buf[--len] = '\0';
if (!strncmp(path, buf, len) &&
path[len] == '/' &&
len > max_len) {
max_len = len;
}
}
return max_len;
}