git/connect.c

829 lines
19 KiB
C
Raw Normal View History

#include "git-compat-util.h"
#include "cache.h"
#include "pkt-line.h"
#include "quote.h"
#include "refs.h"
#include "run-command.h"
#include "remote.h"
#include "connect.h"
#include "url.h"
#include "string-list.h"
#include "sha1-array.h"
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
#include "transport.h"
static char *server_capabilities;
static const char *parse_feature_value(const char *, const char *, int *);
static int check_ref(const char *name, unsigned int flags)
{
if (!flags)
return 1;
if (!skip_prefix(name, "refs/", &name))
return 0;
/* REF_NORMAL means that we don't want the magic fake tag refs */
if ((flags & REF_NORMAL) && check_refname_format(name, 0))
return 0;
/* REF_HEADS means that we want regular branch heads */
if ((flags & REF_HEADS) && starts_with(name, "heads/"))
return 1;
/* REF_TAGS means that we want tags */
if ((flags & REF_TAGS) && starts_with(name, "tags/"))
return 1;
/* All type bits clear means that we are ok with anything */
return !(flags & ~REF_NORMAL);
}
int check_ref_type(const struct ref *ref, int flags)
{
return check_ref(ref->name, flags);
}
static void die_initial_contact(int got_at_least_one_head)
{
if (got_at_least_one_head)
die("The remote end hung up upon initial contact");
else
die("Could not read from remote repository.\n\n"
"Please make sure you have the correct access rights\n"
"and the repository exists.");
}
static void parse_one_symref_info(struct string_list *symref, const char *val, int len)
{
char *sym, *target;
struct string_list_item *item;
if (!len)
return; /* just "symref" */
/* e.g. "symref=HEAD:refs/heads/master" */
sym = xmemdupz(val, len);
target = strchr(sym, ':');
if (!target)
/* just "symref=something" */
goto reject;
*(target++) = '\0';
if (check_refname_format(sym, REFNAME_ALLOW_ONELEVEL) ||
check_refname_format(target, REFNAME_ALLOW_ONELEVEL))
/* "symref=bogus:pair */
goto reject;
item = string_list_append(symref, sym);
item->util = target;
return;
reject:
free(sym);
return;
}
static void annotate_refs_with_symref_info(struct ref *ref)
{
struct string_list symref = STRING_LIST_INIT_DUP;
const char *feature_list = server_capabilities;
while (feature_list) {
int len;
const char *val;
val = parse_feature_value(feature_list, "symref", &len);
if (!val)
break;
parse_one_symref_info(&symref, val, len);
feature_list = val + 1;
}
string_list_sort(&symref);
for (; ref; ref = ref->next) {
struct string_list_item *item;
item = string_list_lookup(&symref, ref->name);
if (!item)
continue;
ref->symref = xstrdup((char *)item->util);
}
string_list_clear(&symref, 0);
}
/*
* Read all the refs from the other end
*/
struct ref **get_remote_heads(int in, char *src_buf, size_t src_len,
struct ref **list, unsigned int flags,
struct sha1_array *extra_have,
struct sha1_array *shallow_points)
{
struct ref **orig_list = list;
int got_at_least_one_head = 0;
*list = NULL;
for (;;) {
struct ref *ref;
struct object_id old_oid;
char *name;
int len, name_len;
pkt-line: provide a LARGE_PACKET_MAX static buffer Most of the callers of packet_read_line just read into a static 1000-byte buffer (callers which handle arbitrary binary data already use LARGE_PACKET_MAX). This works fine in practice, because: 1. The only variable-sized data in these lines is a ref name, and refs tend to be a lot shorter than 1000 characters. 2. When sending ref lines, git-core always limits itself to 1000 byte packets. However, the only limit given in the protocol specification in Documentation/technical/protocol-common.txt is LARGE_PACKET_MAX; the 1000 byte limit is mentioned only in pack-protocol.txt, and then only describing what we write, not as a specific limit for readers. This patch lets us bump the 1000-byte limit to LARGE_PACKET_MAX. Even though git-core will never write a packet where this makes a difference, there are two good reasons to do this: 1. Other git implementations may have followed protocol-common.txt and used a larger maximum size. We don't bump into it in practice because it would involve very long ref names. 2. We may want to increase the 1000-byte limit one day. Since packets are transferred before any capabilities, it's difficult to do this in a backwards-compatible way. But if we bump the size of buffer the readers can handle, eventually older versions of git will be obsolete enough that we can justify bumping the writers, as well. We don't have plans to do this anytime soon, but there is no reason not to start the clock ticking now. Just bumping all of the reading bufs to LARGE_PACKET_MAX would waste memory. Instead, since most readers just read into a temporary buffer anyway, let's provide a single static buffer that all callers can use. We can further wrap this detail away by having the packet_read_line wrapper just use the buffer transparently and return a pointer to the static storage. That covers most of the cases, and the remaining ones already read into their own LARGE_PACKET_MAX buffers. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:57 +08:00
char *buffer = packet_buffer;
const char *arg;
len = packet_read(in, &src_buf, &src_len,
pkt-line: share buffer/descriptor reading implementation The packet_read function reads from a descriptor. The packet_get_line function is similar, but reads from an in-memory buffer, and uses a completely separate implementation. This patch teaches the generic packet_read function to accept either source, and we can do away with packet_get_line's implementation. There are two other differences to account for between the old and new functions. The first is that we used to read into a strbuf, but now read into a fixed size buffer. The only two callers are fine with that, and in fact it simplifies their code, since they can use the same static-buffer interface as the rest of the packet_read_line callers (and we provide a similar convenience wrapper for reading from a buffer rather than a descriptor). This is technically an externally-visible behavior change in that we used to accept arbitrary sized packets up to 65532 bytes, and now cap out at LARGE_PACKET_MAX, 65520. In practice this doesn't matter, as we use it only for parsing smart-http headers (of which there is exactly one defined, and it is small and fixed-size). And any extension headers would be breaking the protocol to go over LARGE_PACKET_MAX anyway. The other difference is that packet_get_line would return on error rather than dying. However, both callers of packet_get_line are actually improved by dying. The first caller does its own error checking, but we can drop that; as a result, we'll actually get more specific reporting about protocol breakage when packet_read dies internally. The only downside is that packet_read will not print the smart-http URL that failed, but that's not a big deal; anybody not debugging can already see the remote's URL already, and anybody debugging would want to run with GIT_CURL_VERBOSE anyway to see way more information. The second caller, which is just trying to skip past any extra smart-http headers (of which there are none defined, but which we allow to keep room for future expansion), did not error check at all. As a result, it would treat an error just like a flush packet. The resulting mess would generally cause an error later in get_remote_heads, but now we get error reporting much closer to the source of the problem. Brown-paper-bag-fixes-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-24 06:31:34 +08:00
packet_buffer, sizeof(packet_buffer),
pkt-line: teach packet_read_line to chomp newlines The packets sent during ref negotiation are all terminated by newline; even though the code to chomp these newlines is short, we end up doing it in a lot of places. This patch teaches packet_read_line to auto-chomp the trailing newline; this lets us get rid of a lot of inline chomping code. As a result, some call-sites which are not reading line-oriented data (e.g., when reading chunks of packfiles alongside sideband) transition away from packet_read_line to the generic packet_read interface. This patch converts all of the existing callsites. Since the function signature of packet_read_line does not change (but its behavior does), there is a possibility of new callsites being introduced in later commits, silently introducing an incompatibility. However, since a later patch in this series will change the signature, such a commit would have to be merged directly into this commit, not to the tip of the series; we can therefore ignore the issue. This is an internal cleanup and should produce no change of behavior in the normal case. However, there is one corner case to note. Callers of packet_read_line have never been able to tell the difference between a flush packet ("0000") and an empty packet ("0004"), as both cause packet_read_line to return a length of 0. Readers treat them identically, even though Documentation/technical/protocol-common.txt says we must not; it also says that implementations should not send an empty pkt-line. By stripping out the newline before the result gets to the caller, we will now treat the newline-only packet ("0005\n") the same as an empty packet, which in turn gets treated like a flush packet. In practice this doesn't matter, as neither empty nor newline-only packets are part of git's protocols (at least not for the line-oriented bits, and readers who are not expecting line-oriented packets will be calling packet_read directly, anyway). But even if we do decide to care about the distinction later, it is orthogonal to this patch. The right place to tighten would be to stop treating empty packets as flush packets, and this change does not make doing so any harder. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-02-21 04:02:28 +08:00
PACKET_READ_GENTLE_ON_EOF |
PACKET_READ_CHOMP_NEWLINE);
if (len < 0)
die_initial_contact(got_at_least_one_head);
if (!len)
break;
if (len > 4 && skip_prefix(buffer, "ERR ", &arg))
die("remote error: %s", arg);
if (len == GIT_SHA1_HEXSZ + strlen("shallow ") &&
skip_prefix(buffer, "shallow ", &arg)) {
if (get_oid_hex(arg, &old_oid))
die("protocol error: expected shallow sha-1, got '%s'", arg);
if (!shallow_points)
die("repository on the other end cannot be shallow");
sha1_array_append(shallow_points, old_oid.hash);
continue;
}
if (len < GIT_SHA1_HEXSZ + 2 || get_oid_hex(buffer, &old_oid) ||
buffer[GIT_SHA1_HEXSZ] != ' ')
die("protocol error: expected sha/ref, got '%s'", buffer);
name = buffer + GIT_SHA1_HEXSZ + 1;
name_len = strlen(name);
if (len != name_len + GIT_SHA1_HEXSZ + 1) {
Avoid unnecessary "if-before-free" tests. This change removes all obvious useless if-before-free tests. E.g., it replaces code like this: if (some_expression) free (some_expression); with the now-equivalent: free (some_expression); It is equivalent not just because POSIX has required free(NULL) to work for a long time, but simply because it has worked for so long that no reasonable porting target fails the test. Here's some evidence from nearly 1.5 years ago: http://www.winehq.org/pipermail/wine-patches/2006-October/031544.html FYI, the change below was prepared by running the following: git ls-files -z | xargs -0 \ perl -0x3b -pi -e \ 's/\bif\s*\(\s*(\S+?)(?:\s*!=\s*NULL)?\s*\)\s+(free\s*\(\s*\1\s*\))/$2/s' Note however, that it doesn't handle brace-enclosed blocks like "if (x) { free (x); }". But that's ok, since there were none like that in git sources. Beware: if you do use the above snippet, note that it can produce syntactically invalid C code. That happens when the affected "if"-statement has a matching "else". E.g., it would transform this if (x) free (x); else foo (); into this: free (x); else foo (); There were none of those here, either. If you're interested in automating detection of the useless tests, you might like the useless-if-before-free script in gnulib: [it *does* detect brace-enclosed free statements, and has a --name=S option to make it detect free-like functions with different names] http://git.sv.gnu.org/gitweb/?p=gnulib.git;a=blob;f=build-aux/useless-if-before-free Addendum: Remove one more (in imap-send.c), spotted by Jean-Luc Herren <jlh@gmx.ch>. Signed-off-by: Jim Meyering <meyering@redhat.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-01 01:26:32 +08:00
free(server_capabilities);
server_capabilities = xstrdup(name + name_len + 1);
}
if (extra_have && !strcmp(name, ".have")) {
sha1_array_append(extra_have, old_oid.hash);
2008-09-09 16:27:09 +08:00
continue;
}
if (!check_ref(name, flags))
continue;
ref = alloc_ref(buffer + GIT_SHA1_HEXSZ + 1);
oidcpy(&ref->old_oid, &old_oid);
*list = ref;
list = &ref->next;
got_at_least_one_head = 1;
}
annotate_refs_with_symref_info(*orig_list);
return list;
}
static const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp)
{
int len;
if (!feature_list)
return NULL;
len = strlen(feature);
while (*feature_list) {
const char *found = strstr(feature_list, feature);
if (!found)
return NULL;
2012-08-14 09:59:27 +08:00
if (feature_list == found || isspace(found[-1])) {
const char *value = found + len;
/* feature with no value (e.g., "thin-pack") */
if (!*value || isspace(*value)) {
if (lenp)
*lenp = 0;
return value;
}
/* feature with a value (e.g., "agent=git/1.2.3") */
else if (*value == '=') {
value++;
if (lenp)
*lenp = strcspn(value, " \t\n");
return value;
}
/*
* otherwise we matched a substring of another feature;
* keep looking
*/
}
feature_list = found + 1;
}
return NULL;
}
2012-08-14 09:59:27 +08:00
int parse_feature_request(const char *feature_list, const char *feature)
{
return !!parse_feature_value(feature_list, feature, NULL);
}
const char *server_feature_value(const char *feature, int *len)
{
return parse_feature_value(server_capabilities, feature, len);
}
int server_supports(const char *feature)
{
return !!server_feature_value(feature, NULL);
}
enum protocol {
PROTO_LOCAL = 1,
PROTO_FILE,
PROTO_SSH,
PROTO_GIT
};
int url_is_local_not_ssh(const char *url)
{
const char *colon = strchr(url, ':');
const char *slash = strchr(url, '/');
return !colon || (slash && slash < colon) ||
has_dos_drive_prefix(url);
}
static const char *prot_name(enum protocol protocol)
{
switch (protocol) {
case PROTO_LOCAL:
case PROTO_FILE:
return "file";
case PROTO_SSH:
return "ssh";
case PROTO_GIT:
return "git";
default:
return "unknown protocol";
}
}
static enum protocol get_protocol(const char *name)
{
if (!strcmp(name, "ssh"))
return PROTO_SSH;
if (!strcmp(name, "git"))
return PROTO_GIT;
if (!strcmp(name, "git+ssh"))
return PROTO_SSH;
if (!strcmp(name, "ssh+git"))
return PROTO_SSH;
if (!strcmp(name, "file"))
return PROTO_FILE;
die("I don't handle protocol '%s'", name);
}
static char *host_end(char **hoststart, int removebrackets)
{
char *host = *hoststart;
char *end;
char *start = strstr(host, "@[");
if (start)
start++; /* Jump over '@' */
else
start = host;
if (start[0] == '[') {
end = strchr(start + 1, ']');
if (end) {
if (removebrackets) {
*end = 0;
memmove(start, start + 1, end - start);
end++;
}
} else
end = host;
} else
end = host;
return end;
}
#define STR_(s) # s
#define STR(s) STR_(s)
static void get_host_and_port(char **host, const char **port)
{
char *colon, *end;
end = host_end(host, 1);
colon = strchr(end, ':');
if (colon) {
long portnr = strtol(colon + 1, &end, 10);
if (end != colon + 1 && *end == '\0' && 0 <= portnr && portnr < 65536) {
*colon = 0;
*port = colon + 1;
} else if (!colon[1]) {
*colon = 0;
}
}
}
static void enable_keepalive(int sockfd)
{
int ka = 1;
if (setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &ka, sizeof(ka)) < 0)
fprintf(stderr, "unable to set SO_KEEPALIVE on socket: %s\n",
strerror(errno));
}
#ifndef NO_IPV6
static const char *ai_name(const struct addrinfo *ai)
{
static char addr[NI_MAXHOST];
if (getnameinfo(ai->ai_addr, ai->ai_addrlen, addr, sizeof(addr), NULL, 0,
NI_NUMERICHOST) != 0)
xsnprintf(addr, sizeof(addr), "(unknown)");
return addr;
}
/*
* Returns a connected socket() fd, or else die()s.
*/
static int git_tcp_connect_sock(char *host, int flags)
{
struct strbuf error_message = STRBUF_INIT;
int sockfd = -1;
const char *port = STR(DEFAULT_GIT_PORT);
struct addrinfo hints, *ai0, *ai;
int gai;
int cnt = 0;
get_host_and_port(&host, &port);
if (!*port)
port = "<none>";
memset(&hints, 0, sizeof(hints));
hints.ai_socktype = SOCK_STREAM;
hints.ai_protocol = IPPROTO_TCP;
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "Looking up %s ... ", host);
gai = getaddrinfo(host, port, &hints, &ai);
if (gai)
die("Unable to look up %s (port %s) (%s)", host, port, gai_strerror(gai));
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "done.\nConnecting to %s (port %s) ... ", host, port);
for (ai0 = ai; ai; ai = ai->ai_next, cnt++) {
sockfd = socket(ai->ai_family,
ai->ai_socktype, ai->ai_protocol);
if ((sockfd < 0) ||
(connect(sockfd, ai->ai_addr, ai->ai_addrlen) < 0)) {
strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
host, cnt, ai_name(ai), strerror(errno));
if (0 <= sockfd)
close(sockfd);
sockfd = -1;
continue;
}
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "%s ", ai_name(ai));
break;
}
freeaddrinfo(ai0);
if (sockfd < 0)
die("unable to connect to %s:\n%s", host, error_message.buf);
enable_keepalive(sockfd);
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "done.\n");
strbuf_release(&error_message);
return sockfd;
}
#else /* NO_IPV6 */
/*
* Returns a connected socket() fd, or else die()s.
*/
static int git_tcp_connect_sock(char *host, int flags)
{
struct strbuf error_message = STRBUF_INIT;
int sockfd = -1;
const char *port = STR(DEFAULT_GIT_PORT);
char *ep;
struct hostent *he;
struct sockaddr_in sa;
char **ap;
unsigned int nport;
int cnt;
get_host_and_port(&host, &port);
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "Looking up %s ... ", host);
he = gethostbyname(host);
if (!he)
die("Unable to look up %s (%s)", host, hstrerror(h_errno));
nport = strtoul(port, &ep, 10);
if ( ep == port || *ep ) {
/* Not numeric */
struct servent *se = getservbyname(port,"tcp");
if ( !se )
die("Unknown port %s", port);
nport = se->s_port;
}
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "done.\nConnecting to %s (port %s) ... ", host, port);
for (cnt = 0, ap = he->h_addr_list; *ap; ap++, cnt++) {
memset(&sa, 0, sizeof sa);
sa.sin_family = he->h_addrtype;
2005-09-29 08:26:44 +08:00
sa.sin_port = htons(nport);
memcpy(&sa.sin_addr, *ap, he->h_length);
sockfd = socket(he->h_addrtype, SOCK_STREAM, 0);
if ((sockfd < 0) ||
connect(sockfd, (struct sockaddr *)&sa, sizeof sa) < 0) {
strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
host,
cnt,
inet_ntoa(*(struct in_addr *)&sa.sin_addr),
strerror(errno));
if (0 <= sockfd)
close(sockfd);
sockfd = -1;
continue;
}
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "%s ",
inet_ntoa(*(struct in_addr *)&sa.sin_addr));
break;
}
if (sockfd < 0)
die("unable to connect to %s:\n%s", host, error_message.buf);
enable_keepalive(sockfd);
if (flags & CONNECT_VERBOSE)
fprintf(stderr, "done.\n");
return sockfd;
}
#endif /* NO_IPV6 */
static void git_tcp_connect(int fd[2], char *host, int flags)
{
int sockfd = git_tcp_connect_sock(host, flags);
fd[0] = sockfd;
fd[1] = dup(sockfd);
}
static char *git_proxy_command;
static int git_proxy_command_options(const char *var, const char *value,
void *cb)
{
if (!strcmp(var, "core.gitproxy")) {
const char *for_pos;
int matchlen = -1;
int hostlen;
const char *rhost_name = cb;
int rhost_len = strlen(rhost_name);
if (git_proxy_command)
return 0;
if (!value)
return config_error_nonbool(var);
/* [core]
* ;# matches www.kernel.org as well
* gitproxy = netcatter-1 for kernel.org
* gitproxy = netcatter-2 for sample.xz
* gitproxy = netcatter-default
*/
for_pos = strstr(value, " for ");
if (!for_pos)
/* matches everybody */
matchlen = strlen(value);
else {
hostlen = strlen(for_pos + 5);
if (rhost_len < hostlen)
matchlen = -1;
else if (!strncmp(for_pos + 5,
rhost_name + rhost_len - hostlen,
hostlen) &&
((rhost_len == hostlen) ||
rhost_name[rhost_len - hostlen -1] == '.'))
matchlen = for_pos - value;
else
matchlen = -1;
}
if (0 <= matchlen) {
/* core.gitproxy = none for kernel.org */
if (matchlen == 4 &&
!memcmp(value, "none", 4))
matchlen = 0;
git_proxy_command = xmemdupz(value, matchlen);
}
return 0;
}
return git_default_config(var, value, cb);
}
static int git_use_proxy(const char *host)
{
git_proxy_command = getenv("GIT_PROXY_COMMAND");
git_config(git_proxy_command_options, (void*)host);
return (git_proxy_command && *git_proxy_command);
}
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
static struct child_process *git_proxy_connect(int fd[2], char *host)
{
const char *port = STR(DEFAULT_GIT_PORT);
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
struct child_process *proxy;
get_host_and_port(&host, &port);
proxy = xmalloc(sizeof(*proxy));
child_process_init(proxy);
argv_array_push(&proxy->args, git_proxy_command);
argv_array_push(&proxy->args, host);
argv_array_push(&proxy->args, port);
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
proxy->in = -1;
proxy->out = -1;
if (start_command(proxy))
die("cannot start proxy %s", git_proxy_command);
connect: treat generic proxy processes like ssh processes The git_connect function returns two ends of a pipe for talking with a remote, plus a struct child_process representing the other end of the pipe. If we have a direct socket connection, then this points to a special "no_fork" child process. The code path for doing git-over-pipes or git-over-ssh sets up this child process to point to the child git command or the ssh process. When we call finish_connect eventually, we check wait() on the command and report its return value. The code path for git://, on the other hand, always sets it to no_fork. In the case of a direct TCP connection, this makes sense; we have no child process. But in the case of a proxy command (configured by core.gitproxy), we do have a child process, but we throw away its pid, and therefore ignore its return code. Instead, let's keep that information in the proxy case, and respect its return code, which can help catch some errors (though depending on your proxy command, it will be errors reported by the proxy command itself, and not propagated from git commands. Still, it is probably better to propagate such errors than to ignore them). It also means that the child_process field can reliably be used to determine whether the returned descriptors are actually a full-duplex socket, which means we should be using shutdown() instead of a simple close. Signed-off-by: Jeff King <peff@peff.net> Helped-by: Johannes Sixt <j6t@kdbg.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
fd[0] = proxy->out; /* read from proxy stdout */
fd[1] = proxy->in; /* write to proxy stdin */
return proxy;
}
static char *get_port(char *host)
{
char *end;
char *p = strchr(host, ':');
if (p) {
long port = strtol(p + 1, &end, 10);
if (end != p + 1 && *end == '\0' && 0 <= port && port < 65536) {
*p = '\0';
return p+1;
}
}
return NULL;
}
/*
* Extract protocol and relevant parts from the specified connection URL.
* The caller must free() the returned strings.
*/
static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
char **ret_path)
{
char *url;
char *host, *path;
char *end;
int separator = '/';
enum protocol protocol = PROTO_LOCAL;
if (is_url(url_orig))
url = url_decode(url_orig);
else
url = xstrdup(url_orig);
host = strstr(url, "://");
if (host) {
*host = '\0';
protocol = get_protocol(url);
host += 3;
} else {
host = url;
if (!url_is_local_not_ssh(url)) {
protocol = PROTO_SSH;
separator = ':';
}
}
/*
* Don't do destructive transforms as protocol code does
* '[]' unwrapping in get_host_and_port()
*/
end = host_end(&host, 0);
if (protocol == PROTO_LOCAL)
path = end;
else if (protocol == PROTO_FILE && has_dos_drive_prefix(end))
path = end; /* "file://$(pwd)" may be "file://C:/projects/repo" */
else
path = strchr(end, separator);
if (!path || !*path)
die("No path specified. See 'man git-pull' for valid url syntax");
/*
* null-terminate hostname and point path to ~ for URL's like this:
* ssh://host.xz/~user/repo
*/
end = path; /* Need to \0 terminate host here */
if (separator == ':')
path++; /* path starts after ':' */
if (protocol == PROTO_GIT || protocol == PROTO_SSH) {
if (path[1] == '~')
path++;
}
path = xstrdup(path);
*end = '\0';
*ret_host = xstrdup(host);
*ret_path = path;
free(url);
return protocol;
}
static struct child_process no_fork = CHILD_PROCESS_INIT;
/*
* This returns a dummy child_process if the transport protocol does not
* need fork(2), or a struct child_process object if it does. Once done,
* finish the connection with finish_connect() with the value returned from
* this function (it is safe to call finish_connect() with NULL to support
* the former case).
*
* If it returns, the connect is successful; it just dies on errors (this
* will hopefully be changed in a libification effort, to return NULL when
* the connection failed).
*/
struct child_process *git_connect(int fd[2], const char *url,
const char *prog, int flags)
{
char *hostandport, *path;
struct child_process *conn = &no_fork;
enum protocol protocol;
struct strbuf cmd = STRBUF_INIT;
/* Without this we cannot rely on waitpid() to tell
* what happened to our children.
*/
signal(SIGCHLD, SIG_DFL);
protocol = parse_connect_url(url, &hostandport, &path);
if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) {
printf("Diag: url=%s\n", url ? url : "NULL");
printf("Diag: protocol=%s\n", prot_name(protocol));
printf("Diag: hostandport=%s\n", hostandport ? hostandport : "NULL");
printf("Diag: path=%s\n", path ? path : "NULL");
conn = NULL;
} else if (protocol == PROTO_GIT) {
/*
* Set up virtual host information based on where we will
* connect, unless the user has overridden us in
* the environment.
*/
char *target_host = getenv("GIT_OVERRIDE_VIRTUAL_HOST");
if (target_host)
target_host = xstrdup(target_host);
else
target_host = xstrdup(hostandport);
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
transport_check_allowed("git");
/* These underlying connection commands die() if they
* cannot connect.
*/
if (git_use_proxy(hostandport))
conn = git_proxy_connect(fd, hostandport);
else
git_tcp_connect(fd, hostandport, flags);
/*
* Separate original protocol components prog and path
daemon: Strictly parse the "extra arg" part of the command Since 1.4.4.5 (49ba83fb67 "Add virtualization support to git-daemon") git daemon enters an infinite loop and never terminates if a client hides any extra arguments in the initial request line which is not exactly "\0host=blah\0". Since that change, a client must never insert additional extra arguments, or attempt to use any argument other than "host=", as any daemon will get stuck parsing the request line and will never complete the request. Since the client can't tell if the daemon is patched or not, it is not possible to know if additional extra args might actually be able to be safely requested. If we ever need to extend the git daemon protocol to support a new feature, we may have to do something like this to the exchange: # If both support git:// v2 # C: 000cgit://v2 S: 0010ok host user C: 0018host git.kernel.org C: 0027git-upload-pack /pub/linux-2.6.git S: ...git-upload-pack header... # If client supports git:// v2, server does not: # C: 000cgit://v2 S: <EOF> C: 003bgit-upload-pack /pub/linux-2.6.git\0host=git.kernel.org\0 S: ...git-upload-pack header... This requires the client to create two TCP connections to talk to an older git daemon, however all daemons since the introduction of daemon.c will safely reject the unknown "git://v2" command request, so the client can quite easily determine the server supports an older protocol. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-06-05 09:33:32 +08:00
* from extended host header with a NUL byte.
*
* Note: Do not add any other headers here! Doing so
* will cause older git-daemon servers to crash.
*/
packet_write(fd[1],
"%s %s%chost=%s%c",
prog, path, 0,
target_host, 0);
free(target_host);
} else {
conn = xmalloc(sizeof(*conn));
child_process_init(conn);
strbuf_addstr(&cmd, prog);
strbuf_addch(&cmd, ' ');
sq_quote_buf(&cmd, path);
/* remove repo-local variables from the environment */
conn->env = local_repo_env;
conn->use_shell = 1;
conn->in = conn->out = -1;
if (protocol == PROTO_SSH) {
const char *ssh;
int putty = 0, tortoiseplink = 0;
char *ssh_host = hostandport;
const char *port = NULL;
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
transport_check_allowed("ssh");
get_host_and_port(&ssh_host, &port);
if (!port)
port = get_port(ssh_host);
if (flags & CONNECT_DIAG_URL) {
printf("Diag: url=%s\n", url ? url : "NULL");
printf("Diag: protocol=%s\n", prot_name(protocol));
printf("Diag: userandhost=%s\n", ssh_host ? ssh_host : "NULL");
printf("Diag: port=%s\n", port ? port : "NONE");
printf("Diag: path=%s\n", path ? path : "NULL");
free(hostandport);
free(path);
free(conn);
return NULL;
}
if (looks_like_command_line_option(ssh_host))
die("strange hostname '%s' blocked", ssh_host);
ssh = getenv("GIT_SSH_COMMAND");
if (!ssh) {
const char *base;
char *ssh_dup;
/*
* GIT_SSH is the no-shell version of
* GIT_SSH_COMMAND (and must remain so for
* historical compatibility).
*/
conn->use_shell = 0;
ssh = getenv("GIT_SSH");
if (!ssh)
ssh = "ssh";
ssh_dup = xstrdup(ssh);
base = basename(ssh_dup);
tortoiseplink = !strcasecmp(base, "tortoiseplink") ||
!strcasecmp(base, "tortoiseplink.exe");
putty = tortoiseplink ||
!strcasecmp(base, "plink") ||
!strcasecmp(base, "plink.exe");
free(ssh_dup);
}
argv_array_push(&conn->args, ssh);
if (tortoiseplink)
argv_array_push(&conn->args, "-batch");
if (port) {
/* P is for PuTTY, p is for OpenSSH */
argv_array_push(&conn->args, putty ? "-P" : "-p");
argv_array_push(&conn->args, port);
}
argv_array_push(&conn->args, ssh_host);
} else {
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
transport_check_allowed("file");
}
argv_array_push(&conn->args, cmd.buf);
if (start_command(conn))
die("unable to fork");
fd[0] = conn->out; /* read from child's stdout */
fd[1] = conn->in; /* write to child's stdin */
strbuf_release(&cmd);
}
free(hostandport);
free(path);
return conn;
}
int git_connection_is_socket(struct child_process *conn)
{
return conn == &no_fork;
}
int finish_connect(struct child_process *conn)
{
int code;
if (!conn || git_connection_is_socket(conn))
return 0;
code = finish_command(conn);
free(conn);
return code;
}