From 7253a02348c1c44c84f8a13309d6d30b443bf23c Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Fri, 15 May 2015 02:29:27 -0400
Subject: [PATCH 1/3] http-backend: fix die recursion with custom handler

When we die() in http-backend, we call a custom handler that writes an
HTTP 500 response to stdout, then reports the error to stderr. Our
routines for writing out the HTTP response may themselves die, leading
to us entering die() again.

When it was originally written, that was OK; our custom handler keeps a
variable to notice this and does not recurse. However, since cd163d4
(usage.c: detect recursion in die routines and bail out immediately,
2012-11-14), the main die() implementation detects recursion before we
even get to our custom handler, and bails without printing anything
useful.

We can handle this case by doing two things:

  1. Installing a custom die_is_recursing handler that allows us to
     enter up to one level of recursion. Only the first call to our
     custom handler will try to write out the error response. So if we
     die again, that is OK. If we end up dying more than that, it is a
     sign that we are in an infinite recursion.

  2. Reporting the error to stderr before trying to write out the HTTP
     response. In the current code, if we do die() trying to write out
     the response, we'll exit immediately from this second die(), and
     never get a chance to output the original error (which is almost
     certainly the more interesting one; the second die is just going
     to be along the lines of "I tried to write to stdout but it was
     closed").

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 http-backend.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/http-backend.c b/http-backend.c
index b6c0484fb2..3ad82a894d 100644
--- a/http-backend.c
+++ b/http-backend.c
@@ -500,21 +500,24 @@ static void service_rpc(char *service_name)
 	strbuf_release(&buf);
 }
 
+static int dead;
 static NORETURN void die_webcgi(const char *err, va_list params)
 {
-	static int dead;
+	if (dead <= 1) {
+		vreportf("fatal: ", err, params);
 
-	if (!dead) {
-		dead = 1;
 		http_status(500, "Internal Server Error");
 		hdr_nocache();
 		end_headers();
-
-		vreportf("fatal: ", err, params);
 	}
 	exit(0); /* we successfully reported a failure ;-) */
 }
 
+static int die_webcgi_recursing(void)
+{
+	return dead++ > 1;
+}
+
 static char *getdir(void)
 {
 	struct strbuf buf = STRBUF_INIT;
@@ -569,6 +572,7 @@ int main(int argc, char **argv)
 	git_extract_argv0_path(argv[0]);
 
 	set_die_routine(die_webcgi);
+	set_die_is_recursing_routine(die_webcgi_recursing);
 
 	if (!method)
 		die("No REQUEST_METHOD from server");
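To see how the two-level allowance in patch 1 plays out, here is a
minimal stand-alone C sketch of the counter pattern (an illustration
only; the names mirror the patch, but none of this is git code):

    #include <stdio.h>
    #include <stdlib.h>

    static int dead;

    /* mirrors die_webcgi_recursing(): calls 0 and 1 may proceed */
    static int is_recursing(void)
    {
            return dead++ > 1;
    }

    /* mirrors die_webcgi(): only the first entry emits the response */
    static void handler(const char *msg)
    {
            if (dead <= 1) {
                    /* report to stderr first, so the original error
                     * is not lost if the response write dies */
                    fprintf(stderr, "fatal: %s\n", msg);
                    /* then try to emit the HTTP 500 response; the
                     * routines called here may themselves die() */
            }
            exit(0); /* we successfully reported a failure */
    }

    /* mirrors the usage.c entry point: bail on detected recursion */
    static void die(const char *msg)
    {
            if (is_recursing()) {
                    fputs("fatal: recursion in die handler\n", stderr);
                    exit(128);
            }
            handler(msg);
    }

    int main(void)
    {
            die("simulated failure");
            return 0;
    }

Tracing it: the first die() bumps dead to 1 and is allowed to attempt
the response; a second die() raised while emitting it bumps dead to 2
and exits quietly; only a third one makes "dead++ > 1" true and bails,
breaking what would otherwise be an infinite loop.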
From cc969c8dc1cfd1f60b1dab8ee3117172d9d1bb4f Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 20 May 2015 03:36:43 -0400
Subject: [PATCH 2/3] t5551: factor out tag creation

One of our tests in t5551 creates a large number of tags, and jumps
through some hoops to do it efficiently. Let's factor that out into a
function so we can make other similar tests.

Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/t5551-http-fetch-smart.sh | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index 6cbc12d9a7..c34e930bb9 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -213,27 +213,35 @@ test_expect_success 'cookies stored in http.cookiefile when http.savecookies set
 	test_cmp expect_cookies.txt cookies_tail.txt
 '
 
-test_expect_success EXPENSIVE 'create 50,000 tags in the repo' '
-	(
-	cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
-	for i in `test_seq 50000`
+# create an arbitrary number of tags, numbered from tag-$1 to tag-$2
+create_tags () {
+	rm -f marks &&
+	for i in $(test_seq "$1" "$2")
 	do
-		echo "commit refs/heads/too-many-refs"
-		echo "mark :$i"
-		echo "committer git <git@example.com> $i +0000"
-		echo "data 0"
-		echo "M 644 inline bla.txt"
-		echo "data 4"
-		echo "bla"
+		# don't use here-doc, because it requires a process
+		# per loop iteration
+		echo "commit refs/heads/too-many-refs-$1" &&
+		echo "mark :$i" &&
+		echo "committer git <git@example.com> $i +0000" &&
+		echo "data 0" &&
+		echo "M 644 inline bla.txt" &&
+		echo "data 4" &&
+		echo "bla" &&
 		# make every commit dangling by always
 		# rewinding the branch after each commit
-		echo "reset refs/heads/too-many-refs"
-		echo "from :1"
+		echo "reset refs/heads/too-many-refs-$1" &&
+		echo "from :$1"
 	done | git fast-import --export-marks=marks &&
 
 	# now assign tags to all the dangling commits we created above
 	tag=$(perl -e "print \"bla\" x 30") &&
 	sed -e "s|^:\([^ ]*\) \(.*\)$|\2 refs/tags/$tag-\1|" <marks >>packed-refs
+}
+
+test_expect_success EXPENSIVE 'create 50,000 tags in the repo' '
+	(
+	cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+	create_tags 1 50000
 	)
 '
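As a usage sketch of the helper factored out above: it takes an
inclusive numeric range, so a later test can extend the same repository
(the range here is arbitrary; patch 3 below does exactly this with
50001 to 100000):

    (
            cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
            create_tags 50001 60000
    )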
From 6bc0cb5176a5e42ca4a74e3558e8f0790ed09bb1 Mon Sep 17 00:00:00 2001
From: Jeff King <peff@peff.net>
Date: Wed, 20 May 2015 03:37:09 -0400
Subject: [PATCH 3/3] http-backend: spool ref negotiation requests to buffer

When http-backend spawns "upload-pack" to do ref negotiation, it
streams the http request body to upload-pack, who then streams the http
response back to the client as it reads. In theory, git can go
full-duplex; the client can consume our response while it is still
sending the request. In practice, however, HTTP is a half-duplex
protocol. Even if our client is ready to read and write simultaneously,
we may have other HTTP infrastructure in the way, including the
webserver that spawns our CGI, or any intermediate proxies.

In at least one documented case[1], this leads to deadlock when trying
a fetch over http. What happens is basically:

  1. Apache proxies the request to the CGI, http-backend.

  2. http-backend gzip-inflates the data and sends the result to
     upload-pack.

  3. upload-pack acts on the data and generates output over the pipe
     back to Apache. Apache isn't reading because it's busy writing
     (step 1).

This works fine most of the time, because the upload-pack output ends
up in a system pipe buffer, and Apache reads it as soon as it finishes
writing. But if both the request and the response exceed the system
pipe buffer size, then we deadlock (Apache blocks writing to
http-backend, http-backend blocks writing to upload-pack, and
upload-pack blocks writing to Apache).

We need to break the deadlock by spooling either the input or the
output. In this case, it's ideal to spool the input, because Apache
does not start reading either stdout _or_ stderr until we have consumed
all of the input. So until we do so, we cannot even get an error
message out to the client.

The solution is fairly straightforward: we read the request body into
an in-memory buffer in http-backend, freeing up Apache, and then feed
the data ourselves to upload-pack. But there are a few important things
to note:

  1. We limit the in-memory buffer to prevent an obvious
     denial-of-service attack. This is a new hard limit on requests,
     but it's unlikely to come into play. The default value is 10MB,
     which covers even the ridiculous 100,000-ref negotiation in the
     included test (that actually caps out just over 5MB). But it's
     configurable on the off chance that you don't mind spending some
     extra memory to make even ridiculous requests work.

  2. We must take care only to buffer when we have to. For pushes, the
     incoming packfile may be of arbitrary size, and we should connect
     the input directly to receive-pack. There's no deadlock problem
     here, though, because we do not produce any output until the whole
     packfile has been read.

     For upload-pack's initial ref advertisement, we similarly do not
     need to buffer. Even though we may generate a lot of output, there
     is no request body at all (i.e., it is a GET, not a POST).

[1] http://article.gmane.org/gmane.comp.version-control.git/269020

Test-adapted-from: Dennis Kaarsemaker <dennis@kaarsemaker.net>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-http-backend.txt |  9 +++
 http-backend.c                     | 96 ++++++++++++++++++++++++++----
 t/t5551-http-fetch-smart.sh        | 11 ++++
 3 files changed, 105 insertions(+), 11 deletions(-)

diff --git a/Documentation/git-http-backend.txt b/Documentation/git-http-backend.txt
index d422ba4b59..8c6acbe440 100644
--- a/Documentation/git-http-backend.txt
+++ b/Documentation/git-http-backend.txt
@@ -255,6 +255,15 @@ The GIT_HTTP_EXPORT_ALL environmental variable may be passed to
 'git-http-backend' to bypass the check for the "git-daemon-export-ok"
 file in each repository before allowing export of that repository.
 
+The `GIT_HTTP_MAX_REQUEST_BUFFER` environment variable (or the
+`http.maxRequestBuffer` config variable) may be set to change the
+largest ref negotiation request that git will handle during a fetch; any
+fetch requiring a larger buffer will not succeed. This value should not
+normally need to be changed, but may be helpful if you are fetching from
+a repository with an extremely large number of refs. The value can be
+specified with a unit (e.g., `100M` for 100 megabytes). The default is
+10 megabytes.
+
 The backend process sets GIT_COMMITTER_NAME to '$REMOTE_USER' and
 GIT_COMMITTER_EMAIL to '$\{REMOTE_USER}@http.$\{REMOTE_ADDR\}',
 ensuring that any reflogs created by 'git-receive-pack' contain some
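To illustrate the knob documented above, a sketch of the two ways to
raise the limit on the server (assuming an Apache setup with mod_cgi
and mod_env; the 100M value is arbitrary):

    # in the repository or system config; values take units,
    # so "100M" means 100 megabytes
    git config http.maxRequestBuffer 100M

    # or in the Apache configuration, so that the variable
    # reaches the git-http-backend CGI (SetEnv is from mod_env)
    SetEnv GIT_HTTP_MAX_REQUEST_BUFFER 100M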
diff --git a/http-backend.c b/http-backend.c
index 3ad82a894d..6bf139b768 100644
--- a/http-backend.c
+++ b/http-backend.c
@@ -13,18 +13,20 @@ static const char content_type[] = "Content-Type";
 static const char content_length[] = "Content-Length";
 static const char last_modified[] = "Last-Modified";
 static int getanyfile = 1;
+static unsigned long max_request_buffer = 10 * 1024 * 1024;
 
 static struct string_list *query_params;
 
 struct rpc_service {
 	const char *name;
 	const char *config_name;
+	unsigned buffer_input : 1;
 	signed enabled : 2;
 };
 
 static struct rpc_service rpc_service[] = {
-	{ "upload-pack", "uploadpack", 1 },
-	{ "receive-pack", "receivepack", -1 },
+	{ "upload-pack", "uploadpack", 1, 1 },
+	{ "receive-pack", "receivepack", 0, -1 },
 };
 
 static struct string_list *get_parameters(void)
@@ -225,6 +227,7 @@ static void http_config(void)
 	struct strbuf var = STRBUF_INIT;
 
 	git_config_get_bool("http.getanyfile", &getanyfile);
+	git_config_get_ulong("http.maxrequestbuffer", &max_request_buffer);
 
 	for (i = 0; i < ARRAY_SIZE(rpc_service); i++) {
 		struct rpc_service *svc = &rpc_service[i];
@@ -266,9 +269,52 @@ static struct rpc_service *select_service(const char *name)
 	return svc;
 }
 
-static void inflate_request(const char *prog_name, int out)
+/*
+ * This is basically strbuf_read(), except that if we
+ * hit max_request_buffer we die (we'd rather reject a
+ * maliciously large request than chew up infinite memory).
+ */
+static ssize_t read_request(int fd, unsigned char **out)
+{
+	size_t len = 0, alloc = 8192;
+	unsigned char *buf = xmalloc(alloc);
+
+	if (max_request_buffer < alloc)
+		max_request_buffer = alloc;
+
+	while (1) {
+		ssize_t cnt;
+
+		cnt = read_in_full(fd, buf + len, alloc - len);
+		if (cnt < 0) {
+			free(buf);
+			return -1;
+		}
+
+		/* partial read from read_in_full means we hit EOF */
+		len += cnt;
+		if (len < alloc) {
+			*out = buf;
+			return len;
+		}
+
+		/* otherwise, grow and try again (if we can) */
+		if (alloc == max_request_buffer)
+			die("request was larger than our maximum size (%lu);"
+			    " try setting GIT_HTTP_MAX_REQUEST_BUFFER",
+			    max_request_buffer);
+
+		alloc = alloc_nr(alloc);
+		if (alloc > max_request_buffer)
+			alloc = max_request_buffer;
+		REALLOC_ARRAY(buf, alloc);
+	}
+}
+
+static void inflate_request(const char *prog_name, int out, int buffer_input)
 {
 	git_zstream stream;
+	unsigned char *full_request = NULL;
 	unsigned char in_buf[8192];
 	unsigned char out_buf[8192];
 	unsigned long cnt = 0;
@@ -277,11 +323,21 @@ static void inflate_request(const char *prog_name, int out)
 	git_inflate_init_gzip_only(&stream);
 
 	while (1) {
-		ssize_t n = xread(0, in_buf, sizeof(in_buf));
+		ssize_t n;
+
+		if (buffer_input) {
+			if (full_request)
+				n = 0; /* nothing left to read */
+			else
+				n = read_request(0, &full_request);
+			stream.next_in = full_request;
+		} else {
+			n = xread(0, in_buf, sizeof(in_buf));
+			stream.next_in = in_buf;
+		}
+
 		if (n <= 0)
 			die("request ended in the middle of the gzip stream");
-
-		stream.next_in = in_buf;
 		stream.avail_in = n;
 
 		while (0 < stream.avail_in) {
@@ -307,9 +363,22 @@ static void inflate_request(const char *prog_name, int out)
 done:
 	git_inflate_end(&stream);
 	close(out);
+	free(full_request);
 }
 
-static void run_service(const char **argv)
+static void copy_request(const char *prog_name, int out)
+{
+	unsigned char *buf;
+	ssize_t n = read_request(0, &buf);
+	if (n < 0)
+		die_errno("error reading request body");
+	if (write_in_full(out, buf, n) != n)
+		die("%s aborted reading request", prog_name);
+	close(out);
+	free(buf);
+}
+
+static void run_service(const char **argv, int buffer_input)
 {
 	const char *encoding = getenv("HTTP_CONTENT_ENCODING");
 	const char *user = getenv("REMOTE_USER");
@@ -334,7 +403,7 @@ static void run_service(const char **argv)
 			"GIT_COMMITTER_EMAIL=%s@http.%s", user, host);
 
 	cld.argv = argv;
-	if (gzipped_request)
+	if (buffer_input || gzipped_request)
 		cld.in = -1;
 	cld.git_cmd = 1;
 	if (start_command(&cld))
@@ -342,7 +411,9 @@ static void run_service(const char **argv)
 	close(1);
 
 	if (gzipped_request)
-		inflate_request(argv[0], cld.in);
+		inflate_request(argv[0], cld.in, buffer_input);
+	else if (buffer_input)
+		copy_request(argv[0], cld.in);
 	else
 		close(0);
 
@@ -392,7 +463,7 @@ static void get_info_refs(char *arg)
 		packet_flush(1);
 
 		argv[0] = svc->name;
-		run_service(argv);
+		run_service(argv, 0);
 
 	} else {
 		select_getanyfile();
@@ -496,7 +567,7 @@ static void service_rpc(char *service_name)
 	end_headers();
 
 	argv[0] = svc->name;
-	run_service(argv);
+	run_service(argv, svc->buffer_input);
 
 	strbuf_release(&buf);
 }
@@ -623,6 +694,9 @@ int main(int argc, char **argv)
 		not_found("Repository not exported: '%s'", dir);
 
 	http_config();
+	max_request_buffer = git_env_ulong("GIT_HTTP_MAX_REQUEST_BUFFER",
+					   max_request_buffer);
+
 	cmd->imp(cmd_arg);
 	return 0;
 }
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index c34e930bb9..2d8e5a0fbe 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -253,5 +253,16 @@ test_expect_success EXPENSIVE 'clone the 50,000 tag repo to check OS command lin
 	)
 '
 
+test_expect_success EXPENSIVE 'http can handle enormous ref negotiation' '
+	git -C too-many-refs fetch -q --tags &&
+	(
+		cd "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
+		create_tags 50001 100000
+	) &&
+	git -C too-many-refs fetch -q --tags &&
+	git -C too-many-refs for-each-ref refs/tags >tags &&
+	test_line_count = 100000 tags
+'
+
 stop_httpd
 test_done
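For completeness, both big-repo tests above carry the EXPENSIVE
prerequisite and are skipped in a normal test run. A sketch of running
them, assuming the usual test-lib.sh behavior where setting
GIT_TEST_LONG enables the EXPENSIVE prerequisite:

    # from the t/ directory of a git checkout
    GIT_TEST_LONG=1 ./t5551-http-fetch-smart.sh -v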