From a2d57e22803b1da0df95b0a88a34116209eeb6e6 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Apr 2020 15:41:02 -0600 Subject: [PATCH 1/4] commit-graph.c: don't use discarded graph_name in error When writing a commit-graph layer, we do so in a temporary file which is renamed into place. If we fail to create a temporary file, for e.g., because we have too many open files, then 'git_mkstemp_mode' sets the pattern to the empty string, in which case we get an error something along the lines of: error: unable to create '' It's not useful to show the pattern here at all, since we (1) know the pattern is well-formed, and (2) would have already shown the dirname when trying to create the leading directories. So, replace this error with something friendlier. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/commit-graph.c b/commit-graph.c index af677fc98e..98bbbbfc32 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1397,7 +1397,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx) fd = git_mkstemp_mode(ctx->graph_name, 0444); if (fd < 0) { - error(_("unable to create '%s'"), ctx->graph_name); + error(_("unable to create temporary graph layer")); return -1; } From b30fdb4b4e6608ccafdfa4a13893940b85a8a7e8 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Apr 2020 15:41:06 -0600 Subject: [PATCH 2/4] t/test-lib.sh: make ULIMIT_FILE_DESCRIPTORS available to tests In t1400 the prerequisite 'ULIMIT_FILE_DESCRIPTORS' is defined and used to effectively guard the helper function 'run_with_limited_open_files' from being used on systems that do not satisfy this prerequisite. In the subsequent patch, we will introduce another test outside of t1400 that would benefit from using this prerequisite. So, move it to 'test-lib.sh' instead so that it can be used by multiple tests. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- t/t1400-update-ref.sh | 9 --------- t/test-lib.sh | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/t/t1400-update-ref.sh b/t/t1400-update-ref.sh index b815cdd1b8..65574e0f1c 100755 --- a/t/t1400-update-ref.sh +++ b/t/t1400-update-ref.sh @@ -1342,15 +1342,6 @@ test_expect_success 'fails with duplicate ref update via symref' ' test_cmp expect actual ' -run_with_limited_open_files () { - (ulimit -n 32 && "$@") -} - -test_lazy_prereq ULIMIT_FILE_DESCRIPTORS ' - test_have_prereq !MINGW,!CYGWIN && - run_with_limited_open_files true -' - test_expect_success ULIMIT_FILE_DESCRIPTORS 'large transaction creating branches does not burst open file limit' ' ( for i in $(test_seq 33) diff --git a/t/test-lib.sh b/t/test-lib.sh index 44df51be8f..cb0f93eb58 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1618,6 +1618,15 @@ test_lazy_prereq ULIMIT_STACK_SIZE ' run_with_limited_stack true ' +run_with_limited_open_files () { + (ulimit -n 32 && "$@") +} + +test_lazy_prereq ULIMIT_FILE_DESCRIPTORS ' + test_have_prereq !MINGW,!CYGWIN && + run_with_limited_open_files true +' + build_option () { git version --build-options | sed -ne "s/^$1: //p" From b78a556a6aef0d1406e0038f6cb8401f0282fa98 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 23 Apr 2020 15:41:09 -0600 Subject: [PATCH 3/4] commit-graph.c: gracefully handle file descriptor exhaustion When writing a layered commit-graph, the commit-graph machinery uses 'commit_graph_filenames_after' and 'commit_graph_hash_after' to keep track of the layers in the chain that we are in the process of writing. When the number of commit-graph layers shrinks, we initialize all entries in the aforementioned arrays, because we know the structure of the new commit-graph chain immediately (since there are no new layers, there are no unknown hash values). But when the number of commit-graph layers grows (i.e., that 'num_commit_graphs_after > num_commit_graphs_before'), then we leave some entries in the filenames and hashes arrays as uninitialized, because we will fill them in later as those values become available. For instance, we rely on 'write_commit_graph_file's to store the filename and hash of the last layer in the new chain, which is the one that it is responsible for writing. But, it's possible that 'write_commit_graph_file' may fail, e.g., from file descriptor exhaustion. In this case it is possible that 'git_mkstemp_mode' will fail, and that function will return early *before* setting the values for the last commit-graph layer's filename and hash. This causes a number of upleasant side-effects. For instance, trying to 'free()' each entry in 'ctx->commit_graph_filenames_after' (and similarly for the hashes array) causes us to 'free()' uninitialized memory, since the area is allocated with 'malloc()' and is therefore subject to contain garbage (which is left alone when 'write_commit_graph_file' returns early). This can manifest in other issues, like a general protection fault, and/or leaving a stray 'commit-graph-chain.lock' around after the process dies. (The reasoning for this is still a mystery to me, since we'd otherwise usually expect the kernel to run tempfile.c's 'atexit()' handlers in the case of a normal death...) To resolve this, initialize the memory with 'CALLOC_ARRAY' so that uninitialized entries are filled with zeros, and can thus be 'free()'d as a noop instead of causing a fault. Helped-by: Jeff King Helped-by: Derrick Stolee Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 4 ++-- t/t5324-split-commit-graph.sh | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 98bbbbfc32..1a16d49bcb 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -1602,8 +1602,8 @@ static void split_graph_merge_strategy(struct write_commit_graph_context *ctx) free(old_graph_name); } - ALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); - ALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); + CALLOC_ARRAY(ctx->commit_graph_filenames_after, ctx->num_commit_graphs_after); + CALLOC_ARRAY(ctx->commit_graph_hash_after, ctx->num_commit_graphs_after); for (i = 0; i < ctx->num_commit_graphs_after && i < ctx->num_commit_graphs_before; i++) diff --git a/t/t5324-split-commit-graph.sh b/t/t5324-split-commit-graph.sh index 87a9148fbe..1024f62f49 100755 --- a/t/t5324-split-commit-graph.sh +++ b/t/t5324-split-commit-graph.sh @@ -374,4 +374,17 @@ test_expect_success '--split=replace replaces the chain' ' graph_read_expect 2 ' +test_expect_success ULIMIT_FILE_DESCRIPTORS 'handles file descriptor exhaustion' ' + git init ulimit && + ( + cd ulimit && + for i in $(test_seq 64) + do + test_commit $i && + test_might_fail run_with_limited_open_files git commit-graph write \ + --split=no-merge --reachable || return 1 + done + ) +' + test_done From c8828530b7797f5ab584c84dc2b86d3c14b39c8d Mon Sep 17 00:00:00 2001 From: Jeff King Date: Thu, 23 Apr 2020 15:41:13 -0600 Subject: [PATCH 4/4] commit-graph: close descriptors after mmap We don't ever refer to the descriptor after mmap-ing it. And keeping it open means we can run out of descriptors in degenerate cases (e.g., thousands of split chain files). Let's close it as soon as possible. Signed-off-by: Jeff King Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 15 +++++---------- commit-graph.h | 5 +---- fuzz-commit-graph.c | 5 ++--- 3 files changed, 8 insertions(+), 17 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 1a16d49bcb..ac6ba5af45 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -69,7 +69,6 @@ static uint8_t oid_version(void) static struct commit_graph *alloc_commit_graph(void) { struct commit_graph *g = xcalloc(1, sizeof(*g)); - g->graph_fd = -1; return g; } @@ -123,14 +122,13 @@ struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st, return NULL; } graph_map = xmmap(NULL, graph_size, PROT_READ, MAP_PRIVATE, fd, 0); - ret = parse_commit_graph(graph_map, fd, graph_size); + close(fd); + ret = parse_commit_graph(graph_map, graph_size); if (ret) ret->odb = odb; - else { + else munmap(graph_map, graph_size); - close(fd); - } return ret; } @@ -165,8 +163,7 @@ static int verify_commit_graph_lite(struct commit_graph *g) return 0; } -struct commit_graph *parse_commit_graph(void *graph_map, int fd, - size_t graph_size) +struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size) { const unsigned char *data, *chunk_lookup; uint32_t i; @@ -209,7 +206,6 @@ struct commit_graph *parse_commit_graph(void *graph_map, int fd, graph->hash_len = the_hash_algo->rawsz; graph->num_chunks = *(unsigned char*)(data + 6); - graph->graph_fd = fd; graph->data = graph_map; graph->data_len = graph_size; @@ -2129,10 +2125,9 @@ void free_commit_graph(struct commit_graph *g) { if (!g) return; - if (g->graph_fd >= 0) { + if (g->data) { munmap((void *)g->data, g->data_len); g->data = NULL; - close(g->graph_fd); } free(g->filename); free(g); diff --git a/commit-graph.h b/commit-graph.h index 98ef121924..1254eae948 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -40,8 +40,6 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit *c); struct commit_graph { - int graph_fd; - const unsigned char *data; size_t data_len; @@ -66,8 +64,7 @@ struct commit_graph *load_commit_graph_one_fd_st(int fd, struct stat *st, struct object_directory *odb); struct commit_graph *read_commit_graph_one(struct repository *r, struct object_directory *odb); -struct commit_graph *parse_commit_graph(void *graph_map, int fd, - size_t graph_size); +struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size); /* * Return 1 if and only if the repository has a commit-graph diff --git a/fuzz-commit-graph.c b/fuzz-commit-graph.c index 0157acbf2e..9fd1c04edd 100644 --- a/fuzz-commit-graph.c +++ b/fuzz-commit-graph.c @@ -1,8 +1,7 @@ #include "commit-graph.h" #include "repository.h" -struct commit_graph *parse_commit_graph(void *graph_map, int fd, - size_t graph_size); +struct commit_graph *parse_commit_graph(void *graph_map, size_t graph_size); int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size); @@ -11,7 +10,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) struct commit_graph *g; initialize_the_repository(); - g = parse_commit_graph((void *)data, -1, size); + g = parse_commit_graph((void *)data, size); repo_clear(the_repository); free(g);