From 23424ea7597fa17b96c82001b281a5d21f5b4874 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 5 Sep 2019 18:04:53 -0400 Subject: [PATCH 1/3] t/t5318: introduce failing 'git commit-graph write' tests When invoking 'git commit-graph' in a corrupt repository, one can cause a segfault when ancestral commits are corrupt in one way or another. This is due to two function calls in the 'commit-graph.c' code that may return NULL, but are not checked for NULL-ness before dereferencing. Before fixing the bug, introduce two failing tests that demonstrate the problem. The first test corrupts an ancestral commit's parent to point to a non-existent object. The second test instead corrupts an ancestral tree by removing the 'tree' information entirely from the commit. Both of these cases cause segfaults, each at different lines. Signed-off-by: Taylor Blau Acked-by: Derrick Stolee Signed-off-by: Junio C Hamano --- t/t5318-commit-graph.sh | 43 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index ab3eccf0fa..c855f81930 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -585,4 +585,47 @@ test_expect_success 'get_commit_tree_in_graph works for non-the_repository' ' test_cmp expect actual ' +test_expect_failure 'corrupt commit-graph write (broken parent)' ' + rm -rf repo && + git init repo && + ( + cd repo && + empty="$(git mktree broken <<-EOF && + tree $empty + parent 0000000000000000000000000000000000000000 + author whatever 1234 -0000 + committer whatever 1234 -0000 + + broken commit + EOF + broken="$(git hash-object -w -t commit --literally broken)" && + git commit-tree -p "$broken" -m "good commit" "$empty" >good && + test_must_fail git commit-graph write --stdin-commits \ + test_err && + test_i18ngrep "unable to parse commit" test_err + ) +' + +test_expect_failure 'corrupt commit-graph write (missing tree)' ' + rm -rf repo && + git init repo && + ( + cd repo && + 
tree="$(git mktree broken <<-EOF && + parent 0000000000000000000000000000000000000000 + author whatever 1234 -0000 + committer whatever 1234 -0000 + + broken commit + EOF + broken="$(git hash-object -w -t commit --literally broken)" && + git commit-tree -p "$broken" -m "good" "$tree" >good && + test_must_fail git commit-graph write --stdin-commits \ + test_err && + test_i18ngrep "unable to get tree for" test_err + ) +' + test_done From 16749b8dd2cd2d3ab693ac7ebe110e57cf054005 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 5 Sep 2019 18:04:55 -0400 Subject: [PATCH 2/3] commit-graph.c: handle commit parsing errors To write a commit graph chunk, 'write_graph_chunk_data()' takes a list of commits to write and parses each one before writing the necessary data, and continuing on to the next commit in the list. Since the majority of these commits are not parsed ahead of time (an exception is made for the *last* commit in the list, which is parsed early within 'copy_oids_to_commits'), it is possible that calling 'parse_commit_no_graph()' on them may return an error. Failing to catch these errors before de-referencing later calls can result in an undefined memory access and a SIGSEGV. One such example of this is 'get_commit_tree_oid()', which expects a parsed object as its input (in this case, the commit-graph code passes '*list'). If '*list' causes a parse error, the subsequent call will fail. Prevent such an issue by checking the return value of 'parse_commit_no_graph()' to avoid passing an unparsed object to a function which expects a parsed object, thus preventing a segfault. It is worth noting that this fix is really skirting around the issue in object.c's 'parse_object()', which makes it difficult to tell how corrupt an object is without digging into it. Presumably one could change the meaning of 'parse_object' returns, but this would require adjusting each callsite accordingly. Instead of that, add an additional check to the object parsed. 
Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 4 +++- t/t5318-commit-graph.sh | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index f2888c203b..6aa6998ecd 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -843,7 +843,9 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, uint32_t packedDate[2]; display_progress(ctx->progress, ++ctx->progress_cnt); - parse_commit_no_graph(*list); + if (parse_commit_no_graph(*list)) + die(_("unable to parse commit %s"), + oid_to_hex(&(*list)->object.oid)); hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len); parent = (*list)->parents; diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index c855f81930..abde8d4e90 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -585,7 +585,7 @@ test_expect_success 'get_commit_tree_in_graph works for non-the_repository' ' test_cmp expect actual ' -test_expect_failure 'corrupt commit-graph write (broken parent)' ' +test_expect_success 'corrupt commit-graph write (broken parent)' ' rm -rf repo && git init repo && ( From 806278dead57766bf000af62dcb8892ee3a24956 Mon Sep 17 00:00:00 2001 From: Taylor Blau Date: Thu, 5 Sep 2019 18:04:57 -0400 Subject: [PATCH 3/3] commit-graph.c: handle corrupt/missing trees Apply similar treatment as in the previous commit to handle an unchecked call to 'get_commit_tree_oid()'. Previously, a NULL return value from this function would be immediately dereferenced with '->hash', and then cause a segfault. Before dereferencing to access the 'hash' member, check the return value of 'get_commit_tree_oid()' to make sure that it is not NULL. To make this check correct, a related change is also needed in 'commit.c', which is to check the return value of 'get_commit_tree' before taking its address. 
If 'get_commit_tree' returns NULL, we encounter undefined behavior when taking the address of the return value of 'get_commit_tree' and then taking '->object.oid'. (On my system, this is memory address 0x8, which is obviously wrong). Fix this by making sure that 'get_commit_tree' returns something non-NULL before digging through a structure that is not there, thus preventing a segfault down the line in the commit graph code. Signed-off-by: Taylor Blau Signed-off-by: Junio C Hamano --- commit-graph.c | 7 ++++++- commit.c | 3 ++- t/t5318-commit-graph.sh | 2 +- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index 6aa6998ecd..cea1b37493 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -839,6 +839,7 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, while (list < last) { struct commit_list *parent; + struct object_id *tree; int edge_value; uint32_t packedDate[2]; display_progress(ctx->progress, ++ctx->progress_cnt); @@ -846,7 +847,11 @@ static void write_graph_chunk_data(struct hashfile *f, int hash_len, if (parse_commit_no_graph(*list)) die(_("unable to parse commit %s"), oid_to_hex(&(*list)->object.oid)); - hashwrite(f, get_commit_tree_oid(*list)->hash, hash_len); + tree = get_commit_tree_oid(*list); + if (!tree) + die(_("unable to get tree for %s"), + oid_to_hex(&(*list)->object.oid)); + hashwrite(f, tree->hash, hash_len); parent = (*list)->parents; diff --git a/commit.c b/commit.c index a98de16e3d..fab22cb740 100644 --- a/commit.c +++ b/commit.c @@ -358,7 +358,8 @@ struct tree *repo_get_commit_tree(struct repository *r, struct object_id *get_commit_tree_oid(const struct commit *commit) { - return &get_commit_tree(commit)->object.oid; + struct tree *tree = get_commit_tree(commit); + return tree ? 
&tree->object.oid : NULL; } void release_commit_memory(struct parsed_object_pool *pool, struct commit *c) diff --git a/t/t5318-commit-graph.sh b/t/t5318-commit-graph.sh index abde8d4e90..5d2d88b100 100755 --- a/t/t5318-commit-graph.sh +++ b/t/t5318-commit-graph.sh @@ -607,7 +607,7 @@ test_expect_success 'corrupt commit-graph write (broken parent)' ' ) ' -test_expect_failure 'corrupt commit-graph write (missing tree)' ' +test_expect_success 'corrupt commit-graph write (missing tree)' ' rm -rf repo && git init repo && (