From 0565cee5e4721c1f991cf9054a2d642a4a72e579 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 5 Jul 2022 01:33:39 +0000 Subject: [PATCH 1/5] t6423: add tests of dual directory rename plus add/add conflict This is an attempt at minimalizing a testcase reported by Glen Choo with tensorflow where merge-ort would report an assertion failure: Assertion failed: (ci->filemask == 2 || ci->filemask == 4), function apply_directory_rename_modifications, file merge-ort.c, line 2410 reversing the direction of the merge provides a different error: error: cache entry has null sha1: ... fatal: unable to write .git/index so we add testcases for both. With these new testcases, the recursive strategy differs in that it returns the latter error for both merge directions. These testcases are somehow a little different than Glen's original tensorflow testcase in that these ones trigger a bug with the recursive algorithm whereas his testcase didn't. I figure that means these testcases somehow manage to be more comprehensive. Reported-by: Glen Choo Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- t/t6423-merge-rename-directories.sh | 105 ++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index 479db32cd6..ed5586de28 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -5199,6 +5199,111 @@ test_expect_success '12k: Directory rename with sibling causes rename-to-self' ' ) ' +# Testcase 12l, Both sides rename a directory into the other side, both add +# a file which after directory renames are the same filename +# Commit O: sub1/file, sub2/other +# Commit A: sub3/file, sub2/{other, new_add_add_file_1} +# Commit B: sub1/{file, newfile}, sub1/sub2/{other, new_add_add_file_2} +# +# In words: +# A: sub1/ -> sub3/, add sub2/new_add_add_file_1 +# B: sub2/ -> sub1/sub2, add sub1/newfile, add sub1/sub2/new_add_add_file_2 +# +# Expected: sub3/{file, newfile, sub2/other} +# CONFLICT (add/add): sub1/sub2/new_add_add_file +# +# Note that sub1/newfile is not extraneous. Directory renames are only +# detected if they are needed, and they are only needed if the old directory +# had a new file added on the opposite side of history. So sub1/newfile +# is needed for there to be a sub1/ -> sub3/ rename. + +test_setup_12l () { + test_create_repo 12l_$1 && + ( + cd 12l_$1 && + + mkdir sub1 sub2 + echo file >sub1/file && + echo other >sub2/other && + git add sub1 sub2 && + git commit -m "O" && + + git branch O && + git branch A && + git branch B && + + git checkout A && + git mv sub1 sub3 && + echo conflicting >sub2/new_add_add_file && + git add sub2 && + test_tick && + git add -u && + git commit -m "A" && + + git checkout B && + echo dissimilar >sub2/new_add_add_file && + echo brand >sub1/newfile && + git add sub1 sub2 && + git mv sub2 sub1 && + test_tick && + git commit -m "B" + ) +} + +test_expect_merge_algorithm failure failure '12l (B into A): Rename into each other + add/add conflict' ' + test_setup_12l BintoA && + ( + cd 12l_BintoA && + + git checkout -q A^0 && + + test_must_fail git -c merge.directoryRenames=true merge -s recursive B^0 && + + test_stdout_line_count = 5 git ls-files -s && + + git rev-parse >actual \ + :0:sub3/file :0:sub3/newfile :0:sub3/sub2/other \ + :2:sub1/sub2/new_add_add_file \ + :3:sub1/sub2/new_add_add_file && + git rev-parse >expect \ + O:sub1/file B:sub1/newfile O:sub2/other \ + A:sub2/new_add_add_file \ + B:sub1/sub2/new_add_add_file && + test_cmp expect actual && + + git ls-files -o >actual && + test_write_lines actual expect >expect && + test_cmp expect actual + ) +' + +test_expect_merge_algorithm failure failure '12l (A into B): Rename into each other + add/add conflict' ' + test_setup_12l AintoB && + ( + cd 12l_AintoB && + + git checkout -q B^0 && + + test_must_fail git -c merge.directoryRenames=true merge -s recursive A^0 && + + test_stdout_line_count = 5 git ls-files -s && + + git rev-parse >actual \ + :0:sub3/file :0:sub3/newfile :0:sub3/sub2/other \ + :2:sub1/sub2/new_add_add_file \ + :3:sub1/sub2/new_add_add_file && + git rev-parse >expect \ + O:sub1/file B:sub1/newfile O:sub2/other \ + B:sub1/sub2/new_add_add_file \ + A:sub2/new_add_add_file && + test_cmp expect actual && + + git ls-files -o >actual && + test_write_lines actual expect >expect && + test_cmp expect actual + ) +' + ########################################################################### # SECTION 13: Checking informational and conflict messages # From 51e41e4eaf2bd1d0344627d3326a3e20f90f4580 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 5 Jul 2022 01:33:40 +0000 Subject: [PATCH 2/5] merge-ort: small cleanups of check_for_directory_rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional changes, just some preparatory cleanups. Suggested-by: Ævar Arnfjörð Bjarmason Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 8545354daf..ff037cca8d 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2267,18 +2267,17 @@ static char *check_for_directory_rename(struct merge_options *opt, struct strmap *collisions, int *clean_merge) { - char *new_path = NULL; + char *new_path; struct strmap_entry *rename_info; - struct strmap_entry *otherinfo = NULL; + struct strmap_entry *otherinfo; const char *new_dir; + /* Cases where we don't have a directory rename for this path */ if (strmap_empty(dir_renames)) - return new_path; + return NULL; rename_info = check_dir_renamed(path, dir_renames); if (!rename_info) - return new_path; - /* old_dir = rename_info->key; */ - new_dir = rename_info->value; + return NULL; /* * This next part is a little weird. We do not want to do an @@ -2304,6 +2303,7 @@ static char *check_for_directory_rename(struct merge_options *opt, * As it turns out, this also prevents N-way transient rename * confusion; See testcases 9c and 9d of t6043. */ + new_dir = rename_info->value; /* old_dir = rename_info->key; */ otherinfo = strmap_get_entry(dir_rename_exclusions, new_dir); if (otherinfo) { path_msg(opt, rename_info->key, 1, From 6dd1f0e9d446e9ec1da3583eb7d6959716fc31f6 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 5 Jul 2022 01:33:41 +0000 Subject: [PATCH 3/5] merge-ort: make a separate function for freeing struct collisions This commit makes no functional changes, it's just some code movement in preparation for later changes. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index ff037cca8d..1514dd173c 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2259,6 +2259,27 @@ static void compute_collisions(struct strmap *collisions, } } +static void free_collisions(struct strmap *collisions) +{ + struct hashmap_iter iter; + struct strmap_entry *entry; + + /* Free each value in the collisions map */ + strmap_for_each_entry(collisions, &iter, entry) { + struct collision_info *info = entry->value; + string_list_clear(&info->source_files, 0); + } + /* + * In compute_collisions(), we set collisions.strdup_strings to 0 + * so that we wouldn't have to make another copy of the new_path + * allocated by apply_dir_rename(). But now that we've used them + * and have no other references to these strings, it is time to + * deallocate them. + */ + free_strmap_strings(collisions); + strmap_clear(collisions, 1); +} + static char *check_for_directory_rename(struct merge_options *opt, const char *path, unsigned side_index, @@ -3029,8 +3050,6 @@ static int collect_renames(struct merge_options *opt, int i, clean = 1; struct strmap collisions; struct diff_queue_struct *side_pairs; - struct hashmap_iter iter; - struct strmap_entry *entry; struct rename_info *renames = &opt->priv->renames; side_pairs = &renames->pairs[side_index]; @@ -3076,20 +3095,7 @@ static int collect_renames(struct merge_options *opt, result->queue[result->nr++] = p; } - /* Free each value in the collisions map */ - strmap_for_each_entry(&collisions, &iter, entry) { - struct collision_info *info = entry->value; - string_list_clear(&info->source_files, 0); - } - /* - * In compute_collisions(), we set collisions.strdup_strings to 0 - * so that we wouldn't have to make another copy of the new_path - * allocated by apply_dir_rename(). But now that we've used them - * and have no other references to these strings, it is time to - * deallocate them. - */ - free_strmap_strings(&collisions); - strmap_clear(&collisions, 1); + free_collisions(&collisions); return clean; } From 3ffbe5a223aa1ed46d6f20da607d42341a8c2bf4 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 5 Jul 2022 01:33:42 +0000 Subject: [PATCH 4/5] merge-ort: shuffle the computation and cleanup of potential collisions Run compute_collisions() for renames on both sides of history before any calls to collect_renames(), and do not free the computed collisions until after both calls to collect_renames(). This is just a code reorganization at this point that doesn't make sense on its own, but will permit us to use the computed collision info from both sides within each call to collect_renames() in a subsequent commit. Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index 1514dd173c..a37c1c19ac 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2335,7 +2335,8 @@ static char *check_for_directory_rename(struct merge_options *opt, } new_path = handle_path_level_conflicts(opt, path, side_index, - rename_info, collisions); + rename_info, + &collisions[side_index]); *clean_merge &= (new_path != NULL); return new_path; @@ -3044,16 +3045,15 @@ static int detect_regular_renames(struct merge_options *opt, static int collect_renames(struct merge_options *opt, struct diff_queue_struct *result, unsigned side_index, + struct strmap *collisions, struct strmap *dir_renames_for_side, struct strmap *rename_exclusions) { int i, clean = 1; - struct strmap collisions; struct diff_queue_struct *side_pairs; struct rename_info *renames = &opt->priv->renames; side_pairs = &renames->pairs[side_index]; - compute_collisions(&collisions, dir_renames_for_side, side_pairs); for (i = 0; i < side_pairs->nr; ++i) { struct diff_filepair *p = side_pairs->queue[i]; @@ -3069,7 +3069,7 @@ static int collect_renames(struct merge_options *opt, side_index, dir_renames_for_side, rename_exclusions, - &collisions, + collisions, &clean); possibly_cache_new_pair(renames, p, side_index, new_path); @@ -3095,7 +3095,6 @@ static int collect_renames(struct merge_options *opt, result->queue[result->nr++] = p; } - free_collisions(&collisions); return clean; } @@ -3106,6 +3105,7 @@ static int detect_and_process_renames(struct merge_options *opt, { struct diff_queue_struct combined = { 0 }; struct rename_info *renames = &opt->priv->renames; + struct strmap collisions[3]; int need_dir_renames, s, i, clean = 1; unsigned detection_run = 0; @@ -3155,12 +3155,22 @@ static int detect_and_process_renames(struct merge_options *opt, ALLOC_GROW(combined.queue, renames->pairs[1].nr + renames->pairs[2].nr, combined.alloc); + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) { + int other_side = 3 - i; + compute_collisions(&collisions[i], + &renames->dir_renames[other_side], + &renames->pairs[i]); + } clean &= collect_renames(opt, &combined, MERGE_SIDE1, + collisions, &renames->dir_renames[2], &renames->dir_renames[1]); clean &= collect_renames(opt, &combined, MERGE_SIDE2, + collisions, &renames->dir_renames[1], &renames->dir_renames[2]); + for (i = MERGE_SIDE1; i <= MERGE_SIDE2; i++) + free_collisions(&collisions[i]); STABLE_QSORT(combined.queue, combined.nr, compare_pairs); trace2_region_leave("merge", "directory renames", opt->repo); From 751e16542472c7040565f97c2149c2d501ed0b81 Mon Sep 17 00:00:00 2001 From: Elijah Newren Date: Tue, 5 Jul 2022 01:33:43 +0000 Subject: [PATCH 5/5] merge-ort: fix issue with dual rename and add/add conflict There is code in both merge-recursive and merge-ort for avoiding doubly transitive renames (i.e. one side renames directory A/ -> B/, and the other side renames directory B/ -> C/), because this combination would otherwise make a mess for new files added to A/ on the first side and wondering which directory they end up in -- especially if there were even more renames such as the first side renaming C/ -> D/. In such cases, it just turns "off" directory rename detection for the higher order transitive cases. The testcases added in t6423 a couple commits ago are slightly different but similar in principle. They involve a similar case of paired renaming but instead of A/ -> B/ and B/ -> C/, the second side renames a leading directory of B/ to C/. And both sides add a new file somewhere under the directory that the other side will rename. While the new files added start within different directories and thus could logically end up within different directories, it is weird for a file on one side to end up where the other one started and not move along with it. So, let's just turn off directory rename detection in this case as well. Another way to look at this is that if the source name involved in a directory rename on one side is the target name of a directory rename operation for a file from the other side, then we avoid the doubly transitive rename. (More concretely, if a directory rename on side D wants to rename a file on side E from OLD_NAME -> NEW_NAME, and side D already had a file named NEW_NAME, and a directory rename on side E wants to rename side D's NEW_NAME -> NEWER_NAME, then we turn off the directory rename detection for NEW_NAME to prevent the NEW_NAME -> NEWER_NAME rename, and instead end up with an add/add conflict on NEW_NAME.) Signed-off-by: Elijah Newren Signed-off-by: Junio C Hamano --- merge-ort.c | 8 +++++++- t/t6423-merge-rename-directories.sh | 4 ++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/merge-ort.c b/merge-ort.c index a37c1c19ac..3855f9de25 100644 --- a/merge-ort.c +++ b/merge-ort.c @@ -2292,10 +2292,16 @@ static char *check_for_directory_rename(struct merge_options *opt, struct strmap_entry *rename_info; struct strmap_entry *otherinfo; const char *new_dir; + int other_side = 3 - side_index; - /* Cases where we don't have a directory rename for this path */ + /* + * Cases where we don't have or don't want a directory rename for + * this path. + */ if (strmap_empty(dir_renames)) return NULL; + if (strmap_get(&collisions[other_side], path)) + return NULL; rename_info = check_dir_renamed(path, dir_renames); if (!rename_info) return NULL; diff --git a/t/t6423-merge-rename-directories.sh b/t/t6423-merge-rename-directories.sh index ed5586de28..99baf77cbf 100755 --- a/t/t6423-merge-rename-directories.sh +++ b/t/t6423-merge-rename-directories.sh @@ -5250,7 +5250,7 @@ test_setup_12l () { ) } -test_expect_merge_algorithm failure failure '12l (B into A): Rename into each other + add/add conflict' ' +test_expect_merge_algorithm failure success '12l (B into A): Rename into each other + add/add conflict' ' test_setup_12l BintoA && ( cd 12l_BintoA && @@ -5277,7 +5277,7 @@ test_expect_merge_algorithm failure failure '12l (B into A): Rename into each ot ) ' -test_expect_merge_algorithm failure failure '12l (A into B): Rename into each other + add/add conflict' ' +test_expect_merge_algorithm failure success '12l (A into B): Rename into each other + add/add conflict' ' test_setup_12l AintoB && ( cd 12l_AintoB &&