revision.c: discount side branches when computing TREESAME

Use the BOTTOM flag to define relevance for pruning. Relevant commits
are those that are !UNINTERESTING or BOTTOM, and this allows us to
identify irrelevant side branches (UNINTERESTING && !BOTTOM).

If a merge has relevant parents, and it is TREESAME to them, then do not
let irrelevant parents cause the merge to be treated as !TREESAME.

When considering simplification, don't always include all merges -
merges with exactly one relevant parent can be simplified, if TREESAME
according to the above rule.

These two changes greatly increase simplification in limited, pruned
revision lists.

Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Kevin Bracey 2013-05-16 18:32:39 +03:00 committed by Junio C Hamano
parent 7f34a46ff5
commit 4d826608e9
3 changed files with 164 additions and 27 deletions

View File

@ -332,6 +332,80 @@ static int everybody_uninteresting(struct commit_list *orig)
return 1;
}
/*
* A definition of "relevant" commit that we can use to simplify limited graphs
* by eliminating side branches.
*
* A "relevant" commit is one that is !UNINTERESTING (ie we are including it
* in our list), or that is a specified BOTTOM commit. Then after computing
* a limited list, during processing we can generally ignore boundary merges
* coming from outside the graph, (ie from irrelevant parents), and treat
* those merges as if they were single-parent. TREESAME is defined to consider
* only relevant parents, if any. If we are TREESAME to our on-graph parents,
* we don't care if we were !TREESAME to non-graph parents.
*
* Treating bottom commits as relevant ensures that a limited graph's
* connection to the actual bottom commit is not viewed as a side branch, but
* treated as part of the graph. For example:
*
* ....Z...A---X---o---o---B
* . /
* W---Y
*
* When computing "A..B", the A-X connection is at least as important as
* Y-X, despite A being flagged UNINTERESTING.
*
* And when computing --ancestry-path "A..B", the A-X connection is more
* important than Y-X, despite both A and Y being flagged UNINTERESTING.
*/
static inline int relevant_commit(struct commit *commit)
{
return (commit->object.flags & (UNINTERESTING | BOTTOM)) != UNINTERESTING;
}
/*
* Return a single relevant commit from a parent list. If we are a TREESAME
* commit, and this selects one of our parents, then we can safely simplify to
* that parent.
*/
static struct commit *one_relevant_parent(const struct rev_info *revs,
struct commit_list *orig)
{
struct commit_list *list = orig;
struct commit *relevant = NULL;
if (!orig)
return NULL;
/*
* For 1-parent commits, or if first-parent-only, then return that
* first parent (even if not "relevant" by the above definition).
* TREESAME will have been set purely on that parent.
*/
if (revs->first_parent_only || !orig->next)
return orig->item;
/*
* For multi-parent commits, identify a sole relevant parent, if any.
* If we have only one relevant parent, then TREESAME will be set purely
* with regard to that parent, and we can simplify accordingly.
*
* If we have more than one relevant parent, or no relevant parents
* (and multiple irrelevant ones), then we can't select a parent here
* and return NULL.
*/
while (list) {
struct commit *commit = list->item;
list = list->next;
if (relevant_commit(commit)) {
if (relevant)
return NULL;
relevant = commit;
}
}
return relevant;
}
/*
* The goal is to get REV_TREE_NEW as the result only if the
* diff consists of all '+' (and no other changes), REV_TREE_OLD
@ -502,27 +576,52 @@ static unsigned update_treesame(struct rev_info *revs, struct commit *commit)
if (commit->parents && commit->parents->next) {
unsigned n;
struct treesame_state *st;
struct commit_list *p;
unsigned relevant_parents;
unsigned relevant_change, irrelevant_change;
st = lookup_decoration(&revs->treesame, &commit->object);
if (!st)
die("update_treesame %s", sha1_to_hex(commit->object.sha1));
commit->object.flags |= TREESAME;
for (n = 0; n < st->nparents; n++) {
if (!st->treesame[n]) {
commit->object.flags &= ~TREESAME;
break;
}
relevant_parents = 0;
relevant_change = irrelevant_change = 0;
for (p = commit->parents, n = 0; p; n++, p = p->next) {
if (relevant_commit(p->item)) {
relevant_change |= !st->treesame[n];
relevant_parents++;
} else
irrelevant_change |= !st->treesame[n];
}
if (relevant_parents ? relevant_change : irrelevant_change)
commit->object.flags &= ~TREESAME;
else
commit->object.flags |= TREESAME;
}
return commit->object.flags & TREESAME;
}
static inline int limiting_can_increase_treesame(const struct rev_info *revs)
{
/*
* TREESAME is irrelevant unless prune && dense;
* if simplify_history is set, we can't have a mixture of TREESAME and
* !TREESAME INTERESTING parents (and we don't have treesame[]
* decoration anyway);
* if first_parent_only is set, then the TREESAME flag is locked
* against the first parent (and again we lack treesame[] decoration).
*/
return revs->prune && revs->dense &&
!revs->simplify_history &&
!revs->first_parent_only;
}
static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
{
struct commit_list **pp, *parent;
struct treesame_state *ts = NULL;
int tree_changed = 0, nth_parent;
int relevant_change = 0, irrelevant_change = 0;
int relevant_parents, nth_parent;
/*
* If we don't do pruning, everything is interesting
@ -546,10 +645,12 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
if (!revs->dense && !commit->parents->next)
return;
for (pp = &commit->parents, nth_parent = 0;
for (pp = &commit->parents, nth_parent = 0, relevant_parents = 0;
(parent = *pp) != NULL;
pp = &parent->next, nth_parent++) {
struct commit *p = parent->item;
if (relevant_commit(p))
relevant_parents++;
if (nth_parent == 1) {
/*
@ -573,7 +674,7 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
!revs->simplify_history &&
!(commit->object.flags & UNINTERESTING)) {
ts = initialise_treesame(revs, commit);
if (!tree_changed)
if (!(irrelevant_change || relevant_change))
ts->treesame[0] = 1;
}
}
@ -619,14 +720,27 @@ static void try_to_simplify_commit(struct rev_info *revs, struct commit *commit)
/* fallthrough */
case REV_TREE_OLD:
case REV_TREE_DIFFERENT:
tree_changed = 1;
if (relevant_commit(p))
relevant_change = 1;
else
irrelevant_change = 1;
continue;
}
die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
}
if (tree_changed)
return;
commit->object.flags |= TREESAME;
/*
* TREESAME is straightforward for single-parent commits. For merge
* commits, it is most useful to define it so that "irrelevant"
* parents cannot make us !TREESAME - if we have any relevant
* parents, then we only consider TREESAMEness with respect to them,
* allowing irrelevant merges from uninteresting branches to be
* simplified away. Only if we have only irrelevant parents do we
* base TREESAME on them. Note that this logic is replicated in
* update_treesame, which should be kept in sync.
*/
if (relevant_parents ? !relevant_change : !irrelevant_change)
commit->object.flags |= TREESAME;
}
static void commit_list_insert_by_date_cached(struct commit *p, struct commit_list **head,
@ -998,6 +1112,18 @@ static int limit_list(struct rev_info *revs)
free_commit_list(bottom);
}
/*
* Check if any commits have become TREESAME by some of their parents
* becoming UNINTERESTING.
*/
if (limiting_can_increase_treesame(revs))
for (list = newlist; list; list = list->next) {
struct commit *c = list->item;
if (c->object.flags & (UNINTERESTING | TREESAME))
continue;
update_treesame(revs, c);
}
revs->commits = newlist;
return 0;
}
@ -2248,6 +2374,7 @@ static int remove_marked_parents(struct rev_info *revs, struct commit *commit)
static struct commit_list **simplify_one(struct rev_info *revs, struct commit *commit, struct commit_list **tail)
{
struct commit_list *p;
struct commit *parent;
struct merge_simplify_state *st, *pst;
int cnt;
@ -2336,19 +2463,20 @@ static struct commit_list **simplify_one(struct rev_info *revs, struct commit *c
/*
* A commit simplifies to itself if it is a root, if it is
* UNINTERESTING, if it touches the given paths, or if it is a
* merge and its parents simplify to more than one commit
* merge and its parents don't simplify to one relevant commit
* (the first two cases are already handled at the beginning of
* this function).
*
* Otherwise, it simplifies to what its sole parent simplifies to.
* Otherwise, it simplifies to what its sole relevant parent
* simplifies to.
*/
if (!cnt ||
(commit->object.flags & UNINTERESTING) ||
!(commit->object.flags & TREESAME) ||
(1 < cnt))
(parent = one_relevant_parent(revs, commit->parents)) == NULL)
st->simplified = commit;
else {
pst = locate_simplify_state(revs, commit->parents->item);
pst = locate_simplify_state(revs, parent);
st->simplified = pst->simplified;
}
return tail;
@ -2445,7 +2573,8 @@ int prepare_revision_walk(struct rev_info *revs)
free(list);
/* Signal whether we need per-parent treesame decoration */
if (revs->simplify_merges)
if (revs->simplify_merges ||
(revs->limited && limiting_can_increase_treesame(revs)))
revs->treesame.name = "treesame";
if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
@ -2479,15 +2608,15 @@ static enum rewrite_result rewrite_one(struct rev_info *revs, struct commit **pp
if (!revs->limited)
if (add_parents_to_list(revs, p, &revs->commits, &cache) < 0)
return rewrite_one_error;
if (p->parents && p->parents->next)
return rewrite_one_ok;
if (p->object.flags & UNINTERESTING)
return rewrite_one_ok;
if (!(p->object.flags & TREESAME))
return rewrite_one_ok;
if (!p->parents)
return rewrite_one_noparents;
*pp = p->parents->item;
if ((p = one_relevant_parent(revs, p->parents)) == NULL)
return rewrite_one_ok;
*pp = p;
}
}

View File

@ -18,7 +18,8 @@ test_description='--ancestry-path'
# --ancestry-path F...I == F H I
#
# G..M -- G.t == [nothing - was dropped in "-s ours" merge L]
# --ancestry-path G..M -- G.t == H J L
# --ancestry-path G..M -- G.t == L
# --ancestry-path --simplify-merges G^..M -- G.t == G L
. ./test-lib.sh
@ -101,8 +102,15 @@ test_expect_success 'rev-list G..M -- G.t' '
'
test_expect_success 'rev-list --ancestry-path G..M -- G.t' '
for c in H J L; do echo $c; done >expect &&
echo L >expect &&
git rev-list --ancestry-path --format=%s G..M -- G.t |
sed -e "/^commit /d" >actual &&
test_cmp expect actual
'
test_expect_success 'rev-list --ancestry-path --simplify-merges G^..M -- G.t' '
for c in G L; do echo $c; done >expect &&
git rev-list --ancestry-path --simplify-merges --format=%s G^..M -- G.t |
sed -e "/^commit /d" |
sort >actual &&
test_cmp expect actual

View File

@ -138,9 +138,9 @@ check_result 'M L G' F..M --first-parent -- file
# Note that G is pruned when E is the bottom, even if it's the same commit list
# If we want history since E, then we're quite happy to ignore G that took E.
check_result 'M L K J I H G' E..M --ancestry-path
check_outcome failure 'M L J I H' E..M --ancestry-path -- file # includes G
check_result 'M L J I H' E..M --ancestry-path -- file
check_outcome failure '(LH)M (K)L (EJ)K (I)J (E)I (E)H' E..M --ancestry-path --parents -- file # includes G
check_outcome failure '(LH)M (E)H (J)L (I)J (E)I' E..M --ancestry-path --simplify-merges -- file # includes G
check_result '(LH)M (E)H (J)L (I)J (E)I' E..M --ancestry-path --simplify-merges -- file
# Should still be able to ignore I-J branch in simple log, despite limiting
# to G.
@ -167,9 +167,9 @@ check_result 'F D' B..F --full-history -- file
check_result '(D)F (BA)D' B..F --full-history --parents -- file
check_result '(B)F' B..F --simplify-merges -- file
check_result 'F D' B..F --ancestry-path
check_outcome failure 'F' B..F --ancestry-path -- file # includes D
check_result 'F' B..F --ancestry-path -- file
check_outcome failure 'F' B..F --ancestry-path --parents -- file # includes D
check_outcome failure 'F' B..F --ancestry-path --simplify-merges -- file # includes D
check_result 'F' B..F --ancestry-path --simplify-merges -- file
check_result 'F D' B..F --first-parent
check_result 'F' B..F --first-parent -- file