Merge branch 'ds/for-each-ref-is-base'

'git for-each-ref' learned a new "--format" atom to find the branch
that the history leading to a given commit "%(is-base:<commit>)" is
likely based on.

* ds/for-each-ref-is-base:
  p1500: add is-base performance tests
  for-each-ref: add 'is-base' token
  commit: add gentle reference lookup method
  commit-reach: add get_branch_base_for_tip
This commit is contained in:
Junio C Hamano 2024-08-26 11:32:24 -07:00
commit 3222718ad7
11 changed files with 448 additions and 2 deletions

View File

@ -264,6 +264,48 @@ ahead-behind:<committish>::
commits ahead and behind, respectively, when comparing the output
ref to the `<committish>` specified in the format.
is-base:<committish>::
In at most one row, `(<committish>)` will appear to indicate the ref
that is most likely the ref used as a starting point for the branch
that produced `<committish>`. This choice is made using a heuristic:
choose the ref that minimizes the number of commits in the
first-parent history of `<committish>` and not in the first-parent
history of the ref.
+
For example, consider the following figure of first-parent histories of
several refs:
+
----
*--*--*--*--*--* refs/heads/A
\
\
*--*--*--* refs/heads/B
\ \
\ \
* * refs/heads/C
\
\
*--* refs/heads/D
----
+
Here, if `A`, `B`, and `C` are the filtered references, and the format
string is `%(refname):%(is-base:D)`, then the output would be
+
----
refs/heads/A:
refs/heads/B:(D)
refs/heads/C:
----
+
This is because the first-parent history of `D` has its earliest
intersection with the first-parent histories of the filtered refs at a
common first-parent ancestor of `B` and `C` and ties are broken by the
earliest ref in the sorted order.
+
Note that this token will not appear if the first-parent history of
`<committish>` does not intersect the first-parent histories of the
filtered refs.
describe[:options]::
A human-readable name, like linkgit:git-describe[1];
empty string for undescribable commits. The `describe` string may

View File

@ -1229,3 +1229,129 @@ done:
repo_clear_commit_marks(r, SEEN);
free_commit_list(stack);
}
/*
* This slab initializes integers to zero, so use "-1" for "tip is best" and
* "i + 1" for "bases[i] is best".
*/
define_commit_slab(best_branch_base, int);
static struct best_branch_base best_branch_base;
#define get_best(c) (*best_branch_base_at(&best_branch_base, (c)))
#define set_best(c,v) (*best_branch_base_at(&best_branch_base, (c)) = (v))
int get_branch_base_for_tip(struct repository *r,
struct commit *tip,
struct commit **bases,
size_t bases_nr)
{
int best_index = -1;
struct commit *branch_point = NULL;
struct prio_queue queue = { compare_commits_by_gen_then_commit_date };
int found_missing_gen = 0;
if (!bases_nr)
return -1;
repo_parse_commit(r, tip);
if (commit_graph_generation(tip) == GENERATION_NUMBER_INFINITY)
found_missing_gen = 1;
/* Check for missing generation numbers. */
for (size_t i = 0; i < bases_nr; i++) {
struct commit *c = bases[i];
repo_parse_commit(r, c);
if (commit_graph_generation(c) == GENERATION_NUMBER_INFINITY)
found_missing_gen = 1;
}
if (found_missing_gen) {
struct commit **commits;
size_t commits_nr = bases_nr + 1;
CALLOC_ARRAY(commits, commits_nr);
COPY_ARRAY(commits, bases, bases_nr);
commits[bases_nr] = tip;
ensure_generations_valid(r, commits, commits_nr);
free(commits);
}
/* Initialize queue and slab now that generations are guaranteed. */
init_best_branch_base(&best_branch_base);
set_best(tip, -1);
prio_queue_put(&queue, tip);
for (size_t i = 0; i < bases_nr; i++) {
struct commit *c = bases[i];
int best = get_best(c);
/* Has this already been marked as best by another commit? */
if (best) {
if (best == -1) {
/* We agree at this position. Stop now. */
best_index = i + 1;
goto cleanup;
}
continue;
}
set_best(c, i + 1);
prio_queue_put(&queue, c);
}
while (queue.nr) {
struct commit *c = prio_queue_get(&queue);
int best_for_c = get_best(c);
int best_for_p, positive;
struct commit *parent;
/* Have we reached a known branch point? It's optimal. */
if (c == branch_point)
break;
repo_parse_commit(r, c);
if (!c->parents)
continue;
parent = c->parents->item;
repo_parse_commit(r, parent);
best_for_p = get_best(parent);
if (!best_for_p) {
/* 'parent' is new, so pass along best_for_c. */
set_best(parent, best_for_c);
prio_queue_put(&queue, parent);
continue;
}
if (best_for_p > 0 && best_for_c > 0) {
/* Collision among bases. Minimize. */
if (best_for_c < best_for_p)
set_best(parent, best_for_c);
continue;
}
/*
* At this point, we have reached a commit that is reachable
* from the tip, either from 'c' or from an earlier commit to
* have 'parent' as its first parent.
*
* Update 'best_index' to match the minimum of all base indices
* to reach 'parent'.
*/
/* Exactly one is positive due to initial conditions. */
positive = (best_for_c < 0) ? best_for_p : best_for_c;
if (best_index < 0 || positive < best_index)
best_index = positive;
/* No matter what, track that the parent is reachable from tip. */
set_best(parent, -1);
branch_point = parent;
}
cleanup:
clear_best_branch_base(&best_branch_base);
clear_prio_queue(&queue);
return best_index > 0 ? best_index - 1 : -1;
}

View File

@ -139,4 +139,21 @@ void tips_reachable_from_bases(struct repository *r,
struct commit **tips, size_t tips_nr,
int mark);
/*
* Given a 'tip' commit and a list potential 'bases', return the index 'i' that
* minimizes the number of commits in the first-parent history of 'tip' and not
* in the first-parent history of 'bases[i]'.
*
* Among a list of long-lived branches that are updated only by merges (with the
* first parent being the previous position of the branch), this would inform
* which branch was used to create the tip reference.
*
* Returns -1 if no common point is found in first-parent histories, which is
* rare, but possible with multiple root commits.
*/
int get_branch_base_for_tip(struct repository *r,
struct commit *tip,
struct commit **bases,
size_t bases_nr);
#endif

View File

@ -84,13 +84,19 @@ struct commit *lookup_commit(struct repository *r, const struct object_id *oid)
}
struct commit *lookup_commit_reference_by_name(const char *name)
{
return lookup_commit_reference_by_name_gently(name, 0);
}
struct commit *lookup_commit_reference_by_name_gently(const char *name,
int quiet)
{
struct object_id oid;
struct commit *commit;
if (repo_get_oid_committish(the_repository, name, &oid))
return NULL;
commit = lookup_commit_reference(the_repository, &oid);
commit = lookup_commit_reference_gently(the_repository, &oid, quiet);
if (repo_parse_commit(the_repository, commit))
return NULL;
return commit;

View File

@ -81,6 +81,8 @@ struct commit *lookup_commit_reference_gently(struct repository *r,
const struct object_id *oid,
int quiet);
struct commit *lookup_commit_reference_by_name(const char *name);
struct commit *lookup_commit_reference_by_name_gently(const char *name,
int quiet);
/*
* Look up object named by "oid", dereference tag as necessary,

View File

@ -169,6 +169,7 @@ enum atom_type {
ATOM_ELSE,
ATOM_REST,
ATOM_AHEADBEHIND,
ATOM_ISBASE,
};
/*
@ -890,6 +891,23 @@ static int ahead_behind_atom_parser(struct ref_format *format,
return 0;
}
static int is_base_atom_parser(struct ref_format *format,
struct used_atom *atom UNUSED,
const char *arg, struct strbuf *err)
{
struct string_list_item *item;
if (!arg)
return strbuf_addf_ret(err, -1, _("expected format: %%(is-base:<committish>)"));
item = string_list_append(&format->is_base_tips, arg);
item->util = lookup_commit_reference_by_name(arg);
if (!item->util)
die("failed to find '%s'", arg);
return 0;
}
static int head_atom_parser(struct ref_format *format UNUSED,
struct used_atom *atom,
const char *arg, struct strbuf *err)
@ -955,6 +973,7 @@ static struct {
[ATOM_ELSE] = { "else", SOURCE_NONE },
[ATOM_REST] = { "rest", SOURCE_NONE, FIELD_STR, rest_atom_parser },
[ATOM_AHEADBEHIND] = { "ahead-behind", SOURCE_OTHER, FIELD_STR, ahead_behind_atom_parser },
[ATOM_ISBASE] = { "is-base", SOURCE_OTHER, FIELD_STR, is_base_atom_parser },
/*
* Please update $__git_ref_fieldlist in git-completion.bash
* when you add new atoms
@ -2340,6 +2359,7 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
int i;
struct object_info empty = OBJECT_INFO_INIT;
int ahead_behind_atoms = 0;
int is_base_atoms = 0;
CALLOC_ARRAY(ref->value, used_atom_cnt);
@ -2489,6 +2509,15 @@ static int populate_value(struct ref_array_item *ref, struct strbuf *err)
v->s = xstrdup("");
}
continue;
} else if (atom_type == ATOM_ISBASE) {
if (ref->is_base && ref->is_base[is_base_atoms]) {
v->s = xstrfmt("(%s)", ref->is_base[is_base_atoms]);
free(ref->is_base[is_base_atoms]);
} else {
v->s = xstrdup("");
}
is_base_atoms++;
continue;
} else
continue;
@ -2895,6 +2924,7 @@ static void free_array_item(struct ref_array_item *item)
free(item->value);
}
free(item->counts);
free(item->is_base);
free(item);
}
@ -3059,6 +3089,49 @@ void filter_ahead_behind(struct repository *r,
free(commits);
}
void filter_is_base(struct repository *r,
struct ref_format *format,
struct ref_array *array)
{
struct commit **bases;
size_t bases_nr = 0;
struct ref_array_item **back_index;
if (!format->is_base_tips.nr || !array->nr)
return;
CALLOC_ARRAY(back_index, array->nr);
CALLOC_ARRAY(bases, array->nr);
for (size_t i = 0; i < array->nr; i++) {
const char *name = array->items[i]->refname;
struct commit *c = lookup_commit_reference_by_name_gently(name, 1);
CALLOC_ARRAY(array->items[i]->is_base, format->is_base_tips.nr);
if (!c)
continue;
back_index[bases_nr] = array->items[i];
bases[bases_nr] = c;
bases_nr++;
}
for (size_t i = 0; i < format->is_base_tips.nr; i++) {
struct commit *tip = format->is_base_tips.items[i].util;
int base_index = get_branch_base_for_tip(r, tip, bases, bases_nr);
if (base_index < 0)
continue;
/* Store the string for use in output later. */
back_index[base_index]->is_base[i] = xstrdup(format->is_base_tips.items[i].string);
}
free(back_index);
free(bases);
}
static int do_filter_refs(struct ref_filter *filter, unsigned int type, each_ref_fn fn, void *cb_data)
{
int ret = 0;
@ -3152,7 +3225,8 @@ static inline int can_do_iterative_format(struct ref_filter *filter,
return !(filter->reachable_from ||
filter->unreachable_from ||
sorting ||
format->bases.nr);
format->bases.nr ||
format->is_base_tips.nr);
}
void filter_and_format_refs(struct ref_filter *filter, unsigned int type,
@ -3176,6 +3250,7 @@ void filter_and_format_refs(struct ref_filter *filter, unsigned int type,
struct ref_array array = { 0 };
filter_refs(&array, filter, type);
filter_ahead_behind(the_repository, format, &array);
filter_is_base(the_repository, format, &array);
ref_array_sort(sorting, &array);
print_formatted_ref_array(&array, format);
ref_array_clear(&array);

View File

@ -48,6 +48,7 @@ struct ref_array_item {
struct commit *commit;
struct atom_value *value;
struct ahead_behind_count **counts;
char **is_base;
char refname[FLEX_ARRAY];
};
@ -101,6 +102,9 @@ struct ref_format {
/* List of bases for ahead-behind counts. */
struct string_list bases;
/* List of bases for is-base indicators. */
struct string_list is_base_tips;
struct {
int max_count;
int omit_empty;
@ -114,6 +118,7 @@ struct ref_format {
#define REF_FORMAT_INIT { \
.use_color = -1, \
.bases = STRING_LIST_INIT_DUP, \
.is_base_tips = STRING_LIST_INIT_DUP, \
}
/* Macros for checking --merged and --no-merged options */
@ -203,6 +208,16 @@ void filter_ahead_behind(struct repository *r,
struct ref_format *format,
struct ref_array *array);
/*
* If the provided format includes is-base atoms, then compute the base checks
* for those tips against all refs.
*
* If this is not called, then any is-base atoms will be blank.
*/
void filter_is_base(struct repository *r,
struct ref_format *format,
struct ref_array *array);
void ref_filter_init(struct ref_filter *filter);
void ref_filter_clear(struct ref_filter *filter);

View File

@ -116,6 +116,8 @@ int cmd__reach(int ac, const char **av)
repo_in_merge_bases_many(the_repository, A, X_nr, X_array, 0));
else if (!strcmp(av[1], "is_descendant_of"))
printf("%s(A,X):%d\n", av[1], repo_is_descendant_of(r, A, X));
else if (!strcmp(av[1], "get_branch_base_for_tip"))
printf("%s(A,X):%d\n", av[1], get_branch_base_for_tip(r, A, X_array, X_nr));
else if (!strcmp(av[1], "get_merge_bases_many")) {
struct commit_list *list = NULL;
if (repo_get_merge_bases_many(the_repository,

View File

@ -20,6 +20,21 @@ test_expect_success 'setup' '
echo tag-$ref ||
return 1
done >tags &&
echo "A:HEAD" >test-tool-refs &&
for line in $(cat refs)
do
echo "X:$line" >>test-tool-refs || return 1
done &&
echo "A:HEAD" >test-tool-tags &&
for line in $(cat tags)
do
echo "X:$line" >>test-tool-tags || return 1
done &&
commit=$(git commit-tree $(git rev-parse HEAD^{tree})) &&
git update-ref refs/heads/disjoint-base $commit &&
git commit-graph write --reachable
'
@ -47,4 +62,20 @@ test_perf 'contains: git tag --merged' '
xargs git tag --merged=HEAD <tags
'
test_perf 'is-base check: test-tool reach (refs)' '
test-tool reach get_branch_base_for_tip <test-tool-refs
'
test_perf 'is-base check: test-tool reach (tags)' '
test-tool reach get_branch_base_for_tip <test-tool-tags
'
test_perf 'is-base check: git for-each-ref' '
git for-each-ref --format="%(is-base:HEAD)" --stdin <refs
'
test_perf 'is-base check: git for-each-ref (disjoint-base)' '
git for-each-ref --format="%(is-base:refs/heads/disjoint-base)" --stdin <refs
'
test_done

View File

@ -1907,6 +1907,15 @@ test_expect_success 'git for-each-ref with nested tags' '
test_cmp expect actual
'
test_expect_success 'is-base atom with non-commits' '
git for-each-ref --format="%(is-base:HEAD) %(refname)" >out 2>err &&
grep "(HEAD) refs/heads/main" out &&
test_line_count = 2 err &&
grep "error: object .* is a commit, not a blob" err &&
grep "error: bad tag pointer to" err
'
GRADE_FORMAT="%(signature:grade)%0a%(signature:key)%0a%(signature:signer)%0a%(signature:fingerprint)%0a%(signature:primarykeyfingerprint)"
TRUSTLEVEL_FORMAT="%(signature:trustlevel)%0a%(signature:key)%0a%(signature:signer)%0a%(signature:fingerprint)%0a%(signature:primarykeyfingerprint)"

View File

@ -612,4 +612,125 @@ test_expect_success 'for-each-ref merged:none' '
--format="%(refname)" --stdin
'
# For get_branch_base_for_tip, we only care about
# first-parent history. Here is the test graph with
# second parents removed:
#
# (10,10)
# /
# (10,9) (9,10)
# / /
# (10,8) (9,9) (8,10)
# / / /
# ( continued...)
# \ / / /
# (3,1) (2,2) (1,3)
# \ / /
# (2,1) (1,2)
# \ /
# (1,1)
#
# In short, for a commit (i,j), the first-parent history
# walks all commits (i, k) with k from j to 1, then the
# commits (l, 1) with l from i to 1.
test_expect_success 'get_branch_base_for_tip: none reach' '
# (2,3) branched from the first tip (i,4) in X with i > 2
cat >input <<-\EOF &&
A:commit-2-3
X:commit-1-2
X:commit-1-4
X:commit-4-4
X:commit-8-4
X:commit-10-4
EOF
echo "get_branch_base_for_tip(A,X):2" >expect &&
test_all_modes get_branch_base_for_tip
'
test_expect_success 'get_branch_base_for_tip: equal to tip' '
# (2,3) branched from the first tip (i,4) in X with i > 2
cat >input <<-\EOF &&
A:commit-8-4
X:commit-1-2
X:commit-1-4
X:commit-4-4
X:commit-8-4
X:commit-10-4
EOF
echo "get_branch_base_for_tip(A,X):3" >expect &&
test_all_modes get_branch_base_for_tip
'
test_expect_success 'get_branch_base_for_tip: all reach tip' '
# (2,3) branched from the first tip (i,4) in X with i > 2
cat >input <<-\EOF &&
A:commit-4-1
X:commit-4-2
X:commit-5-1
EOF
echo "get_branch_base_for_tip(A,X):0" >expect &&
test_all_modes get_branch_base_for_tip
'
test_expect_success 'for-each-ref is-base: none reach' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-4-2
refs/heads/commit-4-4
refs/heads/commit-8-4
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1:
refs/heads/commit-4-2:(commit-2-3)
refs/heads/commit-4-4:
refs/heads/commit-8-4:
EOF
run_all_modes git for-each-ref \
--format="%(refname):%(is-base:commit-2-3)" --stdin
'
test_expect_success 'for-each-ref is-base: all reach' '
cat >input <<-\EOF &&
refs/heads/commit-4-2
refs/heads/commit-5-1
EOF
cat >expect <<-\EOF &&
refs/heads/commit-4-2:(commit-4-1)
refs/heads/commit-5-1:
EOF
run_all_modes git for-each-ref \
--format="%(refname):%(is-base:commit-4-1)" --stdin
'
test_expect_success 'for-each-ref is-base: equal to tip' '
cat >input <<-\EOF &&
refs/heads/commit-4-2
refs/heads/commit-5-1
EOF
cat >expect <<-\EOF &&
refs/heads/commit-4-2:(commit-4-2)
refs/heads/commit-5-1:
EOF
run_all_modes git for-each-ref \
--format="%(refname):%(is-base:commit-4-2)" --stdin
'
test_expect_success 'for-each-ref is-base:multiple' '
cat >input <<-\EOF &&
refs/heads/commit-1-1
refs/heads/commit-4-2
refs/heads/commit-4-4
refs/heads/commit-8-4
EOF
cat >expect <<-\EOF &&
refs/heads/commit-1-1[-]
refs/heads/commit-4-2[(commit-2-3)-]
refs/heads/commit-4-4[-]
refs/heads/commit-8-4[-(commit-6-5)]
EOF
run_all_modes git for-each-ref \
--format="%(refname)[%(is-base:commit-2-3)-%(is-base:commit-6-5)]" --stdin
'
test_done