Merge branch 'ds/line-log-on-bloom'

"git log -L..." now takes advantage of the "which paths are touched
by this commit?" info stored in the commit-graph system.

* ds/line-log-on-bloom:
  line-log: integrate with changed-path Bloom filters
  line-log: try to use generation number-based topo-ordering
  line-log: more responsive, incremental 'git log -L'
  t4211-line-log: add tests for parent oids
  line-log: remove unused fields from 'struct line_log_data'
This commit is contained in:
Junio C Hamano 2020-06-08 18:06:26 -07:00
commit c3a02824cf
6 changed files with 152 additions and 13 deletions

View File

@ -138,6 +138,11 @@ void fill_bloom_key(const char *data,
key->hashes[i] = hash0 + i * hash1;
}
/*
 * Release the 'hashes' array owned by 'key' (the array that
 * fill_bloom_key() writes into) and reset the pointer to NULL via
 * FREE_AND_NULL.  Only that member is touched; the 'struct bloom_key'
 * itself is not freed.
 */
void clear_bloom_key(struct bloom_key *key)
{
FREE_AND_NULL(key->hashes);
}
void add_key_to_filter(const struct bloom_key *key,
struct bloom_filter *filter,
const struct bloom_filter_settings *settings)

View File

@ -72,6 +72,7 @@ void fill_bloom_key(const char *data,
size_t len,
struct bloom_key *key,
const struct bloom_filter_settings *settings);
/* Release key->hashes (populated by fill_bloom_key()) and set it to NULL. */
void clear_bloom_key(struct bloom_key *key);
void add_key_to_filter(const struct bloom_key *key,
struct bloom_filter *filter,

View File

@ -15,6 +15,7 @@
#include "userdiff.h"
#include "line-log.h"
#include "argv-array.h"
#include "bloom.h"
static void range_set_grow(struct range_set *rs, size_t extra)
{
@ -1146,6 +1147,37 @@ int line_log_print(struct rev_info *rev, struct commit *commit)
return 1;
}
/*
 * Consult the changed-path Bloom filter of 'commit' for the paths of the
 * tracked line ranges.
 *
 * Returns 1 when the filter cannot be used (the commit has no parents,
 * 'rev' carries no Bloom filter settings, or no filter is available for
 * the commit) or when the filter reports a match for the path of at
 * least one entry in the 'range' list.
 * Returns 0 only when a filter is available and none of the paths in
 * 'range' match it (this includes an empty 'range' list).
 */
static int bloom_filter_check(struct rev_info *rev,
			      struct commit *commit,
			      struct line_log_data *range)
{
	struct bloom_filter *filter;
	struct line_log_data *r;

	if (!commit->parents)
		return 1;
	if (!rev->bloom_filter_settings)
		return 1;

	/* Only look the filter up once we know settings exist. */
	filter = get_bloom_filter(rev->repo, commit, 0);
	if (!filter)
		return 1;

	for (r = range; r; r = r->next) {
		struct bloom_key key;
		int hit;

		fill_bloom_key(r->path, strlen(r->path), &key,
			       rev->bloom_filter_settings);
		hit = bloom_filter_contains(filter, &key,
					    rev->bloom_filter_settings);
		/* The key is cleared on every iteration, hit or miss. */
		clear_bloom_key(&key);
		if (hit)
			return 1;
	}

	return 0;
}
static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *commit,
struct line_log_data *range)
{
@ -1159,6 +1191,7 @@ static int process_ranges_ordinary_commit(struct rev_info *rev, struct commit *c
queue_diffs(range, &rev->diffopt, &queue, commit, parent);
changed = process_all_files(&parent_range, rev, &queue, range);
if (parent)
add_line_range(rev, parent, parent_range);
free_line_log_data(parent_range);
@ -1227,13 +1260,17 @@ static int process_ranges_merge_commit(struct rev_info *rev, struct commit *comm
/* NEEDSWORK leaking like a sieve */
}
static int process_ranges_arbitrary_commit(struct rev_info *rev, struct commit *commit)
int line_log_process_ranges_arbitrary_commit(struct rev_info *rev, struct commit *commit)
{
struct line_log_data *range = lookup_line_range(rev, commit);
int changed = 0;
if (range) {
if (!commit->parents || !commit->parents->next)
if (commit->parents && !bloom_filter_check(rev, commit, range)) {
struct line_log_data *prange = line_log_data_copy(range);
add_line_range(rev, commit->parents->item, prange);
clear_commit_line_range(rev, commit);
} else if (!commit->parents || !commit->parents->next)
changed = process_ranges_ordinary_commit(rev, commit, range);
else
changed = process_ranges_merge_commit(rev, commit, range);
@ -1270,7 +1307,7 @@ int line_log_filter(struct rev_info *rev)
while (list) {
struct commit_list *to_free = NULL;
commit = list->item;
if (process_ranges_arbitrary_commit(rev, commit)) {
if (line_log_process_ranges_arbitrary_commit(rev, commit)) {
*pp = list;
pp = &list->next;
} else

View File

@ -46,10 +46,7 @@ void sort_and_merge_range_set(struct range_set *);
struct line_log_data {
struct line_log_data *next;
char *path;
char status;
struct range_set ranges;
int arg_alloc, arg_nr;
const char **args;
struct diff_filepair *pair;
struct diff_ranges diff;
};
@ -57,6 +54,8 @@ struct line_log_data {
void line_log_init(struct rev_info *rev, const char *prefix, struct string_list *args);
int line_log_filter(struct rev_info *rev);
int line_log_process_ranges_arbitrary_commit(struct rev_info *rev,
struct commit *commit);
int line_log_print(struct rev_info *rev, struct commit *commit);

View File

@ -39,6 +39,8 @@ static const char *term_good;
implement_shared_commit_slab(revision_sources, char *);
static inline int want_ancestry(const struct rev_info *revs);
void show_object_with_name(FILE *out, struct object *obj, const char *name)
{
const char *p;
@ -687,6 +689,9 @@ static void prepare_to_use_bloom_filter(struct rev_info *revs)
if (!revs->bloom_filter_settings)
return;
if (!revs->pruning.pathspec.nr)
return;
pi = &revs->pruning.pathspec.items[0];
last_index = pi->len - 1;
@ -2810,6 +2815,12 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
if (revs->diffopt.objfind)
revs->simplify_history = 0;
if (revs->line_level_traverse) {
if (want_ancestry(revs))
revs->limited = 1;
revs->topo_order = 1;
}
if (revs->topo_order && !generation_numbers_enabled(the_repository))
revs->limited = 1;
@ -2829,11 +2840,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s
revs->diffopt.abbrev = revs->abbrev;
if (revs->line_level_traverse) {
revs->limited = 1;
revs->topo_order = 1;
}
diff_setup_done(&revs->diffopt);
grep_commit_pattern_type(GREP_PATTERN_TYPE_UNSPECIFIED,
@ -3521,7 +3527,7 @@ int prepare_revision_walk(struct rev_info *revs)
FOR_EACH_OBJECT_PROMISOR_ONLY);
}
if (revs->pruning.pathspec.nr == 1 && !revs->reflog_info)
if (!revs->reflog_info)
prepare_to_use_bloom_filter(revs);
if (revs->no_walk != REVISION_WALK_NO_WALK_UNSORTED)
commit_list_sort_by_date(&revs->commits);
@ -3534,7 +3540,14 @@ int prepare_revision_walk(struct rev_info *revs)
sort_in_topological_order(&revs->commits, revs->sort_order);
} else if (revs->topo_order)
init_topo_walk(revs);
if (revs->line_level_traverse)
if (revs->line_level_traverse && want_ancestry(revs))
/*
* At the moment we can only do line-level log with parent
* rewriting by performing this expensive pre-filtering step.
* If parent rewriting is not requested, we instead perform
* the line-level log filtering during the regular history
* traversal.
*/
line_log_filter(revs);
if (revs->simplify_merges)
simplify_merges(revs);
@ -3745,6 +3758,22 @@ enum commit_action get_commit_action(struct rev_info *revs, struct commit *commi
return commit_ignore;
if (commit->object.flags & UNINTERESTING)
return commit_ignore;
if (revs->line_level_traverse && !want_ancestry(revs)) {
/*
* In case of line-level log with parent rewriting,
* prepare_revision_walk() already took care of all line-level
* log filtering, and there is nothing left to do here.
*
* If parent rewriting was not requested, then this is the
* place to perform the line-level log filtering. Notably,
* this check, though expensive, must come before the other,
* cheaper filtering conditions, because the tracked line
* ranges must be adjusted even when the commit will end up
* being ignored based on other conditions.
*/
if (!line_log_process_ranges_arbitrary_commit(revs, commit))
return commit_ignore;
}
if (revs->min_age != -1 &&
comparison_date(revs, commit) > revs->min_age)
return commit_ignore;

View File

@ -215,4 +215,72 @@ test_expect_success 'fancy rename following #2' '
test_cmp expect actual
'
# Create the following linear history, where each commit does what its
# subject line promises:
#
# * 66c6410 Modify func2() in file.c
# * 50834e5 Modify other-file
# * fe5851c Modify func1() in file.c
# * 8c7c7dd Add other-file
# * d5f4417 Add func1() and func2() in file.c
#
# The history is built on a fresh orphan branch "parent-oids".  The
# abbreviated oids of HEAD, HEAD^ and the root commit (HEAD~4) are saved
# in $head_oid, $prev_oid and $root_oid for the tests that follow.
test_expect_success 'setup for checking line-log and parent oids' '
git checkout --orphan parent-oids &&
git reset --hard &&
cat >file.c <<-\EOF &&
int func1()
{
return F1;
}
int func2()
{
return F2;
}
EOF
git add file.c &&
test_tick &&
git commit -m "Add func1() and func2() in file.c" &&
echo 1 >other-file &&
git add other-file &&
git commit -m "Add other-file" &&
sed -e "s/F1/F1 + 1/" file.c >tmp &&
mv tmp file.c &&
git commit -a -m "Modify func1() in file.c" &&
echo 2 >other-file &&
git commit -a -m "Modify other-file" &&
sed -e "s/F2/F2 + 2/" file.c >tmp &&
mv tmp file.c &&
git commit -a -m "Modify func2() in file.c" &&
head_oid=$(git rev-parse --short HEAD) &&
prev_oid=$(git rev-parse --short HEAD^) &&
root_oid=$(git rev-parse --short HEAD~4)
'
# Parent oid should be from immediate parent.
#
# The root commit has no parent, so "%p" expands to the empty string and
# the two literal spaces of the "%h %p %s" format end up next to each
# other: the expected root line therefore has TWO spaces after the oid.
test_expect_success 'parent oids without parent rewriting' '
cat >expect <<-EOF &&
$head_oid $prev_oid Modify func2() in file.c
$root_oid  Add func1() and func2() in file.c
EOF
git log --format="%h %p %s" --no-patch -L:func2:file.c >actual &&
test_cmp expect actual
'
# Parent oid should be from the most recent ancestor touching func2(),
# i.e. in this case from the root commit.
#
# The root commit itself still has no parent, so "%p" is empty there and
# the expected root line has TWO spaces between the oid and the subject.
test_expect_success 'parent oids with parent rewriting' '
cat >expect <<-EOF &&
$head_oid $root_oid Modify func2() in file.c
$root_oid  Add func1() and func2() in file.c
EOF
git log --format="%h %p %s" --no-patch -L:func2:file.c --parents >actual &&
test_cmp expect actual
'
test_done