Merge branch 'jt/bulk-prefetch'

"git read-tree" had a codepath where blobs were fetched one-by-one
from the promisor remote; it has been corrected to fetch them in bulk.

* jt/bulk-prefetch:
  cache-tree: prefetch in partial clone read-tree
  unpack-trees: refactor prefetching code
This commit is contained in:
Junio C Hamano 2021-08-02 14:06:42 -07:00
commit 8230107f33
5 changed files with 82 additions and 21 deletions

View File

@ -237,6 +237,11 @@ int cache_tree_fully_valid(struct cache_tree *it)
return 1; return 1;
} }
/*
 * Predicate over a cache entry: returns zero only when a promisor remote
 * is configured and the entry is marked skip-worktree; returns nonzero in
 * every other case.
 */
static int must_check_existence(const struct cache_entry *ce)
{
	return !has_promisor_remote() || !ce_skip_worktree(ce);
}
static int update_one(struct cache_tree *it, static int update_one(struct cache_tree *it,
struct cache_entry **cache, struct cache_entry **cache,
int entries, int entries,
@ -378,8 +383,7 @@ static int update_one(struct cache_tree *it,
} }
ce_missing_ok = mode == S_IFGITLINK || missing_ok || ce_missing_ok = mode == S_IFGITLINK || missing_ok ||
(has_promisor_remote() && !must_check_existence(ce);
ce_skip_worktree(ce));
if (is_null_oid(oid) || if (is_null_oid(oid) ||
(!ce_missing_ok && !has_object_file(oid))) { (!ce_missing_ok && !has_object_file(oid))) {
strbuf_release(&buffer); strbuf_release(&buffer);
@ -466,6 +470,9 @@ int cache_tree_update(struct index_state *istate, int flags)
if (!istate->cache_tree) if (!istate->cache_tree)
istate->cache_tree = cache_tree(); istate->cache_tree = cache_tree();
if (!(flags & WRITE_TREE_MISSING_OK) && has_promisor_remote())
prefetch_cache_entries(istate, must_check_existence);
trace_performance_enter(); trace_performance_enter();
trace2_region_enter("cache_tree", "update", the_repository); trace2_region_enter("cache_tree", "update", the_repository);
i = update_one(istate->cache_tree, istate->cache, istate->cache_nr, i = update_one(istate->cache_tree, istate->cache, istate->cache_nr,

View File

@ -410,6 +410,15 @@ struct cache_entry *dup_cache_entry(const struct cache_entry *ce, struct index_s
*/ */
void validate_cache_entries(const struct index_state *istate); void validate_cache_entries(const struct index_state *istate);
/*
 * Predicate handed to prefetch_cache_entries(). It receives one cache
 * entry and returns nonzero if that entry should be considered for
 * prefetching, zero if it should be left alone.
 */
typedef int (*must_prefetch_predicate)(const struct cache_entry *);
/*
 * Bulk prefetch all missing cache entries that are not GITLINKs and that match
 * the given predicate. This function should only be called if
 * has_promisor_remote() returns true.
 *
 * istate:        index whose cache entries are examined.
 * must_prefetch: predicate selecting which entries are candidates.
 */
void prefetch_cache_entries(const struct index_state *istate,
must_prefetch_predicate must_prefetch);
#ifdef USE_THE_INDEX_COMPATIBILITY_MACROS #ifdef USE_THE_INDEX_COMPATIBILITY_MACROS
extern struct index_state the_index; extern struct index_state the_index;

View File

@ -27,6 +27,7 @@
#include "progress.h" #include "progress.h"
#include "sparse-index.h" #include "sparse-index.h"
#include "csum-file.h" #include "csum-file.h"
#include "promisor-remote.h"
/* Mask for the name length in ce_flags in the on-disk index */ /* Mask for the name length in ce_flags in the on-disk index */
@ -3663,3 +3664,25 @@ static void write_ieot_extension(struct strbuf *sb, struct index_entry_offset_ta
strbuf_add(sb, &buffer, sizeof(uint32_t)); strbuf_add(sb, &buffer, sizeof(uint32_t));
} }
} }
/*
 * Walk every entry of `istate`, gather the object IDs of the entries that
 * are not gitlinks, are accepted by `must_prefetch`, and for which
 * oid_object_info_extended() (queried with OBJECT_INFO_FOR_PREFETCH)
 * returns nonzero, then request all of them from the promisor remote in a
 * single promisor_remote_get_direct() call.
 *
 * NOTE(review): a nonzero return from oid_object_info_extended() is taken
 * here to mean "object not available locally" -- confirm against its
 * documentation.
 */
void prefetch_cache_entries(const struct index_state *istate,
			    must_prefetch_predicate must_prefetch)
{
	struct oid_array missing = OID_ARRAY_INIT;
	int pos;

	for (pos = 0; pos < istate->cache_nr; pos++) {
		struct cache_entry *entry = istate->cache[pos];
		/* The predicate is only consulted for non-gitlink entries. */
		int candidate = !S_ISGITLINK(entry->ce_mode) &&
				must_prefetch(entry);

		if (candidate &&
		    oid_object_info_extended(the_repository, &entry->oid,
					     NULL,
					     OBJECT_INFO_FOR_PREFETCH))
			oid_array_append(&missing, &entry->oid);
	}

	/* One request for the whole batch, then release the scratch array. */
	promisor_remote_get_direct(the_repository,
				   missing.oid, missing.nr);
	oid_array_clear(&missing);
}

View File

@ -0,0 +1,33 @@
#!/bin/sh
test_description='git read-tree in partial clones'
TEST_NO_CREATE_REPO=1
GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME=main
export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME
. ./test-lib.sh
# Scenario: create a "server" repository holding two blobs, make a bare
# partial clone of it with --filter=blob:none, then run "git read-tree" on
# the commit's tree inside the clone while recording packet traces. The
# check counts the "fetch> done" lines in the trace and requires exactly
# one, i.e. a single fetch rather than one fetch per blob.
test_expect_success 'read-tree in partial clone prefetches in one batch' '
test_when_finished "rm -rf server client trace" &&
git init server &&
echo foo >server/one &&
echo bar >server/two &&
git -C server add one two &&
git -C server commit -m "initial commit" &&
TREE=$(git -C server rev-parse HEAD^{tree}) &&
git -C server config uploadpack.allowfilter 1 &&
git -C server config uploadpack.allowanysha1inwant 1 &&
git clone --bare --filter=blob:none "file://$(pwd)/server" client &&
GIT_TRACE_PACKET="$(pwd)/trace" git -C client read-tree $TREE &&
# "done" marks the end of negotiation (once per fetch). Expect that
# only one fetch occurs.
grep "fetch> done" trace >donelines &&
test_line_count = 1 donelines
'
test_done

View File

@ -392,6 +392,11 @@ static void report_collided_checkout(struct index_state *index)
string_list_clear(&list, 0); string_list_clear(&list, 0);
} }
/*
 * Predicate over a cache entry: nonzero when the CE_UPDATE bit is set in
 * the entry's ce_flags, zero otherwise. The raw masked value is returned;
 * it is not normalised to 0/1.
 */
static int must_checkout(const struct cache_entry *ce)
{
	return CE_UPDATE & ce->ce_flags;
}
static int check_updates(struct unpack_trees_options *o, static int check_updates(struct unpack_trees_options *o,
struct index_state *index) struct index_state *index)
{ {
@ -442,28 +447,12 @@ static int check_updates(struct unpack_trees_options *o,
if (should_update_submodules()) if (should_update_submodules())
load_gitmodules_file(index, &state); load_gitmodules_file(index, &state);
if (has_promisor_remote()) { if (has_promisor_remote())
/* /*
* Prefetch the objects that are to be checked out in the loop * Prefetch the objects that are to be checked out in the loop
* below. * below.
*/ */
struct oid_array to_fetch = OID_ARRAY_INIT; prefetch_cache_entries(index, must_checkout);
for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i];
if (!(ce->ce_flags & CE_UPDATE) ||
S_ISGITLINK(ce->ce_mode))
continue;
if (!oid_object_info_extended(the_repository, &ce->oid,
NULL,
OBJECT_INFO_FOR_PREFETCH))
continue;
oid_array_append(&to_fetch, &ce->oid);
}
promisor_remote_get_direct(the_repository,
to_fetch.oid, to_fetch.nr);
oid_array_clear(&to_fetch);
}
get_parallel_checkout_configs(&pc_workers, &pc_threshold); get_parallel_checkout_configs(&pc_workers, &pc_threshold);
@ -473,7 +462,7 @@ static int check_updates(struct unpack_trees_options *o,
for (i = 0; i < index->cache_nr; i++) { for (i = 0; i < index->cache_nr; i++) {
struct cache_entry *ce = index->cache[i]; struct cache_entry *ce = index->cache[i];
if (ce->ce_flags & CE_UPDATE) { if (must_checkout(ce)) {
size_t last_pc_queue_size = pc_queue_size(); size_t last_pc_queue_size = pc_queue_size();
if (ce->ce_flags & CE_WT_REMOVE) if (ce->ce_flags & CE_WT_REMOVE)