mirror of
https://github.com/git/git.git
synced 2025-01-08 16:43:56 +08:00
Merge branch 'ds/cache-tree-basics'
Document, clean-up and optimize the code around the cache-tree extension in the index. * ds/cache-tree-basics: cache-tree: speed up consecutive path comparisons cache-tree: use ce_namelen() instead of strlen() index-format: discuss recursion of cache-tree better index-format: update preamble to cache tree extension index-format: use 'cache tree' over 'cached tree' cache-tree: trace regions for prime_cache_tree cache-tree: trace regions for I/O cache-tree: use trace2 in cache_tree_update() unpack-trees: add trace2 regions tree-walk: report recursion counts
This commit is contained in:
commit
a0a2d75d3b
@ -26,7 +26,7 @@ Git index format
|
||||
Extensions are identified by signature. Optional extensions can
|
||||
be ignored if Git does not understand them.
|
||||
|
||||
Git currently supports cached tree and resolve undo extensions.
|
||||
Git currently supports cache tree and resolve undo extensions.
|
||||
|
||||
4-byte extension signature. If the first byte is 'A'..'Z' the
|
||||
extension is optional and can be ignored.
|
||||
@ -136,14 +136,35 @@ Git index format
|
||||
|
||||
== Extensions
|
||||
|
||||
=== Cached tree
|
||||
=== Cache tree
|
||||
|
||||
Cached tree extension contains pre-computed hashes for trees that can
|
||||
be derived from the index. It helps speed up tree object generation
|
||||
from index for a new commit.
|
||||
Since the index does not record entries for directories, the cache
|
||||
entries cannot describe tree objects that already exist in the object
|
||||
database for regions of the index that are unchanged from an existing
|
||||
commit. The cache tree extension stores a recursive tree structure that
|
||||
describes the trees that already exist and completely match sections of
|
||||
the cache entries. This speeds up tree object generation from the index
|
||||
for a new commit by only computing the trees that are "new" to that
|
||||
commit. It also assists when comparing the index to another tree, such
|
||||
as `HEAD^{tree}`, since sections of the index can be skipped when a tree
|
||||
comparison demonstrates equality.
|
||||
|
||||
When a path is updated in index, the path must be invalidated and
|
||||
removed from tree cache.
|
||||
The recursive tree structure uses nodes that store a number of cache
|
||||
entries, a list of subnodes, and an object ID (OID). The OID references
|
||||
the existing tree for that node, if it is known to exist. The subnodes
|
||||
correspond to subdirectories that themselves have cache tree nodes. The
|
||||
number of cache entries corresponds to the number of cache entries in
|
||||
the index that describe paths within that tree's directory.
|
||||
|
||||
The extension tracks the full directory structure in the cache tree
|
||||
extension, but this is generally smaller than the full cache entry list.
|
||||
|
||||
When a path is updated in index, Git invalidates all nodes of the
|
||||
recursive cache tree corresponding to the parent directories of that
|
||||
path. We store these tree nodes as being "invalid" by using "-1" as the
|
||||
number of cache entries. Invalid nodes still store a span of index
|
||||
entries, allowing Git to focus its efforts when reconstructing a full
|
||||
cache tree.
|
||||
|
||||
The signature for this extension is { 'T', 'R', 'E', 'E' }.
|
||||
|
||||
@ -174,7 +195,8 @@ Git index format
|
||||
first entry represents the root level of the repository, followed by the
|
||||
first subtree--let's call this A--of the root level (with its name
|
||||
relative to the root level), followed by the first subtree of A (with
|
||||
its name relative to A), ...
|
||||
its name relative to A), and so on. The specified number of subtrees
|
||||
indicates when the current level of the recursive stack is complete.
|
||||
|
||||
=== Resolve undo
|
||||
|
||||
|
30
cache-tree.c
30
cache-tree.c
@ -185,12 +185,14 @@ static int verify_cache(struct cache_entry **cache,
|
||||
* the cache is sorted. Also path can appear only once,
|
||||
* which means conflicting one would immediately follow.
|
||||
*/
|
||||
const char *this_name = cache[i]->name;
|
||||
const char *next_name = cache[i+1]->name;
|
||||
int this_len = strlen(this_name);
|
||||
if (this_len < strlen(next_name) &&
|
||||
strncmp(this_name, next_name, this_len) == 0 &&
|
||||
next_name[this_len] == '/') {
|
||||
const struct cache_entry *this_ce = cache[i];
|
||||
const struct cache_entry *next_ce = cache[i + 1];
|
||||
const char *this_name = this_ce->name;
|
||||
const char *next_name = next_ce->name;
|
||||
int this_len = ce_namelen(this_ce);
|
||||
if (this_len < ce_namelen(next_ce) &&
|
||||
next_name[this_len] == '/' &&
|
||||
strncmp(this_name, next_name, this_len) == 0) {
|
||||
if (10 < ++funny) {
|
||||
fprintf(stderr, "...\n");
|
||||
break;
|
||||
@ -442,7 +444,9 @@ int cache_tree_update(struct index_state *istate, int flags)
|
||||
if (i)
|
||||
return i;
|
||||
trace_performance_enter();
|
||||
trace2_region_enter("cache_tree", "update", the_repository);
|
||||
i = update_one(it, cache, entries, "", 0, &skip, flags);
|
||||
trace2_region_leave("cache_tree", "update", the_repository);
|
||||
trace_performance_leave("cache_tree_update");
|
||||
if (i < 0)
|
||||
return i;
|
||||
@ -492,7 +496,9 @@ static void write_one(struct strbuf *buffer, struct cache_tree *it,
|
||||
|
||||
void cache_tree_write(struct strbuf *sb, struct cache_tree *root)
|
||||
{
|
||||
trace2_region_enter("cache_tree", "write", the_repository);
|
||||
write_one(sb, root, "", 0);
|
||||
trace2_region_leave("cache_tree", "write", the_repository);
|
||||
}
|
||||
|
||||
static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
|
||||
@ -581,9 +587,16 @@ static struct cache_tree *read_one(const char **buffer, unsigned long *size_p)
|
||||
|
||||
struct cache_tree *cache_tree_read(const char *buffer, unsigned long size)
|
||||
{
|
||||
struct cache_tree *result;
|
||||
|
||||
if (buffer[0])
|
||||
return NULL; /* not the whole tree */
|
||||
return read_one(&buffer, &size);
|
||||
|
||||
trace2_region_enter("cache_tree", "read", the_repository);
|
||||
result = read_one(&buffer, &size);
|
||||
trace2_region_leave("cache_tree", "read", the_repository);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static struct cache_tree *cache_tree_find(struct cache_tree *it, const char *path)
|
||||
@ -733,10 +746,13 @@ void prime_cache_tree(struct repository *r,
|
||||
struct index_state *istate,
|
||||
struct tree *tree)
|
||||
{
|
||||
trace2_region_enter("cache-tree", "prime_cache_tree", the_repository);
|
||||
cache_tree_free(&istate->cache_tree);
|
||||
istate->cache_tree = cache_tree();
|
||||
|
||||
prime_cache_tree_rec(r, istate->cache_tree, tree);
|
||||
istate->cache_changed |= CACHE_TREE_CHANGED;
|
||||
trace2_region_leave("cache-tree", "prime_cache_tree", the_repository);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -33,7 +33,7 @@ test_expect_success 'reset --hard should restore unmerged ones' '
|
||||
|
||||
'
|
||||
|
||||
test_expect_success 'reset --hard did not corrupt index or cached-tree' '
|
||||
test_expect_success 'reset --hard did not corrupt index or cache-tree' '
|
||||
|
||||
T=$(git write-tree) &&
|
||||
rm -f .git/index &&
|
||||
|
33
tree-walk.c
33
tree-walk.c
@ -4,6 +4,7 @@
|
||||
#include "object-store.h"
|
||||
#include "tree.h"
|
||||
#include "pathspec.h"
|
||||
#include "json-writer.h"
|
||||
|
||||
static const char *get_mode(const char *str, unsigned int *modep)
|
||||
{
|
||||
@ -167,6 +168,25 @@ int tree_entry_gently(struct tree_desc *desc, struct name_entry *entry)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int traverse_trees_atexit_registered;
|
||||
static int traverse_trees_count;
|
||||
static int traverse_trees_cur_depth;
|
||||
static int traverse_trees_max_depth;
|
||||
|
||||
static void trace2_traverse_trees_statistics_atexit(void)
|
||||
{
|
||||
struct json_writer jw = JSON_WRITER_INIT;
|
||||
|
||||
jw_object_begin(&jw, 0);
|
||||
jw_object_intmax(&jw, "traverse_trees_count", traverse_trees_count);
|
||||
jw_object_intmax(&jw, "traverse_trees_max_depth", traverse_trees_max_depth);
|
||||
jw_end(&jw);
|
||||
|
||||
trace2_data_json("traverse_trees", the_repository, "statistics", &jw);
|
||||
|
||||
jw_release(&jw);
|
||||
}
|
||||
|
||||
void setup_traverse_info(struct traverse_info *info, const char *base)
|
||||
{
|
||||
size_t pathlen = strlen(base);
|
||||
@ -180,6 +200,11 @@ void setup_traverse_info(struct traverse_info *info, const char *base)
|
||||
info->namelen = pathlen;
|
||||
if (pathlen)
|
||||
info->prev = &dummy;
|
||||
|
||||
if (trace2_is_enabled() && !traverse_trees_atexit_registered) {
|
||||
atexit(trace2_traverse_trees_statistics_atexit);
|
||||
traverse_trees_atexit_registered = 1;
|
||||
}
|
||||
}
|
||||
|
||||
char *make_traverse_path(char *path, size_t pathlen,
|
||||
@ -416,6 +441,12 @@ int traverse_trees(struct index_state *istate,
|
||||
int interesting = 1;
|
||||
char *traverse_path;
|
||||
|
||||
traverse_trees_count++;
|
||||
traverse_trees_cur_depth++;
|
||||
|
||||
if (traverse_trees_cur_depth > traverse_trees_max_depth)
|
||||
traverse_trees_max_depth = traverse_trees_cur_depth;
|
||||
|
||||
if (n >= ARRAY_SIZE(entry))
|
||||
BUG("traverse_trees() called with too many trees (%d)", n);
|
||||
|
||||
@ -515,6 +546,8 @@ int traverse_trees(struct index_state *istate,
|
||||
free(traverse_path);
|
||||
info->traverse_path = NULL;
|
||||
strbuf_release(&base);
|
||||
|
||||
traverse_trees_cur_depth--;
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -1580,6 +1580,8 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
|
||||
die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
|
||||
|
||||
trace_performance_enter();
|
||||
trace2_region_enter("unpack_trees", "unpack_trees", the_repository);
|
||||
|
||||
if (!core_apply_sparse_checkout || !o->update)
|
||||
o->skip_sparse_checkout = 1;
|
||||
if (!o->skip_sparse_checkout && !o->pl) {
|
||||
@ -1653,7 +1655,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
|
||||
}
|
||||
|
||||
trace_performance_enter();
|
||||
trace2_region_enter("unpack_trees", "traverse_trees", the_repository);
|
||||
ret = traverse_trees(o->src_index, len, t, &info);
|
||||
trace2_region_leave("unpack_trees", "traverse_trees", the_repository);
|
||||
trace_performance_leave("traverse_trees");
|
||||
if (ret < 0)
|
||||
goto return_failed;
|
||||
@ -1741,6 +1745,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options
|
||||
done:
|
||||
if (free_pattern_list)
|
||||
clear_pattern_list(&pl);
|
||||
trace2_region_leave("unpack_trees", "unpack_trees", the_repository);
|
||||
trace_performance_leave("unpack_trees");
|
||||
return ret;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user