mirror of
https://github.com/git/git.git
synced 2024-11-27 20:14:30 +08:00
Merge branch 'jk/pack-delta-reuse-with-bitmap'
When creating a thin pack, which allows objects to be made into a delta against another object that is not in the resulting pack but is known to be present on the receiving end, the code learned to take advantage of the reachability bitmap; this allows the server to send a delta against a base beyond the "boundary" commit. * jk/pack-delta-reuse-with-bitmap: pack-objects: reuse on-disk deltas for thin "have" objects pack-bitmap: save "have" bitmap from walk t/perf: add perf tests for fetches from a bitmapped server t/perf: add infrastructure for measuring sizes t/perf: factor out percent calculations t/perf: factor boilerplate out of test_perf
This commit is contained in:
commit
3ebdef2e1b
@ -41,6 +41,7 @@
|
||||
#define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
|
||||
#define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
|
||||
#define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
|
||||
#define SET_DELTA_EXT(obj, oid) oe_set_delta_ext(&to_pack, obj, oid)
|
||||
#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
|
||||
#define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
|
||||
#define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
|
||||
@ -60,6 +61,7 @@ static struct packing_data to_pack;
|
||||
|
||||
static struct pack_idx_entry **written_list;
|
||||
static uint32_t nr_result, nr_written, nr_seen;
|
||||
static struct bitmap_index *bitmap_git;
|
||||
|
||||
static int non_empty;
|
||||
static int reuse_delta = 1, reuse_object = 1;
|
||||
@ -80,6 +82,7 @@ static unsigned long pack_size_limit;
|
||||
static int depth = 50;
|
||||
static int delta_search_threads;
|
||||
static int pack_to_stdout;
|
||||
static int thin;
|
||||
static int num_preferred_base;
|
||||
static struct progress *progress_state;
|
||||
|
||||
@ -1538,11 +1541,15 @@ static void check_object(struct object_entry *entry)
|
||||
break;
|
||||
}
|
||||
|
||||
if (base_ref && (base_entry = packlist_find(&to_pack, base_ref, NULL))) {
|
||||
if (base_ref && (
|
||||
(base_entry = packlist_find(&to_pack, base_ref, NULL)) ||
|
||||
(thin &&
|
||||
bitmap_has_sha1_in_uninteresting(bitmap_git, base_ref)))) {
|
||||
/*
|
||||
* If base_ref was set above that means we wish to
|
||||
* reuse delta data, and we even found that base
|
||||
* in the list of objects we want to pack. Goodie!
|
||||
* reuse delta data, and either we found that object in
|
||||
* the list of objects we want to pack, or it's one we
|
||||
* know the receiver has.
|
||||
*
|
||||
* Depth value does not matter - find_deltas() will
|
||||
* never consider reused delta as the base object to
|
||||
@ -1551,10 +1558,16 @@ static void check_object(struct object_entry *entry)
|
||||
*/
|
||||
oe_set_type(entry, entry->in_pack_type);
|
||||
SET_SIZE(entry, in_pack_size); /* delta size */
|
||||
SET_DELTA(entry, base_entry);
|
||||
SET_DELTA_SIZE(entry, in_pack_size);
|
||||
entry->delta_sibling_idx = base_entry->delta_child_idx;
|
||||
SET_DELTA_CHILD(base_entry, entry);
|
||||
|
||||
if (base_entry) {
|
||||
SET_DELTA(entry, base_entry);
|
||||
entry->delta_sibling_idx = base_entry->delta_child_idx;
|
||||
SET_DELTA_CHILD(base_entry, entry);
|
||||
} else {
|
||||
SET_DELTA_EXT(entry, base_ref);
|
||||
}
|
||||
|
||||
unuse_pack(&w_curs);
|
||||
return;
|
||||
}
|
||||
@ -2979,7 +2992,6 @@ static int pack_options_allow_reuse(void)
|
||||
|
||||
static int get_object_list_from_bitmap(struct rev_info *revs)
|
||||
{
|
||||
struct bitmap_index *bitmap_git;
|
||||
if (!(bitmap_git = prepare_bitmap_walk(revs)))
|
||||
return -1;
|
||||
|
||||
@ -2995,7 +3007,6 @@ static int get_object_list_from_bitmap(struct rev_info *revs)
|
||||
}
|
||||
|
||||
traverse_bitmap_commit_list(bitmap_git, &add_object_entry_from_bitmap);
|
||||
free_bitmap_index(bitmap_git);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -3143,7 +3154,6 @@ static int option_parse_unpack_unreachable(const struct option *opt,
|
||||
int cmd_pack_objects(int argc, const char **argv, const char *prefix)
|
||||
{
|
||||
int use_internal_rev_list = 0;
|
||||
int thin = 0;
|
||||
int shallow = 0;
|
||||
int all_progress_implied = 0;
|
||||
struct argv_array rp = ARGV_ARRAY_INIT;
|
||||
|
@ -86,6 +86,9 @@ struct bitmap_index {
|
||||
/* Bitmap result of the last performed walk */
|
||||
struct bitmap *result;
|
||||
|
||||
/* "have" bitmap from the last performed walk */
|
||||
struct bitmap *haves;
|
||||
|
||||
/* Version of the bitmap index */
|
||||
unsigned int version;
|
||||
|
||||
@ -759,8 +762,8 @@ struct bitmap_index *prepare_bitmap_walk(struct rev_info *revs)
|
||||
bitmap_and_not(wants_bitmap, haves_bitmap);
|
||||
|
||||
bitmap_git->result = wants_bitmap;
|
||||
bitmap_git->haves = haves_bitmap;
|
||||
|
||||
bitmap_free(haves_bitmap);
|
||||
return bitmap_git;
|
||||
|
||||
cleanup:
|
||||
@ -1114,5 +1117,25 @@ void free_bitmap_index(struct bitmap_index *b)
|
||||
free(b->ext_index.objects);
|
||||
free(b->ext_index.hashes);
|
||||
bitmap_free(b->result);
|
||||
bitmap_free(b->haves);
|
||||
free(b);
|
||||
}
|
||||
|
||||
int bitmap_has_sha1_in_uninteresting(struct bitmap_index *bitmap_git,
|
||||
const unsigned char *sha1)
|
||||
{
|
||||
int pos;
|
||||
|
||||
if (!bitmap_git)
|
||||
return 0; /* no bitmap loaded */
|
||||
if (!bitmap_git->result)
|
||||
BUG("failed to perform bitmap walk before querying");
|
||||
if (!bitmap_git->haves)
|
||||
return 0; /* walk had no "haves" */
|
||||
|
||||
pos = bitmap_position_packfile(bitmap_git, sha1);
|
||||
if (pos < 0)
|
||||
return 0;
|
||||
|
||||
return bitmap_get(bitmap_git->haves, pos);
|
||||
}
|
||||
|
@ -53,6 +53,13 @@ int rebuild_existing_bitmaps(struct bitmap_index *, struct packing_data *mapping
|
||||
khash_sha1 *reused_bitmaps, int show_progress);
|
||||
void free_bitmap_index(struct bitmap_index *);
|
||||
|
||||
/*
|
||||
* After a traversal has been performed on the bitmap_index, this can be
|
||||
* queried to see if a particular object was reachable from any of the
|
||||
* objects flagged as UNINTERESTING.
|
||||
*/
|
||||
int bitmap_has_sha1_in_uninteresting(struct bitmap_index *, const unsigned char *sha1);
|
||||
|
||||
void bitmap_writer_show_progress(int show);
|
||||
void bitmap_writer_set_checksum(unsigned char *sha1);
|
||||
void bitmap_writer_build_type_index(struct packing_data *to_pack,
|
||||
|
@ -181,3 +181,22 @@ struct object_entry *packlist_alloc(struct packing_data *pdata,
|
||||
|
||||
return new_entry;
|
||||
}
|
||||
|
||||
void oe_set_delta_ext(struct packing_data *pdata,
|
||||
struct object_entry *delta,
|
||||
const unsigned char *sha1)
|
||||
{
|
||||
struct object_entry *base;
|
||||
|
||||
ALLOC_GROW(pdata->ext_bases, pdata->nr_ext + 1, pdata->alloc_ext);
|
||||
base = &pdata->ext_bases[pdata->nr_ext++];
|
||||
memset(base, 0, sizeof(*base));
|
||||
hashcpy(base->idx.oid.hash, sha1);
|
||||
|
||||
/* These flags mark that we are not part of the actual pack output. */
|
||||
base->preferred_base = 1;
|
||||
base->filled = 1;
|
||||
|
||||
delta->ext_base = 1;
|
||||
delta->delta_idx = base - pdata->ext_bases + 1;
|
||||
}
|
||||
|
@ -112,6 +112,7 @@ struct object_entry {
|
||||
unsigned filled:1; /* assigned write-order */
|
||||
unsigned dfs_state:OE_DFS_STATE_BITS;
|
||||
unsigned depth:OE_DEPTH_BITS;
|
||||
unsigned ext_base:1; /* delta_idx points outside packlist */
|
||||
|
||||
/*
|
||||
* pahole results on 64-bit linux (gcc and clang)
|
||||
@ -147,6 +148,14 @@ struct packing_data {
|
||||
pthread_mutex_t lock;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This list contains entries for bases which we know the other side
|
||||
* has (e.g., via reachability bitmaps), but which aren't in our
|
||||
* "objects" list.
|
||||
*/
|
||||
struct object_entry *ext_bases;
|
||||
uint32_t nr_ext, alloc_ext;
|
||||
|
||||
uintmax_t oe_size_limit;
|
||||
uintmax_t oe_delta_size_limit;
|
||||
};
|
||||
@ -249,9 +258,12 @@ static inline struct object_entry *oe_delta(
|
||||
const struct packing_data *pack,
|
||||
const struct object_entry *e)
|
||||
{
|
||||
if (e->delta_idx)
|
||||
if (!e->delta_idx)
|
||||
return NULL;
|
||||
if (e->ext_base)
|
||||
return &pack->ext_bases[e->delta_idx - 1];
|
||||
else
|
||||
return &pack->objects[e->delta_idx - 1];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void oe_set_delta(struct packing_data *pack,
|
||||
@ -264,6 +276,10 @@ static inline void oe_set_delta(struct packing_data *pack,
|
||||
e->delta_idx = 0;
|
||||
}
|
||||
|
||||
void oe_set_delta_ext(struct packing_data *pack,
|
||||
struct object_entry *e,
|
||||
const unsigned char *sha1);
|
||||
|
||||
static inline struct object_entry *oe_delta_child(
|
||||
const struct packing_data *pack,
|
||||
const struct object_entry *e)
|
||||
|
@ -168,3 +168,28 @@ that
|
||||
While we have tried to make sure that it can cope with embedded
|
||||
whitespace and other special characters, it will not work with
|
||||
multi-line data.
|
||||
|
||||
Rather than tracking the performance by run-time as `test_perf` does, you
|
||||
may also track output size by using `test_size`. The stdout of the
|
||||
function should be a single numeric value, which will be captured and
|
||||
shown in the aggregated output. For example:
|
||||
|
||||
test_perf 'time foo' '
|
||||
./foo >foo.out
|
||||
'
|
||||
|
||||
test_size 'output size'
|
||||
wc -c <foo.out
|
||||
'
|
||||
|
||||
might produce output like:
|
||||
|
||||
Test origin HEAD
|
||||
-------------------------------------------------------------
|
||||
1234.1 time foo 0.37(0.79+0.02) 0.26(0.51+0.02) -29.7%
|
||||
1234.2 output size 4.3M 3.6M -14.7%
|
||||
|
||||
The item being measured (and its units) is up to the test; the context
|
||||
and the test title should make it clear to the user whether bigger or
|
||||
smaller numbers are better. Unlike test_perf, the test code will only be
|
||||
run once, since output sizes tend to be more deterministic than timings.
|
||||
|
@ -13,27 +13,42 @@ sub get_times {
|
||||
my $line = <$fh>;
|
||||
return undef if not defined $line;
|
||||
close $fh or die "cannot close $name: $!";
|
||||
$line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/
|
||||
or die "bad input line: $line";
|
||||
my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3;
|
||||
return ($rt, $4, $5);
|
||||
# times
|
||||
if ($line =~ /^(?:(\d+):)?(\d+):(\d+(?:\.\d+)?) (\d+(?:\.\d+)?) (\d+(?:\.\d+)?)$/) {
|
||||
my $rt = ((defined $1 ? $1 : 0.0)*60+$2)*60+$3;
|
||||
return ($rt, $4, $5);
|
||||
# size
|
||||
} elsif ($line =~ /^\d+$/) {
|
||||
return $&;
|
||||
} else {
|
||||
die "bad input line: $line";
|
||||
}
|
||||
}
|
||||
|
||||
sub relative_change {
|
||||
my ($r, $firstr) = @_;
|
||||
if ($firstr > 0) {
|
||||
return sprintf "%+.1f%%", 100.0*($r-$firstr)/$firstr;
|
||||
} elsif ($r == 0) {
|
||||
return "=";
|
||||
} else {
|
||||
return "+inf";
|
||||
}
|
||||
}
|
||||
|
||||
sub format_times {
|
||||
my ($r, $u, $s, $firstr) = @_;
|
||||
# no value means we did not finish the test
|
||||
if (!defined $r) {
|
||||
return "<missing>";
|
||||
}
|
||||
my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;
|
||||
if (defined $firstr) {
|
||||
if ($firstr > 0) {
|
||||
$out .= sprintf " %+.1f%%", 100.0*($r-$firstr)/$firstr;
|
||||
} elsif ($r == 0) {
|
||||
$out .= " =";
|
||||
} else {
|
||||
$out .= " +inf";
|
||||
}
|
||||
# a single value means we have a size, not times
|
||||
if (!defined $u) {
|
||||
return format_size($r, $firstr);
|
||||
}
|
||||
# otherwise, we have real/user/system times
|
||||
my $out = sprintf "%.2f(%.2f+%.2f)", $r, $u, $s;
|
||||
$out .= ' ' . relative_change($r, $firstr) if defined $firstr;
|
||||
return $out;
|
||||
}
|
||||
|
||||
@ -51,6 +66,25 @@ EOT
|
||||
exit(1);
|
||||
}
|
||||
|
||||
sub human_size {
|
||||
my $n = shift;
|
||||
my @units = ('', qw(K M G));
|
||||
while ($n > 900 && @units > 1) {
|
||||
$n /= 1000;
|
||||
shift @units;
|
||||
}
|
||||
return $n unless length $units[0];
|
||||
return sprintf '%.1f%s', $n, $units[0];
|
||||
}
|
||||
|
||||
sub format_size {
|
||||
my ($size, $first) = @_;
|
||||
# match the width of a time: 0.00(0.00+0.00)
|
||||
my $out = sprintf '%15s', human_size($size);
|
||||
$out .= ' ' . relative_change($size, $first) if defined $first;
|
||||
return $out;
|
||||
}
|
||||
|
||||
my (@dirs, %dirnames, %dirabbrevs, %prefixes, @tests,
|
||||
$codespeed, $sortby, $subsection, $reponame);
|
||||
|
||||
@ -181,7 +215,14 @@ sub print_default_results {
|
||||
my $firstr;
|
||||
for my $i (0..$#dirs) {
|
||||
my $d = $dirs[$i];
|
||||
$times{$prefixes{$d}.$t} = [get_times("$resultsdir/$prefixes{$d}$t.times")];
|
||||
my $base = "$resultsdir/$prefixes{$d}$t";
|
||||
$times{$prefixes{$d}.$t} = [];
|
||||
foreach my $type (qw(times size)) {
|
||||
if (-e "$base.$type") {
|
||||
$times{$prefixes{$d}.$t} = [get_times("$base.$type")];
|
||||
last;
|
||||
}
|
||||
}
|
||||
my ($r,$u,$s) = @{$times{$prefixes{$d}.$t}};
|
||||
my $w = length format_times($r,$u,$s,$firstr);
|
||||
$colwidth[$i] = $w if $w > $colwidth[$i];
|
||||
|
45
t/perf/p5311-pack-bitmaps-fetch.sh
Executable file
45
t/perf/p5311-pack-bitmaps-fetch.sh
Executable file
@ -0,0 +1,45 @@
|
||||
#!/bin/sh
|
||||
|
||||
test_description='performance of fetches from bitmapped packs'
|
||||
. ./perf-lib.sh
|
||||
|
||||
test_perf_default_repo
|
||||
|
||||
test_expect_success 'create bitmapped server repo' '
|
||||
git config pack.writebitmaps true &&
|
||||
git config pack.writebitmaphashcache true &&
|
||||
git repack -ad
|
||||
'
|
||||
|
||||
# simulate a fetch from a repository that last fetched N days ago, for
|
||||
# various values of N. We do so by following the first-parent chain,
|
||||
# and assume the first entry in the chain that is N days older than the current
|
||||
# HEAD is where the HEAD would have been then.
|
||||
for days in 1 2 4 8 16 32 64 128; do
|
||||
title=$(printf '%10s' "($days days)")
|
||||
test_expect_success "setup revs from $days days ago" '
|
||||
now=$(git log -1 --format=%ct HEAD) &&
|
||||
then=$(($now - ($days * 86400))) &&
|
||||
tip=$(git rev-list -1 --first-parent --until=$then HEAD) &&
|
||||
{
|
||||
echo HEAD &&
|
||||
echo ^$tip
|
||||
} >revs
|
||||
'
|
||||
|
||||
test_perf "server $title" '
|
||||
git pack-objects --stdout --revs \
|
||||
--thin --delta-base-offset \
|
||||
<revs >tmp.pack
|
||||
'
|
||||
|
||||
test_size "size $title" '
|
||||
wc -c <tmp.pack
|
||||
'
|
||||
|
||||
test_perf "client $title" '
|
||||
git index-pack --stdin --fix-thin <tmp.pack
|
||||
'
|
||||
done
|
||||
|
||||
test_done
|
@ -179,8 +179,8 @@ exit $ret' >&3 2>&4
|
||||
return "$eval_ret"
|
||||
}
|
||||
|
||||
|
||||
test_perf () {
|
||||
test_wrapper_ () {
|
||||
test_wrapper_func_=$1; shift
|
||||
test_start_
|
||||
test "$#" = 3 && { test_prereq=$1; shift; } || test_prereq=
|
||||
test "$#" = 2 ||
|
||||
@ -191,37 +191,59 @@ test_perf () {
|
||||
base=$(basename "$0" .sh)
|
||||
echo "$test_count" >>"$perf_results_dir"/$base.subtests
|
||||
echo "$1" >"$perf_results_dir"/$base.$test_count.descr
|
||||
if test -z "$verbose"; then
|
||||
printf "%s" "perf $test_count - $1:"
|
||||
else
|
||||
echo "perf $test_count - $1:"
|
||||
fi
|
||||
for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do
|
||||
say >&3 "running: $2"
|
||||
if test_run_perf_ "$2"
|
||||
then
|
||||
if test -z "$verbose"; then
|
||||
printf " %s" "$i"
|
||||
else
|
||||
echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:"
|
||||
fi
|
||||
else
|
||||
test -z "$verbose" && echo
|
||||
test_failure_ "$@"
|
||||
break
|
||||
fi
|
||||
done
|
||||
if test -z "$verbose"; then
|
||||
echo " ok"
|
||||
else
|
||||
test_ok_ "$1"
|
||||
fi
|
||||
base="$perf_results_dir"/"$perf_results_prefix$(basename "$0" .sh)"."$test_count"
|
||||
"$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times
|
||||
"$test_wrapper_func_" "$@"
|
||||
fi
|
||||
|
||||
test_finish_
|
||||
}
|
||||
|
||||
test_perf_ () {
|
||||
if test -z "$verbose"; then
|
||||
printf "%s" "perf $test_count - $1:"
|
||||
else
|
||||
echo "perf $test_count - $1:"
|
||||
fi
|
||||
for i in $(test_seq 1 $GIT_PERF_REPEAT_COUNT); do
|
||||
say >&3 "running: $2"
|
||||
if test_run_perf_ "$2"
|
||||
then
|
||||
if test -z "$verbose"; then
|
||||
printf " %s" "$i"
|
||||
else
|
||||
echo "* timing run $i/$GIT_PERF_REPEAT_COUNT:"
|
||||
fi
|
||||
else
|
||||
test -z "$verbose" && echo
|
||||
test_failure_ "$@"
|
||||
break
|
||||
fi
|
||||
done
|
||||
if test -z "$verbose"; then
|
||||
echo " ok"
|
||||
else
|
||||
test_ok_ "$1"
|
||||
fi
|
||||
"$TEST_DIRECTORY"/perf/min_time.perl test_time.* >"$base".times
|
||||
}
|
||||
|
||||
test_perf () {
|
||||
test_wrapper_ test_perf_ "$@"
|
||||
}
|
||||
|
||||
test_size_ () {
|
||||
say >&3 "running: $2"
|
||||
if test_eval_ "$2" 3>"$base".size; then
|
||||
test_ok_ "$1"
|
||||
else
|
||||
test_failure_ "$@"
|
||||
fi
|
||||
}
|
||||
|
||||
test_size () {
|
||||
test_wrapper_ test_size_ "$@"
|
||||
}
|
||||
|
||||
# We extend test_done to print timings at the end (./run disables this
|
||||
# and does it after running everything)
|
||||
test_at_end_hook_ () {
|
||||
|
Loading…
Reference in New Issue
Block a user