git/t/lib-bitmap.sh
Taylor Blau fcb2205b77 midx: implement support for writing incremental MIDX chains
Now that the rest of the MIDX subsystem and relevant callers have been
updated to learn about how to read and process incremental MIDX chains,
let's finally update the implementation in `write_midx_internal()` to be
able to write incremental MIDX chains.

This new feature is available behind the `--incremental` option for the
`multi-pack-index` builtin, like so:

    $ git multi-pack-index write --incremental

The implementation for doing so is relatively straightforward, and boils
down to a handful of different kinds of changes implemented in this
patch:

  - The `compute_sorted_entries()` function is taught to reject objects
    which appear in any existing MIDX layer.

  - Functions like `write_midx_revindex()` are adjusted to write
    pack_order values which are offset by the number of objects in the
    base MIDX layer.

  - The end of `write_midx_internal()` is adjusted to move
    non-incremental MIDX files when necessary (i.e. when creating an
    incremental chain with an existing non-incremental MIDX in the
    repository).

There are a handful of other changes that are introduced, like new
functions to clear incremental MIDX files that are unrelated to the
current chain (using the same "keep_hash" mechanism as in the
non-incremental case).

The tests explicitly exercising the new incremental MIDX feature are
relatively limited for two reasons:

  1. Most of the "interesting" behavior is already thoroughly covered in
     t5319-multi-pack-index.sh, which handles the core logic of reading
     objects through a MIDX.

     The new tests in t5334-incremental-multi-pack-index.sh are mostly
     focused on creating and destroying incremental MIDXs, as well as
     stitching their results together across layers.

  2. A new GIT_TEST environment variable is added called
     "GIT_TEST_MULTI_PACK_INDEX_WRITE_INCREMENTAL", which modifies the
     entire test suite to write incremental MIDXs after repacking when
     combined with the "GIT_TEST_MULTI_PACK_INDEX" variable.

     This exercises the long tail of other interesting behavior that is
     defined implicitly throughout the rest of the CI suite. It is
     likewise added to the linux-TEST-vars job.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-08-06 12:01:39 -07:00

450 lines
13 KiB
Bash

# Helpers for scripts testing bitmap functionality; see t5310 for
# example usage.
. "$TEST_DIRECTORY"/lib-midx.sh
objdir=.git/objects
midx=$objdir/pack/multi-pack-index
# Compare a file containing rev-list bitmap traversal output to its non-bitmap
# counterpart. You can't just use test_cmp for this, because the two produce
# subtly different output:
#
# - regular output is in traversal order, whereas bitmap is split by type,
# with non-packed objects at the end
#
# - regular output has a space and the pathname appended to non-commit
# objects; bitmap output omits this
#
# This function normalizes and compares the two. The second file should
# always be the bitmap output.
test_bitmap_traversal () {
if test "$1" = "--no-confirm-bitmaps"
then
shift
elif cmp "$1" "$2"
then
echo >&2 "identical raw outputs; are you sure bitmaps were used?"
return 1
fi &&
cut -d' ' -f1 "$1" | sort >"$1.normalized" &&
sort "$2" >"$2.normalized" &&
test_cmp "$1.normalized" "$2.normalized" &&
rm -f "$1.normalized" "$2.normalized"
}
# To ensure the logic for "maximal commits" is exercised, make
# the repository a bit more complicated.
#
# other second
# * *
# (99 commits) (99 commits)
# * *
# |\ /|
# | * octo-other octo-second * |
# |/|\_________ ____________/|\|
# | \ \/ __________/ |
# | | ________/\ / |
# * |/ * merge-right *
# | _|__________/ \____________ |
# |/ | \|
# (l1) * * merge-left * (r1)
# | / \________________________ |
# |/ \|
# (l2) * * (r2)
# \___________________________ |
# \|
# * (base)
#
# We only push bits down the first-parent history, which
# makes some of these commits unimportant!
#
# The important part for the maximal commit algorithm is how
# the bitmasks are extended. Assuming starting bit positions
# for second (bit 0) and other (bit 1), the bitmasks at the
# end should be:
#
# second: 1 (maximal, selected)
# other: 01 (maximal, selected)
# (base): 11 (maximal)
#
# This complicated history was important for a previous
# version of the walk that guarantees never walking a
# commit multiple times. That goal might be important
# again, so preserve this complicated case. For now, this
# test will guarantee that the bitmaps are computed
# correctly, even with the repeat calculations.
setup_bitmap_history() {
test_expect_success 'setup repo with moderate-sized history' '
test_commit_bulk --id=file 10 &&
git branch -M second &&
git checkout -b other HEAD~5 &&
test_commit_bulk --id=side 10 &&
# add complicated history setup, including merges and
# ambiguous merge-bases
git checkout -b merge-left other~2 &&
git merge second~2 -m "merge-left" &&
git checkout -b merge-right second~1 &&
git merge other~1 -m "merge-right" &&
git checkout -b octo-second second &&
git merge merge-left merge-right -m "octopus-second" &&
git checkout -b octo-other other &&
git merge merge-left merge-right -m "octopus-other" &&
git checkout other &&
git merge octo-other -m "pull octopus" &&
git checkout second &&
git merge octo-second -m "pull octopus" &&
# Remove these branches so they are not selected
# as bitmap tips
git branch -D merge-left &&
git branch -D merge-right &&
git branch -D octo-other &&
git branch -D octo-second &&
# add padding to make these merges less interesting
# and avoid having them selected for bitmaps
test_commit_bulk --id=file 100 &&
git checkout other &&
test_commit_bulk --id=side 100 &&
git checkout second &&
bitmaptip=$(git rev-parse second) &&
blob=$(echo tagged-blob | git hash-object -w --stdin) &&
git tag tagged-blob $blob
'
}
rev_list_tests_head () {
test_expect_success "counting commits via bitmap ($state, $branch)" '
git rev-list --count $branch >expect &&
git rev-list --use-bitmap-index --count $branch >actual &&
test_cmp expect actual
'
test_expect_success "counting partial commits via bitmap ($state, $branch)" '
git rev-list --count $branch~5..$branch >expect &&
git rev-list --use-bitmap-index --count $branch~5..$branch >actual &&
test_cmp expect actual
'
test_expect_success "counting commits with limit ($state, $branch)" '
git rev-list --count -n 1 $branch >expect &&
git rev-list --use-bitmap-index --count -n 1 $branch >actual &&
test_cmp expect actual
'
test_expect_success "counting non-linear history ($state, $branch)" '
git rev-list --count other...second >expect &&
git rev-list --use-bitmap-index --count other...second >actual &&
test_cmp expect actual
'
test_expect_success "counting commits with limiting ($state, $branch)" '
git rev-list --count $branch -- 1.t >expect &&
git rev-list --use-bitmap-index --count $branch -- 1.t >actual &&
test_cmp expect actual
'
test_expect_success "counting objects via bitmap ($state, $branch)" '
git rev-list --count --objects $branch >expect &&
git rev-list --use-bitmap-index --count --objects $branch >actual &&
test_cmp expect actual
'
test_expect_success "enumerate commits ($state, $branch)" '
git rev-list --use-bitmap-index $branch >actual &&
git rev-list $branch >expect &&
test_bitmap_traversal --no-confirm-bitmaps expect actual
'
test_expect_success "enumerate --objects ($state, $branch)" '
git rev-list --objects --use-bitmap-index $branch >actual &&
git rev-list --objects $branch >expect &&
test_bitmap_traversal expect actual
'
test_expect_success "bitmap --objects handles non-commit objects ($state, $branch)" '
git rev-list --objects --use-bitmap-index $branch tagged-blob >actual &&
grep $blob actual
'
}
rev_list_tests () {
state=$1
for branch in "second" "other"
do
rev_list_tests_head
done
}
basic_bitmap_tests () {
tip="$1"
test_expect_success 'rev-list --test-bitmap verifies bitmaps' "
git rev-list --test-bitmap "${tip:-HEAD}"
"
rev_list_tests 'full bitmap'
test_expect_success 'clone from bitmapped repository' '
rm -fr clone.git &&
git clone --no-local --bare . clone.git &&
git rev-parse HEAD >expect &&
git --git-dir=clone.git rev-parse HEAD >actual &&
test_cmp expect actual
'
test_expect_success 'partial clone from bitmapped repository' '
test_config uploadpack.allowfilter true &&
rm -fr partial-clone.git &&
git clone --no-local --bare --filter=blob:none . partial-clone.git &&
(
cd partial-clone.git &&
pack=$(echo objects/pack/*.pack) &&
git verify-pack -v "$pack" >have &&
awk "/blob/ { print \$1 }" <have >blobs &&
# we expect this single blob because of the direct ref
git rev-parse refs/tags/tagged-blob >expect &&
test_cmp expect blobs
)
'
test_expect_success 'setup further non-bitmapped commits' '
test_commit_bulk --id=further 10
'
rev_list_tests 'partial bitmap'
test_expect_success 'fetch (partial bitmap)' '
git --git-dir=clone.git fetch origin second:second &&
git rev-parse HEAD >expect &&
git --git-dir=clone.git rev-parse HEAD >actual &&
test_cmp expect actual
'
test_expect_success 'enumerating progress counts pack-reused objects' '
count=$(git rev-list --objects --all --count) &&
git repack -adb &&
# check first with only reused objects; confirm that our
# progress showed the right number, and also that we did
# pack-reuse as expected. Check only the final "done"
# line of the meter (there may be an arbitrary number of
# intermediate lines ending with CR).
GIT_PROGRESS_DELAY=0 \
git pack-objects --all --stdout --progress \
</dev/null >/dev/null 2>stderr &&
grep "Enumerating objects: $count, done" stderr &&
grep "pack-reused $count" stderr &&
# now the same but with one non-reused object
git commit --allow-empty -m "an extra commit object" &&
GIT_PROGRESS_DELAY=0 \
git pack-objects --all --stdout --progress \
</dev/null >/dev/null 2>stderr &&
grep "Enumerating objects: $((count+1)), done" stderr &&
grep "pack-reused $count" stderr
'
}
# have_delta <obj> <expected_base>
#
# Note that because this relies on cat-file, it might find _any_ copy of an
# object in the repository. The caller is responsible for making sure
# there's only one (e.g., via "repack -ad", or having just fetched a copy).
have_delta () {
echo $2 >expect &&
echo $1 | git cat-file --batch-check="%(deltabase)" >actual &&
test_cmp expect actual
}
# midx_pack_source <obj>
midx_pack_source () {
test-tool read-midx --show-objects .git/objects | grep "^$1 " | cut -f2
}
test_rev_exists () {
commit="$1"
kind="$2"
test_expect_success "reverse index exists ($kind)" '
GIT_TRACE2_EVENT=$(pwd)/event.trace \
git rev-list --test-bitmap "$commit" &&
if test "rev" = "$kind"
then
test_path_is_file $midx-$(midx_checksum $objdir).rev
fi &&
grep "\"category\":\"load_midx_revindex\",\"key\":\"source\",\"value\":\"$kind\"" event.trace
'
}
midx_bitmap_core () {
rev_kind="${1:-midx}"
setup_bitmap_history
test_expect_success 'create single-pack midx with bitmaps' '
git repack -ad &&
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
'
test_rev_exists HEAD "$rev_kind"
basic_bitmap_tests
test_expect_success 'create new additional packs' '
for i in $(test_seq 1 16)
do
test_commit "$i" &&
git repack -d || return 1
done &&
git checkout -b other2 HEAD~8 &&
for i in $(test_seq 1 8)
do
test_commit "side-$i" &&
git repack -d || return 1
done &&
git checkout second
'
test_expect_success 'create multi-pack midx with bitmaps' '
git multi-pack-index write --bitmap &&
ls $objdir/pack/pack-*.pack >packs &&
test_line_count = 25 packs &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
'
test_rev_exists HEAD "$rev_kind"
basic_bitmap_tests
test_expect_success '--no-bitmap is respected when bitmaps exist' '
git multi-pack-index write --bitmap &&
test_commit respect--no-bitmap &&
git repack -d &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap &&
git multi-pack-index write --no-bitmap &&
test_path_is_file $midx &&
test_path_is_missing $midx-$(midx_checksum $objdir).bitmap &&
test_path_is_missing $midx-$(midx_checksum $objdir).rev
'
test_expect_success 'setup midx with base from later pack' '
# Write a and b so that "a" is a delta on top of base "b", since Git
# prefers to delete contents out of a base rather than add to a shorter
# object.
test_seq 1 128 >a &&
test_seq 1 130 >b &&
git add a b &&
git commit -m "initial commit" &&
a=$(git rev-parse HEAD:a) &&
b=$(git rev-parse HEAD:b) &&
# In the first pack, "a" is stored as a delta to "b".
p1=$(git pack-objects .git/objects/pack/pack <<-EOF
$a
$b
EOF
) &&
# In the second pack, "a" is missing, and "b" is not a delta nor base to
# any other object.
p2=$(git pack-objects .git/objects/pack/pack <<-EOF
$b
$(git rev-parse HEAD)
$(git rev-parse HEAD^{tree})
EOF
) &&
git prune-packed &&
# Use the second pack as the preferred source, so that "b" occurs
# earlier in the MIDX object order, rendering "a" unusable for pack
# reuse.
git multi-pack-index write --bitmap --preferred-pack=pack-$p2.idx &&
have_delta $a $b &&
test $(midx_pack_source $a) != $(midx_pack_source $b)
'
rev_list_tests 'full bitmap with backwards delta'
test_expect_success 'clone with bitmaps enabled' '
git clone --no-local --bare . clone-reverse-delta.git &&
test_when_finished "rm -fr clone-reverse-delta.git" &&
git rev-parse HEAD >expect &&
git --git-dir=clone-reverse-delta.git rev-parse HEAD >actual &&
test_cmp expect actual
'
test_expect_success 'changing the preferred pack does not corrupt bitmaps' '
rm -fr repo &&
git init repo &&
test_when_finished "rm -fr repo" &&
(
cd repo &&
test_commit A &&
test_commit B &&
git rev-list --objects --no-object-names HEAD^ >A.objects &&
git rev-list --objects --no-object-names HEAD^.. >B.objects &&
A=$(git pack-objects $objdir/pack/pack <A.objects) &&
B=$(git pack-objects $objdir/pack/pack <B.objects) &&
cat >indexes <<-EOF &&
pack-$A.idx
pack-$B.idx
EOF
git multi-pack-index write --bitmap --stdin-packs \
--preferred-pack=pack-$A.pack <indexes &&
git rev-list --test-bitmap A &&
git multi-pack-index write --bitmap --stdin-packs \
--preferred-pack=pack-$B.pack <indexes &&
git rev-list --test-bitmap A
)
'
}
midx_bitmap_partial_tests () {
rev_kind="${1:-midx}"
test_expect_success 'setup partial bitmaps' '
test_commit packed &&
git repack &&
test_commit loose &&
git multi-pack-index write --bitmap &&
test_path_is_file $midx &&
test_path_is_file $midx-$(midx_checksum $objdir).bitmap
'
test_rev_exists HEAD~ "$rev_kind"
basic_bitmap_tests HEAD~
}