Merge branch 'tb/multi-pack-reuse-fix'

A data corruption bug when multi-pack-index is used and the same
objects are stored in multiple packfiles has been corrected.

* tb/multi-pack-reuse-fix:
  builtin/pack-objects.c: do not open-code `MAX_PACK_OBJECT_HEADER`
  pack-bitmap.c: avoid repeated `pack_pos_to_offset()` during reuse
  builtin/pack-objects.c: translate bit positions during pack-reuse
  pack-bitmap: tag bitmapped packs with their corresponding MIDX
  t/t5332-multi-pack-reuse.sh: verify pack generation with --strict
This commit is contained in:
Junio C Hamano 2024-09-12 11:47:23 -07:00
commit 3bf057a0cd
5 changed files with 78 additions and 17 deletions

View File

@ -1072,7 +1072,7 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
fixup = find_reused_offset(offset) -
find_reused_offset(base_offset);
if (fixup) {
unsigned char ofs_header[10];
unsigned char ofs_header[MAX_PACK_OBJECT_HEADER];
unsigned i, ofs_len;
off_t ofs = offset - base_offset - fixup;
@ -1191,6 +1191,7 @@ static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
size_t pos = (i * BITS_IN_EWORD);
for (offset = 0; offset < BITS_IN_EWORD; ++offset) {
uint32_t pack_pos;
if ((word >> offset) == 0)
break;
@ -1199,14 +1200,41 @@ static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
continue;
if (pos + offset >= reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr)
goto done;
/*
* Can use bit positions directly, even for MIDX
* bitmaps. See comment in try_partial_reuse()
* for why.
*/
write_reused_pack_one(reuse_packfile->p,
pos + offset - reuse_packfile->bitmap_pos,
f, pack_start, &w_curs);
if (reuse_packfile->bitmap_pos) {
/*
* When doing multi-pack reuse on a
* non-preferred pack, translate bit positions
* from the MIDX pseudo-pack order back to their
* pack-relative positions before attempting
* reuse.
*/
struct multi_pack_index *m = reuse_packfile->from_midx;
uint32_t midx_pos;
off_t pack_ofs;
if (!m)
BUG("non-zero bitmap position without MIDX");
midx_pos = pack_pos_to_midx(m, pos + offset);
pack_ofs = nth_midxed_offset(m, midx_pos);
if (offset_to_pack_pos(reuse_packfile->p,
pack_ofs, &pack_pos) < 0)
BUG("could not find expected object at offset %"PRIuMAX" in pack %s",
(uintmax_t)pack_ofs,
pack_basename(reuse_packfile->p));
} else {
/*
* Can use bit positions directly, even for MIDX
* bitmaps. See comment in try_partial_reuse()
* for why.
*/
pack_pos = pos + offset;
}
write_reused_pack_one(reuse_packfile->p, pack_pos, f,
pack_start, &w_curs);
display_progress(progress_state, ++written);
}
}

1
midx.c
View File

@ -496,6 +496,7 @@ int nth_bitmapped_pack(struct repository *r, struct multi_pack_index *m,
MIDX_CHUNK_BITMAPPED_PACKS_WIDTH * local_pack_int_id +
sizeof(uint32_t));
bp->pack_int_id = pack_int_id;
bp->from_midx = m;
return 0;
}

View File

@ -2055,17 +2055,18 @@ static int try_partial_reuse(struct bitmap_index *bitmap_git,
struct bitmapped_pack *pack,
size_t bitmap_pos,
uint32_t pack_pos,
off_t offset,
struct bitmap *reuse,
struct pack_window **w_curs)
{
off_t offset, delta_obj_offset;
off_t delta_obj_offset;
enum object_type type;
unsigned long size;
if (pack_pos >= pack->p->num_objects)
return -1; /* not actually in the pack */
offset = delta_obj_offset = pack_pos_to_offset(pack->p, pack_pos);
delta_obj_offset = offset;
type = unpack_object_header(pack->p, w_curs, &offset, &size);
if (type < 0)
return -1; /* broken packfile, punt */
@ -2184,6 +2185,7 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git
for (offset = 0; offset < BITS_IN_EWORD; offset++) {
size_t bit_pos;
uint32_t pack_pos;
off_t ofs;
if (word >> offset == 0)
break;
@ -2198,7 +2200,6 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git
if (bitmap_is_midx(bitmap_git)) {
uint32_t midx_pos;
off_t ofs;
midx_pos = pack_pos_to_midx(bitmap_git->midx, bit_pos);
ofs = nth_midxed_offset(bitmap_git->midx, midx_pos);
@ -2213,10 +2214,12 @@ static void reuse_partial_packfile_from_bitmap_1(struct bitmap_index *bitmap_git
BUG("advanced beyond the end of pack %s (%"PRIuMAX" > %"PRIu32")",
pack_basename(pack->p), (uintmax_t)pack_pos,
pack->p->num_objects);
ofs = pack_pos_to_offset(pack->p, pack_pos);
}
if (try_partial_reuse(bitmap_git, pack, bit_pos,
pack_pos, reuse, &w_curs) < 0) {
pack_pos, ofs, reuse, &w_curs) < 0) {
/*
* try_partial_reuse indicated we couldn't reuse
* any bits, so there is no point in trying more
@ -2322,6 +2325,7 @@ void reuse_partial_packfile_from_bitmap(struct bitmap_index *bitmap_git,
packs[packs_nr].pack_int_id = pack_int_id;
packs[packs_nr].bitmap_nr = pack->num_objects;
packs[packs_nr].bitmap_pos = 0;
packs[packs_nr].from_midx = bitmap_git->midx;
objects_nr = packs[packs_nr++].bitmap_nr;
}

View File

@ -60,6 +60,7 @@ struct bitmapped_pack {
uint32_t bitmap_pos;
uint32_t bitmap_nr;
struct multi_pack_index *from_midx; /* MIDX only */
uint32_t pack_int_id; /* MIDX only */
};

View File

@ -31,20 +31,24 @@ test_pack_objects_reused_all () {
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs --all --delta-base-offset \
>/dev/null &&
>got.pack &&
test_pack_reused "$1" <trace2.txt &&
test_packs_reused "$2" <trace2.txt
test_packs_reused "$2" <trace2.txt &&
git index-pack --strict -o got.idx got.pack
}
# test_pack_objects_reused <pack-reused> <packs-reused>
test_pack_objects_reused () {
: >trace2.txt &&
GIT_TRACE2_EVENT="$PWD/trace2.txt" \
git pack-objects --stdout --revs >/dev/null &&
git pack-objects --stdout --revs >got.pack &&
test_pack_reused "$1" <trace2.txt &&
test_packs_reused "$2" <trace2.txt
test_packs_reused "$2" <trace2.txt &&
git index-pack --strict -o got.idx got.pack
}
test_expect_success 'preferred pack is reused for single-pack reuse' '
@ -232,4 +236,27 @@ test_expect_success 'non-omitted delta in MIDX preferred pack' '
test_pack_objects_reused_all $(wc -l <expect) 1
'
test_expect_success 'duplicate objects' '
git init duplicate-objects &&
(
cd duplicate-objects &&
git config pack.allowPackReuse multi &&
test_commit base &&
git repack -a &&
git rev-parse HEAD^{tree} >in &&
p="$(git pack-objects $packdir/pack <in)" &&
git multi-pack-index write --bitmap --preferred-pack=pack-$p.idx &&
objects_nr="$(git rev-list --count --all --objects)" &&
packs_nr="$(find $packdir -type f -name "pack-*.pack" | wc -l)" &&
test_pack_objects_reused_all $objects_nr $packs_nr
)
'
test_done