Merge branch 'tb/multi-pack-reuse-dupfix'

Object reuse code based on multi-pack-index sent an unwanted copy
of object.

* tb/multi-pack-reuse-dupfix:
  pack-objects: only perform verbatim reuse on the preferred pack
  t5332-multi-pack-reuse.sh: demonstrate duplicate packing failure
This commit is contained in:
Junio C Hamano 2024-11-22 14:34:19 +09:00
commit 0a83b39594
2 changed files with 62 additions and 55 deletions

View File

@ -1101,78 +1101,64 @@ static void write_reused_pack_one(struct packed_git *reuse_packfile,
static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile, static size_t write_reused_pack_verbatim(struct bitmapped_pack *reuse_packfile,
struct hashfile *out, struct hashfile *out,
off_t pack_start,
struct pack_window **w_curs) struct pack_window **w_curs)
{ {
size_t pos = reuse_packfile->bitmap_pos; size_t pos = 0;
size_t end; size_t end;
if (pos % BITS_IN_EWORD) { if (reuse_packfile->bitmap_pos) {
size_t word_pos = (pos / BITS_IN_EWORD); /*
size_t offset = pos % BITS_IN_EWORD; * We can't reuse whole chunks verbatim out of
size_t last; * non-preferred packs since we can't guarantee that
eword_t word = reuse_packfile_bitmap->words[word_pos]; * all duplicate objects were resolved in favor of
* that pack.
if (offset + reuse_packfile->bitmap_nr < BITS_IN_EWORD) *
last = offset + reuse_packfile->bitmap_nr; * Even if we have a whole eword_t worth of bits that
else * could be reused, there may be objects between the
last = BITS_IN_EWORD; * objects corresponding to the first and last bit of
* that word which were selected from a different
for (; offset < last; offset++) { * pack, causing us to send duplicate or unwanted
if (word >> offset == 0) * objects.
return word_pos; *
if (!bitmap_get(reuse_packfile_bitmap, * Handle non-preferred packs from within
word_pos * BITS_IN_EWORD + offset)) * write_reused_pack(), which inspects and reuses
return word_pos; * individual bits.
} */
return reuse_packfile->bitmap_pos / BITS_IN_EWORD;
pos += BITS_IN_EWORD - (pos % BITS_IN_EWORD);
} }
/* /*
* Now we're going to copy as many whole eword_t's as possible. * Only read through the last word whose bits all correspond
* "end" is the index of the last whole eword_t we copy, but * to objects in the given packfile, since we must stop at a
* there may be additional bits to process. Those are handled * word boundary.
* individually by write_reused_pack().
* *
* Begin by advancing to the first word boundary in range of the * If there is no whole word to read (i.e. the packfile
* bit positions occupied by objects in "reuse_packfile". Then * contains fewer than BITS_IN_EWORD objects), then we'll
* pick the last word boundary in the same range. If we have at * inspect bits one-by-one in write_reused_pack().
* least one word's worth of bits to process, continue on.
*/ */
end = reuse_packfile->bitmap_pos + reuse_packfile->bitmap_nr; end = reuse_packfile->bitmap_nr / BITS_IN_EWORD;
if (end % BITS_IN_EWORD) if (reuse_packfile_bitmap->word_alloc < end)
end -= end % BITS_IN_EWORD; BUG("fewer words than expected in reuse_packfile_bitmap");
if (pos >= end)
return reuse_packfile->bitmap_pos / BITS_IN_EWORD;
while (pos < end && while (pos < end && reuse_packfile_bitmap->words[pos] == (eword_t)~0)
reuse_packfile_bitmap->words[pos / BITS_IN_EWORD] == (eword_t)~0) pos++;
pos += BITS_IN_EWORD;
if (pos > end) if (pos) {
pos = end; off_t to_write;
if (reuse_packfile->bitmap_pos < pos) { written = (pos * BITS_IN_EWORD);
off_t pack_start_off = pack_pos_to_offset(reuse_packfile->p, 0); to_write = pack_pos_to_offset(reuse_packfile->p, written)
off_t pack_end_off = pack_pos_to_offset(reuse_packfile->p, - sizeof(struct pack_header);
pos - reuse_packfile->bitmap_pos);
written += pos - reuse_packfile->bitmap_pos;
/* We're recording one chunk, not one object. */ /* We're recording one chunk, not one object. */
record_reused_object(pack_start_off, record_reused_object(sizeof(struct pack_header), 0);
pack_start_off - (hashfile_total(out) - pack_start));
hashflush(out); hashflush(out);
copy_pack_data(out, reuse_packfile->p, w_curs, copy_pack_data(out, reuse_packfile->p, w_curs,
pack_start_off, pack_end_off - pack_start_off); sizeof(struct pack_header), to_write);
display_progress(progress_state, written); display_progress(progress_state, written);
} }
if (pos % BITS_IN_EWORD) return pos;
BUG("attempted to jump past a word boundary to %"PRIuMAX,
(uintmax_t)pos);
return pos / BITS_IN_EWORD;
} }
static void write_reused_pack(struct bitmapped_pack *reuse_packfile, static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
@ -1184,8 +1170,7 @@ static void write_reused_pack(struct bitmapped_pack *reuse_packfile,
struct pack_window *w_curs = NULL; struct pack_window *w_curs = NULL;
if (allow_ofs_delta) if (allow_ofs_delta)
i = write_reused_pack_verbatim(reuse_packfile, f, pack_start, i = write_reused_pack_verbatim(reuse_packfile, f, &w_curs);
&w_curs);
for (; i < reuse_packfile_bitmap->word_alloc; ++i) { for (; i < reuse_packfile_bitmap->word_alloc; ++i) {
eword_t word = reuse_packfile_bitmap->words[i]; eword_t word = reuse_packfile_bitmap->words[i];

View File

@ -259,4 +259,26 @@ test_expect_success 'duplicate objects' '
) )
' '
test_expect_success 'duplicate objects with verbatim reuse' '
git init duplicate-objects-verbatim &&
(
cd duplicate-objects-verbatim &&
git config pack.allowPackReuse multi &&
test_commit_bulk 64 &&
# take the first object from the main pack...
git show-index <$(ls $packdir/pack-*.idx) >obj.raw &&
sort -nk1 <obj.raw | head -n1 | cut -d" " -f2 >in &&
# ...and create a separate pack containing just that object
p="$(git pack-objects $packdir/pack <in)" &&
git multi-pack-index write --bitmap --preferred-pack=pack-$p.idx &&
test_pack_objects_reused_all 192 2
)
'
test_done test_done