packfile: use oidset for bad objects

Store the object ID of broken pack entries in an oidset instead of
keeping only their hashes in an unsorted array.  The resulting code is
shorter and easier to read.  It also copes better with the (hopefully)
very rare case of a large number of bad objects, because each lookup
becomes a set membership test instead of a linear scan of the array.

Helped-by: Jeff King <peff@peff.net>
Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
René Scharfe 2021-09-11 22:43:26 +02:00 committed by Junio C Hamano
parent 7407d733a4
commit 09ef66179b
3 changed files with 11 additions and 31 deletions

10
midx.c
View File

@ -307,13 +307,9 @@ int fill_midx_entry(struct repository * r,
if (!is_pack_valid(p))
return 0;
if (p->num_bad_objects) {
uint32_t i;
for (i = 0; i < p->num_bad_objects; i++)
if (hasheq(oid->hash,
p->bad_object_sha1 + the_hash_algo->rawsz * i))
return 0;
}
if (oidset_size(&p->bad_objects) &&
oidset_contains(&p->bad_objects, oid))
return 0;
e->offset = nth_midxed_offset(m, pos);
e->p = p;

object-store.h
View File

@ -10,6 +10,7 @@
#include "khash.h"
#include "dir.h"
#include "oidtree.h"
#include "oidset.h"
struct object_directory {
struct object_directory *next;
@ -75,9 +76,8 @@ struct packed_git {
const void *index_data;
size_t index_size;
uint32_t num_objects;
uint32_t num_bad_objects;
uint32_t crc_offset;
unsigned char *bad_object_sha1;
struct oidset bad_objects;
int index_version;
time_t mtime;
int pack_fd;

packfile.c
View File

@ -1163,29 +1163,17 @@ int unpack_object_header(struct packed_git *p,
void mark_bad_packed_object(struct packed_git *p, const struct object_id *oid)
{
unsigned i;
const unsigned hashsz = the_hash_algo->rawsz;
for (i = 0; i < p->num_bad_objects; i++)
if (hasheq(oid->hash, p->bad_object_sha1 + hashsz * i))
return;
p->bad_object_sha1 = xrealloc(p->bad_object_sha1,
st_mult(GIT_MAX_RAWSZ,
st_add(p->num_bad_objects, 1)));
hashcpy(p->bad_object_sha1 + hashsz * p->num_bad_objects, oid->hash);
p->num_bad_objects++;
oidset_insert(&p->bad_objects, oid);
}
const struct packed_git *has_packed_and_bad(struct repository *r,
const struct object_id *oid)
{
struct packed_git *p;
unsigned i;
for (p = r->objects->packed_git; p; p = p->next)
for (i = 0; i < p->num_bad_objects; i++)
if (hasheq(oid->hash,
p->bad_object_sha1 + the_hash_algo->rawsz * i))
return p;
if (oidset_contains(&p->bad_objects, oid))
return p;
return NULL;
}
@ -2016,13 +2004,9 @@ static int fill_pack_entry(const struct object_id *oid,
{
off_t offset;
if (p->num_bad_objects) {
unsigned i;
for (i = 0; i < p->num_bad_objects; i++)
if (hasheq(oid->hash,
p->bad_object_sha1 + the_hash_algo->rawsz * i))
return 0;
}
if (oidset_size(&p->bad_objects) &&
oidset_contains(&p->bad_objects, oid))
return 0;
offset = find_pack_entry_one(oid->hash, p);
if (!offset)