builtin/pack-objects.c: simplify add_objects_in_unpacked_packs()

This function is used to implement `pack-objects`'s `--keep-unreachable`
option, but can be simplified in a couple of ways:

  - add_objects_in_unpacked_packs() iterates over all packs (and then
    all packed objects) itself, but could use for_each_packed_object()
    instead, now that the necessary flags were added in the previous
    commit

  - objects are added to an in_pack array which stores (off_t, object)
    tuples, and then sorted in offset order, when we could instead
    iterate over the objects in offset order to begin with.

    There is a slight behavior change here: before we would have added
    objects in sorted offset order among _all_ packs. Handing objects to
    create_object_entry() in pack order for each pack (instead of
    feeding objects from all packs simultaneously, sorted by offsets
    that are each relative to a different pack) is much more
    reasonable, even if it differs from how the code currently works.

  - objects in a single pack are iterated in index order and searched
    for in order to discover their offsets, which is much less efficient
    than using the on-disk reverse index

Simplify the function by addressing each of the above and moving the
core of the loop into a callback function that we then pass to
for_each_packed_object() instead of open-coding the latter function
ourselves.

Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
Taylor Blau 2021-08-29 22:48:54 -04:00 committed by Junio C Hamano
parent a241878ac7
commit a9fd2f207d

View File

@ -3505,79 +3505,27 @@ static void show_edge(struct commit *commit)
add_preferred_base(&commit->object.oid);
}
struct in_pack_object {
off_t offset;
struct object *object;
};
struct in_pack {
unsigned int alloc;
unsigned int nr;
struct in_pack_object *array;
};
static void mark_in_pack_object(struct object *object, struct packed_git *p, struct in_pack *in_pack)
static int add_object_in_unpacked_pack(const struct object_id *oid,
struct packed_git *pack,
uint32_t pos,
void *_data)
{
in_pack->array[in_pack->nr].offset = find_pack_entry_one(object->oid.hash, p);
in_pack->array[in_pack->nr].object = object;
in_pack->nr++;
}
/*
* Compare the objects in the offset order, in order to emulate the
* "git rev-list --objects" output that produced the pack originally.
*/
static int ofscmp(const void *a_, const void *b_)
{
struct in_pack_object *a = (struct in_pack_object *)a_;
struct in_pack_object *b = (struct in_pack_object *)b_;
if (a->offset < b->offset)
return -1;
else if (a->offset > b->offset)
return 1;
else
return oidcmp(&a->object->oid, &b->object->oid);
struct object *obj = lookup_unknown_object(the_repository, oid);
if (obj->flags & OBJECT_ADDED)
return 0;
add_object_entry(oid, obj->type, "", 0);
obj->flags |= OBJECT_ADDED;
return 0;
}
static void add_objects_in_unpacked_packs(void)
{
struct packed_git *p;
struct in_pack in_pack;
uint32_t i;
memset(&in_pack, 0, sizeof(in_pack));
for (p = get_all_packs(the_repository); p; p = p->next) {
struct object_id oid;
struct object *o;
if (!p->pack_local || p->pack_keep || p->pack_keep_in_core)
continue;
if (open_pack_index(p))
die(_("cannot open pack index"));
ALLOC_GROW(in_pack.array,
in_pack.nr + p->num_objects,
in_pack.alloc);
for (i = 0; i < p->num_objects; i++) {
nth_packed_object_id(&oid, p, i);
o = lookup_unknown_object(the_repository, &oid);
if (!(o->flags & OBJECT_ADDED))
mark_in_pack_object(o, p, &in_pack);
o->flags |= OBJECT_ADDED;
}
}
if (in_pack.nr) {
QSORT(in_pack.array, in_pack.nr, ofscmp);
for (i = 0; i < in_pack.nr; i++) {
struct object *o = in_pack.array[i].object;
add_object_entry(&o->oid, o->type, "", 0);
}
}
free(in_pack.array);
if (for_each_packed_object(add_object_in_unpacked_pack, NULL,
FOR_EACH_OBJECT_PACK_ORDER |
FOR_EACH_OBJECT_LOCAL_ONLY |
FOR_EACH_OBJECT_SKIP_IN_CORE_KEPT_PACKS |
FOR_EACH_OBJECT_SKIP_ON_DISK_KEPT_PACKS))
die(_("cannot open pack index"));
}
static int add_loose_object(const struct object_id *oid, const char *path,