From 009be0d26d579c61dd7c7819e77173676c3ce007 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 24 Aug 2020 12:16:31 -0700 Subject: [PATCH 1/7] Documentation: deltaBaseCacheLimit is per-thread Clarify that core.deltaBaseCacheLimit is per-thread, as can be seen from the fact that cache usage (base_cache_used in struct thread_local in builtin/index-pack.c) is tracked individually for each thread and compared against delta_base_cache_limit. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- Documentation/config/core.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 74619a9c03..02002cf109 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -399,7 +399,7 @@ the largest projects. You probably do not need to adjust this value. Common unit suffixes of 'k', 'm', or 'g' are supported. core.deltaBaseCacheLimit:: - Maximum number of bytes to reserve for caching base objects + Maximum number of bytes per thread to reserve for caching base objects that may be referenced by multiple deltified objects. By storing the entire decompressed base objects in a cache Git is able to avoid unpacking and decompressing frequently used base From fc968e26c201a63d4d75cae550ecf0447c82920f Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 24 Aug 2020 12:16:33 -0700 Subject: [PATCH 2/7] index-pack: remove redundant parameter find_{ref,ofs}_delta_{,children} take an enum object_type parameter, but the object type is already present in the name of the function. Remove that parameter from these functions. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f865666db9..a8444d550b 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -614,7 +614,7 @@ static int compare_ofs_delta_bases(off_t offset1, off_t offset2, 0; } -static int find_ofs_delta(const off_t offset, enum object_type type) +static int find_ofs_delta(const off_t offset) { int first = 0, last = nr_ofs_deltas; @@ -624,7 +624,8 @@ static int find_ofs_delta(const off_t offset, enum object_type type) int cmp; cmp = compare_ofs_delta_bases(offset, delta->offset, - type, objects[delta->obj_no].type); + OBJ_OFS_DELTA, + objects[delta->obj_no].type); if (!cmp) return next; if (cmp < 0) { @@ -637,10 +638,9 @@ static int find_ofs_delta(const off_t offset, enum object_type type) } static void find_ofs_delta_children(off_t offset, - int *first_index, int *last_index, - enum object_type type) + int *first_index, int *last_index) { - int first = find_ofs_delta(offset, type); + int first = find_ofs_delta(offset); int last = first; int end = nr_ofs_deltas - 1; @@ -668,7 +668,7 @@ static int compare_ref_delta_bases(const struct object_id *oid1, return oidcmp(oid1, oid2); } -static int find_ref_delta(const struct object_id *oid, enum object_type type) +static int find_ref_delta(const struct object_id *oid) { int first = 0, last = nr_ref_deltas; @@ -678,7 +678,8 @@ static int find_ref_delta(const struct object_id *oid, enum object_type type) int cmp; cmp = compare_ref_delta_bases(oid, &delta->oid, - type, objects[delta->obj_no].type); + OBJ_REF_DELTA, + objects[delta->obj_no].type); if (!cmp) return next; if (cmp < 0) { @@ -691,10 +692,9 @@ static int find_ref_delta(const struct object_id *oid, enum object_type type) } static void find_ref_delta_children(const 
struct object_id *oid, - int *first_index, int *last_index, - enum object_type type) + int *first_index, int *last_index) { - int first = find_ref_delta(oid, type); + int first = find_ref_delta(oid); int last = first; int end = nr_ref_deltas - 1; @@ -983,12 +983,10 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, { if (base->ref_last == -1 && base->ofs_last == -1) { find_ref_delta_children(&base->obj->idx.oid, - &base->ref_first, &base->ref_last, - OBJ_REF_DELTA); + &base->ref_first, &base->ref_last); find_ofs_delta_children(base->obj->idx.offset, - &base->ofs_first, &base->ofs_last, - OBJ_OFS_DELTA); + &base->ofs_first, &base->ofs_last); if (base->ref_last == -1 && base->ofs_last == -1) { free(base->data); From 46e6fb1e4462fa4363b3d6ace827140dd1c4bc39 Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 24 Aug 2020 12:16:34 -0700 Subject: [PATCH 3/7] index-pack: unify threaded and unthreaded code Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index a8444d550b..357e03b5aa 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1211,15 +1211,7 @@ static void resolve_deltas(void) cleanup_thread(); return; } - - for (i = 0; i < nr_objects; i++) { - struct object_entry *obj = &objects[i]; - - if (is_delta_type(obj->type)) - continue; - resolve_base(obj); - display_progress(progress, nr_resolved_deltas); - } + threaded_second_pass(¬hread_data); } /* From a7f7e84a496d194ceeeba9827d17c4c206932d2b Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 24 Aug 2020 12:16:35 -0700 Subject: [PATCH 4/7] index-pack: remove redundant child field This is refactoring 1 of 2 to simplify struct base_data. In index-pack, each thread maintains a doubly-linked list of the delta chain that it is currently processing (the "base" and "child" pointers in struct base_data). When a thread exceeds the delta base cache limit and needs to reclaim memory, it uses the "child" pointers to traverse the lineage, reclaiming the memory of the eldest delta bases first. A subsequent patch will perform memory reclaiming in a different way and will thus no longer need the "child" pointer. Because the "child" pointer is redundant even now, remove it so that the aforementioned subsequent patch will be clearer. In the meantime, reclaim memory in the reverse order of the "base" pointers. 
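Restated on its own, the new reclamation order looks like the sketch below. It only paraphrases the prune_base_data() introduced by this patch (same helpers, same limit check), so the diff remains the authoritative version; the function name here is invented for illustration.

	/*
	 * Paraphrase of the new prune_base_data(): collect the ancestors of
	 * the entry we must keep by walking the "base" pointers, then free
	 * cached data starting with the oldest ancestor until usage drops
	 * back under the per-thread limit.
	 */
	static void prune_oldest_first(struct base_data *youngest_child)
	{
		struct thread_local *data = get_thread_data();
		struct base_data *b, **ancestry = NULL;
		size_t nr = 0, alloc = 0;
		ssize_t i;

		if (data->base_cache_used <= delta_base_cache_limit)
			return;
		for (b = youngest_child->base; b; b = b->base) {
			ALLOC_GROW(ancestry, nr + 1, alloc);
			ancestry[nr++] = b;
		}
		for (i = nr - 1;
		     i >= 0 && data->base_cache_used > delta_base_cache_limit;
		     i--)
			if (ancestry[i]->data)
				free_base_data(ancestry[i]);
		free(ancestry);
	}
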
Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 41 ++++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 357e03b5aa..032716553c 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -34,7 +34,6 @@ struct object_stat { struct base_data { struct base_data *base; - struct base_data *child; struct object_entry *obj; void *data; unsigned long size; @@ -44,7 +43,6 @@ struct base_data { struct thread_local { pthread_t thread; - struct base_data *base_cache; size_t base_cache_used; int pack_fd; }; @@ -380,27 +378,37 @@ static void free_base_data(struct base_data *c) } } -static void prune_base_data(struct base_data *retain) +static void prune_base_data(struct base_data *youngest_child) { struct base_data *b; struct thread_local *data = get_thread_data(); - for (b = data->base_cache; - data->base_cache_used > delta_base_cache_limit && b; - b = b->child) { - if (b->data && b != retain) - free_base_data(b); + struct base_data **ancestry = NULL; + size_t nr = 0, alloc = 0; + ssize_t i; + + if (data->base_cache_used <= delta_base_cache_limit) + return; + + /* + * Free all ancestors of youngest_child until we have enough space, + * starting with the oldest. (We cannot free youngest_child itself.) + */ + for (b = youngest_child->base; b != NULL; b = b->base) { + ALLOC_GROW(ancestry, nr + 1, alloc); + ancestry[nr++] = b; } + for (i = nr - 1; + i >= 0 && data->base_cache_used > delta_base_cache_limit; + i--) { + if (ancestry[i]->data) + free_base_data(ancestry[i]); + } + free(ancestry); } static void link_base_data(struct base_data *base, struct base_data *c) { - if (base) - base->child = c; - else - get_thread_data()->base_cache = c; - c->base = base; - c->child = NULL; if (c->data) get_thread_data()->base_cache_used += c->size; prune_base_data(c); @@ -408,11 +416,6 @@ static void link_base_data(struct base_data *base, struct base_data *c) static void unlink_base_data(struct base_data *c) { - struct base_data *base = c->base; - if (base) - base->child = NULL; - else - get_thread_data()->base_cache = NULL; free_base_data(c); } From b4718cae51f9630f2901c452ca4e4dfbf68f47ef Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 24 Aug 2020 12:16:36 -0700 Subject: [PATCH 5/7] index-pack: calculate {ref,ofs}_{first,last} early This is refactoring 2 of 2 to simplify struct base_data. Whenever we make a struct base_data, immediately calculate its delta children. This eliminates confusion as to when the {ref,ofs}_{first,last} fields are initialized. Before this patch, the delta children were calculated at the last possible moment. This allowed the members of struct base_data to be populated in any order, superficially useful when we have the object contents before the struct object_entry. But this makes reasoning about the state of struct base_data more complicated, hence this patch. Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 123 +++++++++++++++++++++---------------------- 1 file changed, 60 insertions(+), 63 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 032716553c..e98b11ab37 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -33,12 +33,15 @@ struct object_stat { }; struct base_data { + /* Initialized by make_base(). 
*/ struct base_data *base; struct object_entry *obj; - void *data; - unsigned long size; int ref_first, ref_last; int ofs_first, ofs_last; + + /* Not initialized by make_base(). */ + void *data; + unsigned long size; }; struct thread_local { @@ -362,14 +365,6 @@ static void set_thread_data(struct thread_local *data) pthread_setspecific(key, data); } -static struct base_data *alloc_base_data(void) -{ - struct base_data *base = xcalloc(1, sizeof(struct base_data)); - base->ref_last = -1; - base->ofs_last = -1; - return base; -} - static void free_base_data(struct base_data *c) { if (c->data) { @@ -406,19 +401,6 @@ static void prune_base_data(struct base_data *youngest_child) free(ancestry); } -static void link_base_data(struct base_data *base, struct base_data *c) -{ - c->base = base; - if (c->data) - get_thread_data()->base_cache_used += c->size; - prune_base_data(c); -} - -static void unlink_base_data(struct base_data *c) -{ - free_base_data(c); -} - static int is_delta_type(enum object_type type) { return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA); @@ -929,10 +911,25 @@ static void *get_base_data(struct base_data *c) return c->data; } -static void resolve_delta(struct object_entry *delta_obj, - struct base_data *base, struct base_data *result) +static struct base_data *make_base(struct object_entry *obj, + struct base_data *parent) { - void *base_data, *delta_data; + struct base_data *base = xcalloc(1, sizeof(struct base_data)); + base->base = parent; + base->obj = obj; + find_ref_delta_children(&obj->idx.oid, + &base->ref_first, &base->ref_last); + find_ofs_delta_children(obj->idx.offset, + &base->ofs_first, &base->ofs_last); + return base; +} + +static struct base_data *resolve_delta(struct object_entry *delta_obj, + struct base_data *base) +{ + void *base_data, *delta_data, *result_data; + struct base_data *result; + unsigned long result_size; if (show_stat) { int i = delta_obj - objects; @@ -946,19 +943,31 @@ static void resolve_delta(struct object_entry *delta_obj, } delta_data = get_data_from_pack(delta_obj); base_data = get_base_data(base); - result->obj = delta_obj; - result->data = patch_delta(base_data, base->size, - delta_data, delta_obj->size, &result->size); + result_data = patch_delta(base_data, base->size, + delta_data, delta_obj->size, &result_size); free(delta_data); - if (!result->data) + if (!result_data) bad_object(delta_obj->idx.offset, _("failed to apply delta")); - hash_object_file(the_hash_algo, result->data, result->size, + hash_object_file(the_hash_algo, result_data, result_size, type_name(delta_obj->real_type), &delta_obj->idx.oid); - sha1_object(result->data, NULL, result->size, delta_obj->real_type, + sha1_object(result_data, NULL, result_size, delta_obj->real_type, &delta_obj->idx.oid); + + result = make_base(delta_obj, base); + if (result->ref_last == -1 && result->ofs_last == -1) { + free(result_data); + } else { + result->data = result_data; + result->size = result_size; + get_thread_data()->base_cache_used += result->size; + prune_base_data(result); + } + counter_lock(); nr_resolved_deltas++; counter_unlock(); + + return result; } /* @@ -984,24 +993,9 @@ static int compare_and_swap_type(signed char *type, static struct base_data *find_unresolved_deltas_1(struct base_data *base, struct base_data *prev_base) { - if (base->ref_last == -1 && base->ofs_last == -1) { - find_ref_delta_children(&base->obj->idx.oid, - &base->ref_first, &base->ref_last); - - find_ofs_delta_children(base->obj->idx.offset, - &base->ofs_first, &base->ofs_last); - - if 
(base->ref_last == -1 && base->ofs_last == -1) { - free(base->data); - return NULL; - } - - link_base_data(prev_base, base); - } - if (base->ref_first <= base->ref_last) { struct object_entry *child = objects + ref_deltas[base->ref_first].obj_no; - struct base_data *result = alloc_base_data(); + struct base_data *result; if (!compare_and_swap_type(&child->real_type, OBJ_REF_DELTA, base->obj->real_type)) @@ -1009,7 +1003,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, (uintmax_t)child->idx.offset, oid_to_hex(&base->obj->idx.oid)); - resolve_delta(child, base, result); + result = resolve_delta(child, base); if (base->ref_first == base->ref_last && base->ofs_last == -1) free_base_data(base); @@ -1019,11 +1013,11 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, if (base->ofs_first <= base->ofs_last) { struct object_entry *child = objects + ofs_deltas[base->ofs_first].obj_no; - struct base_data *result = alloc_base_data(); + struct base_data *result; assert(child->real_type == OBJ_OFS_DELTA); child->real_type = base->obj->real_type; - resolve_delta(child, base, result); + result = resolve_delta(child, base); if (base->ofs_first == base->ofs_last) free_base_data(base); @@ -1031,7 +1025,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, return result; } - unlink_base_data(base); + free_base_data(base); return NULL; } @@ -1074,9 +1068,8 @@ static int compare_ref_delta_entry(const void *a, const void *b) static void resolve_base(struct object_entry *obj) { - struct base_data *base_obj = alloc_base_data(); - base_obj->obj = obj; - base_obj->data = NULL; + struct base_data *base_obj = make_base(obj, NULL); + find_unresolved_deltas(base_obj); } @@ -1369,22 +1362,26 @@ static void fix_unresolved_deltas(struct hashfile *f) for (i = 0; i < nr_ref_deltas; i++) { struct ref_delta_entry *d = sorted_by_pos[i]; enum object_type type; - struct base_data *base_obj = alloc_base_data(); + struct base_data *base; + void *data; + unsigned long size; + struct object_entry *obj; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; - base_obj->data = read_object_file(&d->oid, &type, - &base_obj->size); - if (!base_obj->data) + data = read_object_file(&d->oid, &type, &size); + if (!data) continue; if (check_object_signature(the_repository, &d->oid, - base_obj->data, base_obj->size, + data, size, type_name(type))) die(_("local object %s is corrupt"), oid_to_hex(&d->oid)); - base_obj->obj = append_obj_to_pack(f, d->oid.hash, - base_obj->data, base_obj->size, type); - find_unresolved_deltas(base_obj); + obj = append_obj_to_pack(f, d->oid.hash, data, size, type); + base = make_base(obj, NULL); + base->data = data; + base->size = size; + find_unresolved_deltas(base); display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos); From ee6f058384aa61ec175de9e45f413f209c6bf1dc Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Mon, 24 Aug 2020 12:16:37 -0700 Subject: [PATCH 6/7] index-pack: make resolve_delta() assume base data A subsequent commit will make the quantum of work smaller, necessitating more locking. This commit allows resolve_delta() to be called outside the lock. 
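In other words, the calling convention becomes: make sure the base's data is loaded, then apply the delta. A sketch of the shape the final patch in this series ends up with (lock names match index-pack.c; the retain_data bookkeeping added later is omitted):

	work_lock();
	get_base_data(base);	/* base->data is now guaranteed to be set */
	work_unlock();

	result = resolve_delta(child, base);	/* no lock needed here */
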
Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index e98b11ab37..c6d2acc13a 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -927,7 +927,7 @@ static struct base_data *make_base(struct object_entry *obj, static struct base_data *resolve_delta(struct object_entry *delta_obj, struct base_data *base) { - void *base_data, *delta_data, *result_data; + void *delta_data, *result_data; struct base_data *result; unsigned long result_size; @@ -942,8 +942,8 @@ static struct base_data *resolve_delta(struct object_entry *delta_obj, obj_stat[i].base_object_no = j; } delta_data = get_data_from_pack(delta_obj); - base_data = get_base_data(base); - result_data = patch_delta(base_data, base->size, + assert(base->data); + result_data = patch_delta(base->data, base->size, delta_data, delta_obj->size, &result_size); free(delta_data); if (!result_data) @@ -1003,6 +1003,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, (uintmax_t)child->idx.offset, oid_to_hex(&base->obj->idx.oid)); + get_base_data(base); result = resolve_delta(child, base); if (base->ref_first == base->ref_last && base->ofs_last == -1) free_base_data(base); @@ -1017,6 +1018,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base, assert(child->real_type == OBJ_OFS_DELTA); child->real_type = base->obj->real_type; + get_base_data(base); result = resolve_delta(child, base); if (base->ofs_first == base->ofs_last) free_base_data(base); From f08cbf60fe11507b5ff722ceae95dfb86ce654ee Mon Sep 17 00:00:00 2001 From: Jonathan Tan Date: Tue, 8 Sep 2020 12:48:35 -0700 Subject: [PATCH 7/7] index-pack: make quantum of work smaller Currently, when index-pack resolves deltas, it does not split up delta trees into threads: each delta base root (an object that is not a REF_DELTA or OFS_DELTA) can go into its own thread, but all deltas on that root (direct or indirect) are processed in the same thread. This is a problem when a repository contains a large text file (thus, delta-able) that is modified many times - delta resolution time during fetching is dominated by processing the deltas corresponding to that text file. This patch contains a solution to that. When cloning using git -c core.deltabasecachelimit=1g clone \ https://fuchsia.googlesource.com/third_party/vulkan-cts on my laptop, clone time improved from 3m2s to 2m5s (using 3 threads, which is the default). The solution is to have a global work stack. This stack contains delta bases (objects, whether appearing directly in the packfile or generated by delta resolution, that themselves have delta children) that need to be processed; whenever a thread needs work, it peeks at the top of the stack and processes its next unprocessed child. If a thread finds the stack empty, it will look for more delta base roots to push on the stack instead. The main weakness of having a global work stack is that more time is spent in the mutex, but profiling has shown that most time is spent in the resolution of the deltas themselves, so this shouldn't be an issue in practice. In any case, experimentation (as described in the clone command above) shows that this patch is a net improvement. 
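Condensed, the per-thread scheduling loop added by this patch has the following shape. The sketch keeps the names from the patch but drops the duplicate-base die(), the OBJ_OFS_DELTA assertion, the inflation of a root object's data, and the base_cache_used/children_remaining bookkeeping; see threaded_second_pass() in the diff below for the real logic.

	static void *second_pass_loop(void *data)	/* sketch of threaded_second_pass() */
	{
		for (;;) {
			struct base_data *parent = NULL, *child;
			struct object_entry *child_obj;

			work_lock();
			if (list_empty(&work_head)) {
				/* No base with pending children: dispatch a new root. */
				while (nr_dispatched < nr_objects &&
				       is_delta_type(objects[nr_dispatched].type))
					nr_dispatched++;
				if (nr_dispatched >= nr_objects) {
					work_unlock();
					break;
				}
				child_obj = &objects[nr_dispatched++];
			} else {
				/* Take the next child of the base on top of the stack. */
				parent = list_first_entry(&work_head,
							  struct base_data, list);
				if (parent->ref_first <= parent->ref_last)
					child_obj = objects +
						ref_deltas[parent->ref_first++].obj_no;
				else
					child_obj = objects +
						ofs_deltas[parent->ofs_first++].obj_no;
				child_obj->real_type = parent->obj->real_type;

				if (parent->ref_first > parent->ref_last &&
				    parent->ofs_first > parent->ofs_last) {
					/* Out of children: park it on done_head. */
					list_del(&parent->list);
					list_add(&parent->list, &done_head);
				}

				get_base_data(parent);	/* load base data while serialized */
				parent->retain_data++;
			}
			work_unlock();

			/* The expensive delta application runs outside the mutex. */
			child = parent ? resolve_delta(child_obj, parent)
				       : make_base(child_obj, NULL);

			work_lock();
			if (parent)
				parent->retain_data--;
			if (child->children_remaining)
				list_add(&child->list, &work_head);
			/* else: walk up the chain, decrement children_remaining
			 * and free exhausted ancestors (see the diff) */
			work_unlock();
		}
		return NULL;
	}
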
Signed-off-by: Jonathan Tan Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 348 +++++++++++++++++++++++++------------------ 1 file changed, 200 insertions(+), 148 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index c6d2acc13a..8da4031e72 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -38,15 +38,56 @@ struct base_data { struct object_entry *obj; int ref_first, ref_last; int ofs_first, ofs_last; + /* + * Threads should increment retain_data if they are about to call + * patch_delta() using this struct's data as a base, and decrement this + * when they are done. While retain_data is nonzero, this struct's data + * will not be freed even if the delta base cache limit is exceeded. + */ + int retain_data; + /* + * The number of direct children that have not been fully processed + * (entered work_head, entered done_head, left done_head). When this + * number reaches zero, this struct base_data can be freed. + */ + int children_remaining; /* Not initialized by make_base(). */ + struct list_head list; void *data; unsigned long size; }; +/* + * Stack of struct base_data that have unprocessed children. + * threaded_second_pass() uses this as a source of work (the other being the + * objects array). + * + * Guarded by work_mutex. + */ +static LIST_HEAD(work_head); + +/* + * Stack of struct base_data that have children, all of whom have been + * processed or are being processed, and at least one child is being processed. + * These struct base_data must be kept around until the last child is + * processed. + * + * Guarded by work_mutex. + */ +static LIST_HEAD(done_head); + +/* + * All threads share one delta base cache. + * + * base_cache_used is guarded by work_mutex, and base_cache_limit is read-only + * in a thread. + */ +static size_t base_cache_used; +static size_t base_cache_limit; + struct thread_local { pthread_t thread; - size_t base_cache_used; int pack_fd; }; @@ -369,36 +410,38 @@ static void free_base_data(struct base_data *c) { if (c->data) { FREE_AND_NULL(c->data); - get_thread_data()->base_cache_used -= c->size; + base_cache_used -= c->size; } } -static void prune_base_data(struct base_data *youngest_child) +static void prune_base_data(struct base_data *retain) { - struct base_data *b; - struct thread_local *data = get_thread_data(); - struct base_data **ancestry = NULL; - size_t nr = 0, alloc = 0; - ssize_t i; + struct list_head *pos; - if (data->base_cache_used <= delta_base_cache_limit) + if (base_cache_used <= base_cache_limit) return; - /* - * Free all ancestors of youngest_child until we have enough space, - * starting with the oldest. (We cannot free youngest_child itself.) 
- */ - for (b = youngest_child->base; b != NULL; b = b->base) { - ALLOC_GROW(ancestry, nr + 1, alloc); - ancestry[nr++] = b; + list_for_each_prev(pos, &done_head) { + struct base_data *b = list_entry(pos, struct base_data, list); + if (b->retain_data || b == retain) + continue; + if (b->data) { + free_base_data(b); + if (base_cache_used <= base_cache_limit) + return; + } } - for (i = nr - 1; - i >= 0 && data->base_cache_used > delta_base_cache_limit; - i--) { - if (ancestry[i]->data) - free_base_data(ancestry[i]); + + list_for_each_prev(pos, &work_head) { + struct base_data *b = list_entry(pos, struct base_data, list); + if (b->retain_data || b == retain) + continue; + if (b->data) { + free_base_data(b); + if (base_cache_used <= base_cache_limit) + return; + } } - free(ancestry); } static int is_delta_type(enum object_type type) @@ -851,15 +894,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, } /* - * This function is part of find_unresolved_deltas(). There are two - * walkers going in the opposite ways. - * - * The first one in find_unresolved_deltas() traverses down from - * parent node to children, deflating nodes along the way. However, - * memory for deflated nodes is limited by delta_base_cache_limit, so - * at some point parent node's deflated content may be freed. - * - * The second walker is this function, which goes from current node up + * Walk from current node up * to top parent if necessary to deflate the node. In normal * situation, its parent node would be already deflated, so it just * needs to apply delta. @@ -887,7 +922,7 @@ static void *get_base_data(struct base_data *c) if (!delta_nr) { c->data = get_data_from_pack(obj); c->size = obj->size; - get_thread_data()->base_cache_used += c->size; + base_cache_used += c->size; prune_base_data(c); } for (; delta_nr > 0; delta_nr--) { @@ -903,7 +938,7 @@ static void *get_base_data(struct base_data *c) free(raw); if (!c->data) bad_object(obj->idx.offset, _("failed to apply delta")); - get_thread_data()->base_cache_used += c->size; + base_cache_used += c->size; prune_base_data(c); } free(delta); @@ -921,6 +956,8 @@ static struct base_data *make_base(struct object_entry *obj, &base->ref_first, &base->ref_last); find_ofs_delta_children(obj->idx.offset, &base->ofs_first, &base->ofs_last); + base->children_remaining = base->ref_last - base->ref_first + + base->ofs_last - base->ofs_first + 2; return base; } @@ -954,14 +991,8 @@ static struct base_data *resolve_delta(struct object_entry *delta_obj, &delta_obj->idx.oid); result = make_base(delta_obj, base); - if (result->ref_last == -1 && result->ofs_last == -1) { - free(result_data); - } else { - result->data = result_data; - result->size = result_size; - get_thread_data()->base_cache_used += result->size; - prune_base_data(result); - } + result->data = result_data; + result->size = result_size; counter_lock(); nr_resolved_deltas++; @@ -970,86 +1001,6 @@ static struct base_data *resolve_delta(struct object_entry *delta_obj, return result; } -/* - * Standard boolean compare-and-swap: atomically check whether "*type" is - * "want"; if so, swap in "set" and return true. Otherwise, leave it untouched - * and return false. 
- */ -static int compare_and_swap_type(signed char *type, - enum object_type want, - enum object_type set) -{ - enum object_type old; - - type_cas_lock(); - old = *type; - if (old == want) - *type = set; - type_cas_unlock(); - - return old == want; -} - -static struct base_data *find_unresolved_deltas_1(struct base_data *base, - struct base_data *prev_base) -{ - if (base->ref_first <= base->ref_last) { - struct object_entry *child = objects + ref_deltas[base->ref_first].obj_no; - struct base_data *result; - - if (!compare_and_swap_type(&child->real_type, OBJ_REF_DELTA, - base->obj->real_type)) - die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)", - (uintmax_t)child->idx.offset, - oid_to_hex(&base->obj->idx.oid)); - - get_base_data(base); - result = resolve_delta(child, base); - if (base->ref_first == base->ref_last && base->ofs_last == -1) - free_base_data(base); - - base->ref_first++; - return result; - } - - if (base->ofs_first <= base->ofs_last) { - struct object_entry *child = objects + ofs_deltas[base->ofs_first].obj_no; - struct base_data *result; - - assert(child->real_type == OBJ_OFS_DELTA); - child->real_type = base->obj->real_type; - get_base_data(base); - result = resolve_delta(child, base); - if (base->ofs_first == base->ofs_last) - free_base_data(base); - - base->ofs_first++; - return result; - } - - free_base_data(base); - return NULL; -} - -static void find_unresolved_deltas(struct base_data *base) -{ - struct base_data *new_base, *prev_base = NULL; - for (;;) { - new_base = find_unresolved_deltas_1(base, prev_base); - - if (new_base) { - prev_base = base; - base = new_base; - } else { - free(base); - base = prev_base; - if (!base) - return; - prev_base = base->base; - } - } -} - static int compare_ofs_delta_entry(const void *a, const void *b) { const struct ofs_delta_entry *delta_a = a; @@ -1068,33 +1019,131 @@ static int compare_ref_delta_entry(const void *a, const void *b) return oidcmp(&delta_a->oid, &delta_b->oid); } -static void resolve_base(struct object_entry *obj) -{ - struct base_data *base_obj = make_base(obj, NULL); - - find_unresolved_deltas(base_obj); -} - static void *threaded_second_pass(void *data) { - set_thread_data(data); + if (data) + set_thread_data(data); for (;;) { - int i; - counter_lock(); - display_progress(progress, nr_resolved_deltas); - counter_unlock(); + struct base_data *parent = NULL; + struct object_entry *child_obj; + struct base_data *child; + work_lock(); - while (nr_dispatched < nr_objects && - is_delta_type(objects[nr_dispatched].type)) - nr_dispatched++; - if (nr_dispatched >= nr_objects) { - work_unlock(); - break; + if (list_empty(&work_head)) { + /* + * Take an object from the object array. + */ + while (nr_dispatched < nr_objects && + is_delta_type(objects[nr_dispatched].type)) + nr_dispatched++; + if (nr_dispatched >= nr_objects) { + work_unlock(); + break; + } + child_obj = &objects[nr_dispatched++]; + } else { + /* + * Peek at the top of the stack, and take a child from + * it. 
+ */ + parent = list_first_entry(&work_head, struct base_data, + list); + + if (parent->ref_first <= parent->ref_last) { + int offset = ref_deltas[parent->ref_first++].obj_no; + child_obj = objects + offset; + if (child_obj->real_type != OBJ_REF_DELTA) + die("REF_DELTA at offset %"PRIuMAX" already resolved (duplicate base %s?)", + (uintmax_t) child_obj->idx.offset, + oid_to_hex(&parent->obj->idx.oid)); + child_obj->real_type = parent->obj->real_type; + } else { + child_obj = objects + + ofs_deltas[parent->ofs_first++].obj_no; + assert(child_obj->real_type == OBJ_OFS_DELTA); + child_obj->real_type = parent->obj->real_type; + } + + if (parent->ref_first > parent->ref_last && + parent->ofs_first > parent->ofs_last) { + /* + * This parent has run out of children, so move + * it to done_head. + */ + list_del(&parent->list); + list_add(&parent->list, &done_head); + } + + /* + * Ensure that the parent has data, since we will need + * it later. + * + * NEEDSWORK: If parent data needs to be reloaded, this + * prolongs the time that the current thread spends in + * the mutex. A mitigating factor is that parent data + * needs to be reloaded only if the delta base cache + * limit is exceeded, so in the typical case, this does + * not happen. + */ + get_base_data(parent); + parent->retain_data++; } - i = nr_dispatched++; work_unlock(); - resolve_base(&objects[i]); + if (parent) { + child = resolve_delta(child_obj, parent); + if (!child->children_remaining) + FREE_AND_NULL(child->data); + } else { + child = make_base(child_obj, NULL); + if (child->children_remaining) { + /* + * Since this child has its own delta children, + * we will need this data in the future. + * Inflate now so that future iterations will + * have access to this object's data while + * outside the work mutex. + */ + child->data = get_data_from_pack(child_obj); + child->size = child_obj->size; + } + } + + work_lock(); + if (parent) + parent->retain_data--; + if (child->data) { + /* + * This child has its own children, so add it to + * work_head. + */ + list_add(&child->list, &work_head); + base_cache_used += child->size; + prune_base_data(NULL); + } else { + /* + * This child does not have its own children. It may be + * the last descendant of its ancestors; free those + * that we can. 
+ */ + struct base_data *p = parent; + + while (p) { + struct base_data *next_p; + + p->children_remaining--; + if (p->children_remaining) + break; + + next_p = p->base; + free_base_data(p); + list_del(&p->list); + free(p); + + p = next_p; + } + } + work_unlock(); } return NULL; } @@ -1195,6 +1244,7 @@ static void resolve_deltas(void) nr_ref_deltas + nr_ofs_deltas); nr_dispatched = 0; + base_cache_limit = delta_base_cache_limit * nr_threads; if (nr_threads > 1 || getenv("GIT_FORCE_THREADS")) { init_thread(); for (i = 0; i < nr_threads; i++) { @@ -1364,10 +1414,8 @@ static void fix_unresolved_deltas(struct hashfile *f) for (i = 0; i < nr_ref_deltas; i++) { struct ref_delta_entry *d = sorted_by_pos[i]; enum object_type type; - struct base_data *base; void *data; unsigned long size; - struct object_entry *obj; if (objects[d->obj_no].real_type != OBJ_REF_DELTA) continue; @@ -1379,11 +1427,15 @@ static void fix_unresolved_deltas(struct hashfile *f) data, size, type_name(type))) die(_("local object %s is corrupt"), oid_to_hex(&d->oid)); - obj = append_obj_to_pack(f, d->oid.hash, data, size, type); - base = make_base(obj, NULL); - base->data = data; - base->size = size; - find_unresolved_deltas(base); + + /* + * Add this as an object to the objects array and call + * threaded_second_pass() (which will pick up the added + * object). + */ + append_obj_to_pack(f, d->oid.hash, data, size, type); + threaded_second_pass(NULL); + display_progress(progress, nr_resolved_deltas); } free(sorted_by_pos);
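
A closing note that ties the first patch to the last: the delta base cache is now shared by all threads, yet core.deltaBaseCacheLimit keeps the per-thread meaning documented in patch 1, because resolve_deltas() scales the shared budget by the thread count (base_cache_limit = delta_base_cache_limit * nr_threads). A standalone illustration of that sizing, using the documented 96 MiB default and the default of three threads mentioned above (variable names mirror the globals in index-pack.c but are plain locals here):

	#include <stdio.h>

	int main(void)
	{
		size_t delta_base_cache_limit = 96 * 1024 * 1024; /* per thread */
		int nr_threads = 3;                               /* index-pack default */
		size_t base_cache_limit = delta_base_cache_limit * nr_threads;

		printf("shared delta base cache budget: %zu bytes\n",
		       base_cache_limit);
		return 0;
	}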