From 155dc8447d3590ea856bb17919bfc85172b52e09 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Sep 2024 10:50:03 +0200 Subject: [PATCH 1/6] refs: properly apply exclude patterns to namespaced refs Reference namespaces allow commands like git-upload-pack(1) to serve different sets of references to the client depending on which namespace is enabled, which is for example useful in fork networks. Namespaced refs are stored with a `refs/namespaces/$namespace` prefix, but all the user will ultimately see is a stripped version where that prefix is removed. The way that this interacts with "transfer.hideRefs" is not immediately obvious: the hidden refs can either apply to the stripped references, or to the non-stripped ones that still have the namespace prefix. In fact, the "transfer.hideRefs" machinery does the former and applies to the stripped reference by default, but rules can have "^" prefixed to switch this behaviour to instead match against the full reference name. Namespaces are exclusively handled at the generic "refs" layer, the respective backends have no clue that such a thing even exists. This also has the consequence that they cannot handle hiding references as soon as reference namespaces come into play because they neither know whether a namespace is active, nor do they know how to strip references if they are active. Handling such exclude patterns in `refs_for_each_namespaced_ref()` and `refs_for_each_fullref_in_prefixes()` is broken though, as both support that the user passes both namespaces and exclude patterns. In the case where both are set we will exclude references with unstripped names, even though we really wanted to exclude references based on their stripped names. This only surfaces when: - A repository uses reference namespaces. - "transfer.hideRefs" is active. - The namespaced references are packed into the "packed-refs" file. None of our tests exercise this scenario, and thus we haven't ever hit it. While t5509 exercises both (1) and (2), it does not happen to hit (3). It is trivial to demonstrate the bug though by explicitly packing refs in the tests, and then we indeed surface the breakage. Fix this bug by prefixing exclude patterns with the namespace in the generic layer. The newly introduced function will be used outside of "refs.c" in the next patch, so we add a declaration to "refs.h". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs.c | 35 ++++++++++++++++++++++++++++---- refs.h | 9 ++++++++ t/t5509-fetch-push-namespaces.sh | 1 + 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/refs.c b/refs.c index ceb72d4bd7..b3a367ea12 100644 --- a/refs.c +++ b/refs.c @@ -1517,6 +1517,19 @@ const char **hidden_refs_to_excludes(const struct strvec *hide_refs) return hide_refs->v; } +const char **get_namespaced_exclude_patterns(const char **exclude_patterns, + const char *namespace, + struct strvec *out) +{ + if (!namespace || !*namespace || !exclude_patterns || !*exclude_patterns) + return exclude_patterns; + + for (size_t i = 0; exclude_patterns[i]; i++) + strvec_pushf(out, "%s%s", namespace, exclude_patterns[i]); + + return out->v; +} + const char *find_descendant_ref(const char *dirname, const struct string_list *extras, const struct string_list *skip) @@ -1634,11 +1647,19 @@ int refs_for_each_namespaced_ref(struct ref_store *refs, const char **exclude_patterns, each_ref_fn fn, void *cb_data) { - struct strbuf buf = STRBUF_INIT; + struct strvec namespaced_exclude_patterns = STRVEC_INIT; + struct strbuf prefix = STRBUF_INIT; int ret; - strbuf_addf(&buf, "%srefs/", get_git_namespace()); - ret = do_for_each_ref(refs, buf.buf, exclude_patterns, fn, 0, 0, cb_data); - strbuf_release(&buf); + + exclude_patterns = get_namespaced_exclude_patterns(exclude_patterns, + get_git_namespace(), + &namespaced_exclude_patterns); + + strbuf_addf(&prefix, "%srefs/", get_git_namespace()); + ret = do_for_each_ref(refs, prefix.buf, exclude_patterns, fn, 0, 0, cb_data); + + strvec_clear(&namespaced_exclude_patterns); + strbuf_release(&prefix); return ret; } @@ -1719,6 +1740,7 @@ int refs_for_each_fullref_in_prefixes(struct ref_store *ref_store, const char **exclude_patterns, each_ref_fn fn, void *cb_data) { + struct strvec namespaced_exclude_patterns = STRVEC_INIT; struct string_list prefixes = STRING_LIST_INIT_DUP; struct string_list_item *prefix; struct strbuf buf = STRBUF_INIT; @@ -1730,6 +1752,10 @@ int refs_for_each_fullref_in_prefixes(struct ref_store *ref_store, strbuf_addstr(&buf, namespace); namespace_len = buf.len; + exclude_patterns = get_namespaced_exclude_patterns(exclude_patterns, + namespace, + &namespaced_exclude_patterns); + for_each_string_list_item(prefix, &prefixes) { strbuf_addstr(&buf, prefix->string); ret = refs_for_each_fullref_in(ref_store, buf.buf, @@ -1739,6 +1765,7 @@ int refs_for_each_fullref_in_prefixes(struct ref_store *ref_store, strbuf_setlen(&buf, namespace_len); } + strvec_clear(&namespaced_exclude_patterns); string_list_clear(&prefixes, 0); strbuf_release(&buf); return ret; diff --git a/refs.h b/refs.h index f8b919a138..3f774e96d1 100644 --- a/refs.h +++ b/refs.h @@ -859,6 +859,15 @@ int ref_is_hidden(const char *, const char *, const struct strvec *); */ const char **hidden_refs_to_excludes(const struct strvec *hide_refs); +/* + * Prefix all exclude patterns with the namespace, if any. This is required + * because exclude patterns apply to the stripped reference name, not the full + * reference name with the namespace. + */ +const char **get_namespaced_exclude_patterns(const char **exclude_patterns, + const char *namespace, + struct strvec *out); + /* Is this a per-worktree ref living in the refs/ namespace? */ int is_per_worktree_ref(const char *refname); diff --git a/t/t5509-fetch-push-namespaces.sh b/t/t5509-fetch-push-namespaces.sh index 05090feaf9..98e8352b6c 100755 --- a/t/t5509-fetch-push-namespaces.sh +++ b/t/t5509-fetch-push-namespaces.sh @@ -96,6 +96,7 @@ test_expect_success 'hide namespaced refs with transfer.hideRefs' ' ' test_expect_success 'check that transfer.hideRefs does not match unstripped refs' ' + git -C pushee pack-refs --all && GIT_NAMESPACE=namespace \ git -C pushee -c transfer.hideRefs=refs/namespaces/namespace/refs/tags \ ls-remote "ext::git %s ." >actual && From d8faf50c36e94bdf47f57e2f1e7ea1b767e0ea00 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Sep 2024 10:50:05 +0200 Subject: [PATCH 2/6] builtin/receive-pack: fix exclude patterns when announcing refs In `write_head_info()` we announce references to the remote client. We need to honor "transfer.hideRefs" here so that we do not announce any references that the client shouldn't be able to learn about. This is done via two separate mechanisms: - We hand over exclude patterns to the reference backend. We can only honor "plain" exclude patterns here that do not have prefixes with special meaning such as "^" or "!". Filtering down the references is handled by `hidden_refs_to_excludes()`. - In `show_ref_cb()` we perform a second check against hidden refs. For one this is done such that we can handle those special prefixes. And second, handling exclude patterns in ref backends is optional, so we also have to handle "normal" patterns. The special-meaning "^" prefix alters whether a hidden ref applies to the namespace-stripped reference name or the full name. So while we would usually call `refs_for_each_namespaced_ref()` to only get those references in the current namespace, we can't because we'd get the already-rewritten reference names. Instead, we are forced to use `refs_for_each_fullref_in()` and then manually strip away the namespace prefix such that we have access to both names. But this also means that we do not get namespace handling for exclude patterns, which `refs_for_each_namespaced_ref()` brings for free. This results in a bug because we potentially end up hiding away references based on their namespaced name and not on the stripped name as we really should be doing. Fix this by manually rewriting the exclude patterns to their namespaced variants. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 18 ++++++++++++++++-- t/t5509-fetch-push-namespaces.sh | 8 ++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 3f35140e48..478c62ca83 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -339,12 +339,26 @@ static void show_one_alternate_ref(const struct object_id *oid, static void write_head_info(void) { static struct oidset seen = OIDSET_INIT; + struct strvec excludes_vector = STRVEC_INIT; + const char **exclude_patterns; + + /* + * We need access to the reference names both with and without their + * namespace and thus cannot use `refs_for_each_namespaced_ref()`. We + * thus have to adapt exclude patterns to carry the namespace prefix + * ourselves. + */ + exclude_patterns = get_namespaced_exclude_patterns( + hidden_refs_to_excludes(&hidden_refs), + get_git_namespace(), &excludes_vector); refs_for_each_fullref_in(get_main_ref_store(the_repository), "", - hidden_refs_to_excludes(&hidden_refs), - show_ref_cb, &seen); + exclude_patterns, show_ref_cb, &seen); for_each_alternate_ref(show_one_alternate_ref, &seen); + oidset_clear(&seen); + strvec_clear(&excludes_vector); + if (!sent_capabilities) show_ref("capabilities^{}", null_oid()); diff --git a/t/t5509-fetch-push-namespaces.sh b/t/t5509-fetch-push-namespaces.sh index 98e8352b6c..f029ae0d28 100755 --- a/t/t5509-fetch-push-namespaces.sh +++ b/t/t5509-fetch-push-namespaces.sh @@ -124,6 +124,14 @@ test_expect_success 'try to update a ref that is not hidden' ' git -C original push pushee-namespaced main ' +test_expect_success 'git-receive-pack(1) with transfer.hideRefs does not match unstripped refs during advertisement' ' + git -C pushee update-ref refs/namespaces/namespace/refs/heads/foo/1 refs/namespaces/namespace/refs/heads/main && + git -C pushee pack-refs --all && + test_config -C pushee transfer.hideRefs refs/namespaces/namespace/refs/heads/foo && + GIT_TRACE_PACKET="$(pwd)/trace" git -C original push pushee-namespaced main && + test_grep refs/heads/foo/1 trace +' + test_expect_success 'try to update a hidden full ref' ' test_config -C pushee transfer.hideRefs "^refs/namespaces/namespace/refs/heads/main" && test_must_fail git -C original push pushee-namespaced main From 428672a3b16f65c01f0bb78d266b9fc168a94eb3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Sep 2024 10:50:08 +0200 Subject: [PATCH 3/6] Makefile: stop listing test library objects twice Whenever one adds another test library compilation unit one has to wire it up twice in the Makefile: once to append it to `UNIT_TEST_OBJS`, and once to append it to the `UNIT_TEST_PROGS` target. Ideally, we'd just reuse the `UNIT_TEST_OBJS` variable in the target so that we can avoid the duplication. But it also contains all the objects for our test programs, each of which contains a `cmd_main()`, and thus we cannot link them all into the target executable. Refactor the code such that `UNIT_TEST_OBJS` does not contain the unit test program objects anymore, which we can instead manually append to the `OBJECTS` variable. Like this, the former variable now only contains objects for test libraries and can thus be reused. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index bdea061971..4ed5f1f50a 100644 --- a/Makefile +++ b/Makefile @@ -1356,7 +1356,6 @@ UNIT_TEST_PROGRAMS += t-strvec UNIT_TEST_PROGRAMS += t-trailer UNIT_TEST_PROGRAMS += t-urlmatch-normalization UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS)) -UNIT_TEST_OBJS = $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS)) UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/lib-oid.o @@ -2715,6 +2714,7 @@ OBJECTS += $(XDIFF_OBJS) OBJECTS += $(FUZZ_OBJS) OBJECTS += $(REFTABLE_OBJS) $(REFTABLE_TEST_OBJS) OBJECTS += $(UNIT_TEST_OBJS) +OBJECTS += $(patsubst %,$(UNIT_TEST_DIR)/%.o,$(UNIT_TEST_PROGRAMS)) ifndef NO_CURL OBJECTS += http.o http-walker.o remote-curl.o @@ -3852,9 +3852,7 @@ $(FUZZ_PROGRAMS): %: %.o oss-fuzz/dummy-cmd-main.o $(GITLIBS) GIT-LDFLAGS -Wl,--allow-multiple-definition \ $(filter %.o,$^) $(filter %.a,$^) $(LIBS) $(LIB_FUZZING_ENGINE) -$(UNIT_TEST_PROGS): $(UNIT_TEST_BIN)/%$X: $(UNIT_TEST_DIR)/%.o \ - $(UNIT_TEST_DIR)/test-lib.o \ - $(UNIT_TEST_DIR)/lib-oid.o \ +$(UNIT_TEST_PROGS): $(UNIT_TEST_BIN)/%$X: $(UNIT_TEST_DIR)/%.o $(UNIT_TEST_OBJS) \ $(GITLIBS) GIT-LDFLAGS $(call mkdir_p_parent_template) $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \ From a4f50bb1e9b6840bb4583ac4bc6d93bea9704879 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Sep 2024 10:50:11 +0200 Subject: [PATCH 4/6] t/unit-tests: introduce reftable library We have recently migrated all of the reftable unit tests that were part of the reftable library into our own unit testing framework. As part of that migration we have duplicated some of the functionality that was part of the reftable test framework into each of the migrated test suites. This was a sensible decision to not have all of the migrations dependent on each other, but now that the migration is done it makes sense to deduplicate the functionality again. Introduce a new reftable test library that hosts some shared code and adapt tests to use it. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + t/unit-tests/lib-reftable.c | 93 ++++++++++++++++++++ t/unit-tests/lib-reftable.h | 20 +++++ t/unit-tests/t-reftable-merged.c | 87 +++---------------- t/unit-tests/t-reftable-readwrite.c | 130 +++++++++------------------- t/unit-tests/t-reftable-stack.c | 25 +++--- 6 files changed, 177 insertions(+), 179 deletions(-) create mode 100644 t/unit-tests/lib-reftable.c create mode 100644 t/unit-tests/lib-reftable.h diff --git a/Makefile b/Makefile index 4ed5f1f50a..9460a80d0d 100644 --- a/Makefile +++ b/Makefile @@ -1358,6 +1358,7 @@ UNIT_TEST_PROGRAMS += t-urlmatch-normalization UNIT_TEST_PROGS = $(patsubst %,$(UNIT_TEST_BIN)/%$X,$(UNIT_TEST_PROGRAMS)) UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/lib-oid.o +UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/lib-reftable.o # xdiff and reftable libs may in turn depend on what is in libgit.a GITLIBS = common-main.o $(LIB_FILE) $(XDIFF_LIB) $(REFTABLE_LIB) $(LIB_FILE) diff --git a/t/unit-tests/lib-reftable.c b/t/unit-tests/lib-reftable.c new file mode 100644 index 0000000000..ab1fa44a28 --- /dev/null +++ b/t/unit-tests/lib-reftable.c @@ -0,0 +1,93 @@ +#include "lib-reftable.h" +#include "test-lib.h" +#include "reftable/constants.h" +#include "reftable/writer.h" + +void t_reftable_set_hash(uint8_t *p, int i, uint32_t id) +{ + memset(p, (uint8_t)i, hash_size(id)); +} + +static ssize_t strbuf_writer_write(void *b, const void *data, size_t sz) +{ + strbuf_add(b, data, sz); + return sz; +} + +static int strbuf_writer_flush(void *arg UNUSED) +{ + return 0; +} + +struct reftable_writer *t_reftable_strbuf_writer(struct strbuf *buf, + struct reftable_write_options *opts) +{ + return reftable_new_writer(&strbuf_writer_write, + &strbuf_writer_flush, + buf, opts); +} + +void t_reftable_write_to_buf(struct strbuf *buf, + struct reftable_ref_record *refs, + size_t nrefs, + struct reftable_log_record *logs, + size_t nlogs, + struct reftable_write_options *_opts) +{ + struct reftable_write_options opts = { 0 }; + const struct reftable_stats *stats; + struct reftable_writer *writer; + uint64_t min = 0xffffffff; + uint64_t max = 0; + int ret; + + if (_opts) + opts = *_opts; + + for (size_t i = 0; i < nrefs; i++) { + uint64_t ui = refs[i].update_index; + if (ui > max) + max = ui; + if (ui < min) + min = ui; + } + for (size_t i = 0; i < nlogs; i++) { + uint64_t ui = logs[i].update_index; + if (ui > max) + max = ui; + if (ui < min) + min = ui; + } + + writer = t_reftable_strbuf_writer(buf, &opts); + reftable_writer_set_limits(writer, min, max); + + if (nrefs) { + ret = reftable_writer_add_refs(writer, refs, nrefs); + check_int(ret, ==, 0); + } + + if (nlogs) { + ret = reftable_writer_add_logs(writer, logs, nlogs); + check_int(ret, ==, 0); + } + + ret = reftable_writer_close(writer); + check_int(ret, ==, 0); + + stats = reftable_writer_stats(writer); + for (size_t i = 0; i < stats->ref_stats.blocks; i++) { + size_t off = i * (opts.block_size ? opts.block_size + : DEFAULT_BLOCK_SIZE); + if (!off) + off = header_size(opts.hash_id == GIT_SHA256_FORMAT_ID ? 2 : 1); + check_char(buf->buf[off], ==, 'r'); + } + + if (nrefs) + check_int(stats->ref_stats.blocks, >, 0); + if (nlogs) + check_int(stats->log_stats.blocks, >, 0); + + reftable_writer_free(writer); +} diff --git a/t/unit-tests/lib-reftable.h b/t/unit-tests/lib-reftable.h new file mode 100644 index 0000000000..d115419084 --- /dev/null +++ b/t/unit-tests/lib-reftable.h @@ -0,0 +1,20 @@ +#ifndef LIB_REFTABLE_H +#define LIB_REFTABLE_H + +#include "git-compat-util.h" +#include "strbuf.h" +#include "reftable/reftable-writer.h" + +void t_reftable_set_hash(uint8_t *p, int i, uint32_t id); + +struct reftable_writer *t_reftable_strbuf_writer(struct strbuf *buf, + struct reftable_write_options *opts); + +void t_reftable_write_to_buf(struct strbuf *buf, + struct reftable_ref_record *refs, + size_t nrecords, + struct reftable_log_record *logs, + size_t nlogs, + struct reftable_write_options *opts); + +#endif diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c index e9d100a01e..b8c92fdb36 100644 --- a/t/unit-tests/t-reftable-merged.c +++ b/t/unit-tests/t-reftable-merged.c @@ -7,6 +7,7 @@ https://developers.google.com/open-source/licenses/bsd */ #include "test-lib.h" +#include "lib-reftable.h" #include "reftable/blocksource.h" #include "reftable/constants.h" #include "reftable/merged.h" @@ -15,77 +16,6 @@ https://developers.google.com/open-source/licenses/bsd #include "reftable/reftable-merged.h" #include "reftable/reftable-writer.h" -static ssize_t strbuf_add_void(void *b, const void *data, const size_t sz) -{ - strbuf_add(b, data, sz); - return sz; -} - -static int noop_flush(void *arg UNUSED) -{ - return 0; -} - -static void write_test_table(struct strbuf *buf, - struct reftable_ref_record refs[], const size_t n) -{ - uint64_t min = 0xffffffff; - uint64_t max = 0; - size_t i; - int err; - - struct reftable_write_options opts = { - .block_size = 256, - }; - struct reftable_writer *w = NULL; - for (i = 0; i < n; i++) { - uint64_t ui = refs[i].update_index; - if (ui > max) - max = ui; - if (ui < min) - min = ui; - } - - w = reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); - reftable_writer_set_limits(w, min, max); - - for (i = 0; i < n; i++) { - uint64_t before = refs[i].update_index; - int n = reftable_writer_add_ref(w, &refs[i]); - check_int(n, ==, 0); - check_int(before, ==, refs[i].update_index); - } - - err = reftable_writer_close(w); - check(!err); - - reftable_writer_free(w); -} - -static void write_test_log_table(struct strbuf *buf, struct reftable_log_record logs[], - const size_t n, const uint64_t update_index) -{ - int err; - - struct reftable_write_options opts = { - .block_size = 256, - .exact_log_message = 1, - }; - struct reftable_writer *w = NULL; - w = reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); - reftable_writer_set_limits(w, update_index, update_index); - - for (size_t i = 0; i < n; i++) { - int err = reftable_writer_add_log(w, &logs[i]); - check(!err); - } - - err = reftable_writer_close(w); - check(!err); - - reftable_writer_free(w); -} - static struct reftable_merged_table * merged_table_from_records(struct reftable_ref_record **refs, struct reftable_block_source **source, @@ -93,13 +23,16 @@ merged_table_from_records(struct reftable_ref_record **refs, struct strbuf *buf, const size_t n) { struct reftable_merged_table *mt = NULL; + struct reftable_write_options opts = { + .block_size = 256, + }; int err; REFTABLE_CALLOC_ARRAY(*readers, n); REFTABLE_CALLOC_ARRAY(*source, n); for (size_t i = 0; i < n; i++) { - write_test_table(&buf[i], refs[i], sizes[i]); + t_reftable_write_to_buf(&buf[i], refs[i], sizes[i], NULL, 0, &opts); block_source_from_strbuf(&(*source)[i], &buf[i]); err = reftable_reader_new(&(*readers)[i], &(*source)[i], @@ -268,13 +201,17 @@ merged_table_from_log_records(struct reftable_log_record **logs, struct strbuf *buf, const size_t n) { struct reftable_merged_table *mt = NULL; + struct reftable_write_options opts = { + .block_size = 256, + .exact_log_message = 1, + }; int err; REFTABLE_CALLOC_ARRAY(*readers, n); REFTABLE_CALLOC_ARRAY(*source, n); for (size_t i = 0; i < n; i++) { - write_test_log_table(&buf[i], logs[i], sizes[i], i + 1); + t_reftable_write_to_buf(&buf[i], NULL, 0, logs[i], sizes[i], &opts); block_source_from_strbuf(&(*source)[i], &buf[i]); err = reftable_reader_new(&(*readers)[i], &(*source)[i], @@ -402,9 +339,7 @@ static void t_default_write_opts(void) { struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); struct reftable_ref_record rec = { .refname = (char *) "master", .update_index = 1, diff --git a/t/unit-tests/t-reftable-readwrite.c b/t/unit-tests/t-reftable-readwrite.c index 82bfaf3287..e1b235a5f1 100644 --- a/t/unit-tests/t-reftable-readwrite.c +++ b/t/unit-tests/t-reftable-readwrite.c @@ -7,6 +7,7 @@ https://developers.google.com/open-source/licenses/bsd */ #include "test-lib.h" +#include "lib-reftable.h" #include "reftable/basics.h" #include "reftable/blocksource.h" #include "reftable/reader.h" @@ -15,22 +16,6 @@ https://developers.google.com/open-source/licenses/bsd static const int update_index = 5; -static void set_test_hash(uint8_t *p, int i) -{ - memset(p, (uint8_t)i, hash_size(GIT_SHA1_FORMAT_ID)); -} - -static ssize_t strbuf_add_void(void *b, const void *data, size_t sz) -{ - strbuf_add(b, data, sz); - return sz; -} - -static int noop_flush(void *arg UNUSED) -{ - return 0; -} - static void t_buffer(void) { struct strbuf buf = STRBUF_INIT; @@ -62,61 +47,34 @@ static void write_table(char ***names, struct strbuf *buf, int N, .block_size = block_size, .hash_id = hash_id, }; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, buf, &opts); - struct reftable_ref_record ref = { 0 }; - int i = 0, n; - struct reftable_log_record log = { 0 }; - const struct reftable_stats *stats = NULL; + struct reftable_ref_record *refs; + struct reftable_log_record *logs; + int i; REFTABLE_CALLOC_ARRAY(*names, N + 1); + REFTABLE_CALLOC_ARRAY(refs, N); + REFTABLE_CALLOC_ARRAY(logs, N); - reftable_writer_set_limits(w, update_index, update_index); for (i = 0; i < N; i++) { - char name[100]; - int n; - - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); - - ref.refname = name; - ref.update_index = update_index; - ref.value_type = REFTABLE_REF_VAL1; - set_test_hash(ref.value.val1, i); - (*names)[i] = xstrdup(name); - - n = reftable_writer_add_ref(w, &ref); - check_int(n, ==, 0); + refs[i].refname = (*names)[i] = xstrfmt("refs/heads/branch%02d", i); + refs[i].update_index = update_index; + refs[i].value_type = REFTABLE_REF_VAL1; + t_reftable_set_hash(refs[i].value.val1, i, GIT_SHA1_FORMAT_ID); } for (i = 0; i < N; i++) { - char name[100]; - int n; - - snprintf(name, sizeof(name), "refs/heads/branch%02d", i); - - log.refname = name; - log.update_index = update_index; - log.value_type = REFTABLE_LOG_UPDATE; - set_test_hash(log.value.update.new_hash, i); - log.value.update.message = (char *) "message"; - - n = reftable_writer_add_log(w, &log); - check_int(n, ==, 0); + logs[i].refname = (*names)[i]; + logs[i].update_index = update_index; + logs[i].value_type = REFTABLE_LOG_UPDATE; + t_reftable_set_hash(logs[i].value.update.new_hash, i, + GIT_SHA1_FORMAT_ID); + logs[i].value.update.message = (char *) "message"; } - n = reftable_writer_close(w); - check_int(n, ==, 0); + t_reftable_write_to_buf(buf, refs, N, logs, N, &opts); - stats = reftable_writer_stats(w); - for (i = 0; i < stats->ref_stats.blocks; i++) { - int off = i * opts.block_size; - if (!off) - off = header_size((hash_id == GIT_SHA256_FORMAT_ID) ? 2 : 1); - check_char(buf->buf[off], ==, 'r'); - } - - check_int(stats->log_stats.blocks, >, 0); - reftable_writer_free(w); + free(refs); + free(logs); } static void t_log_buffer_size(void) @@ -138,8 +96,7 @@ static void t_log_buffer_size(void) .time = 0x5e430672, .message = (char *) "commit: 9\n", } } }; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); /* This tests buffer extension for log compression. Must use a random hash, to ensure that the compressed part is larger than the original. @@ -181,8 +138,7 @@ static void t_log_overflow(void) }, }, }; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); memset(msg, 'x', sizeof(msg) - 1); reftable_writer_set_limits(w, update_index, update_index); @@ -208,8 +164,7 @@ static void t_log_write_read(void) struct reftable_reader *reader; struct reftable_block_source source = { 0 }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); const struct reftable_stats *stats = NULL; reftable_writer_set_limits(w, 0, N); for (i = 0; i < N; i++) { @@ -229,8 +184,10 @@ static void t_log_write_read(void) log.refname = names[i]; log.update_index = i; log.value_type = REFTABLE_LOG_UPDATE; - set_test_hash(log.value.update.old_hash, i); - set_test_hash(log.value.update.new_hash, i + 1); + t_reftable_set_hash(log.value.update.old_hash, i, + GIT_SHA1_FORMAT_ID); + t_reftable_set_hash(log.value.update.new_hash, i + 1, + GIT_SHA1_FORMAT_ID); err = reftable_writer_add_log(w, &log); check(!err); @@ -297,8 +254,7 @@ static void t_log_zlib_corruption(void) struct reftable_reader *reader; struct reftable_block_source source = { 0 }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); const struct reftable_stats *stats = NULL; char message[100] = { 0 }; int err, i, n; @@ -528,15 +484,12 @@ static void t_table_refs_for(int indexed) int err; struct reftable_reader *reader; struct reftable_block_source source = { 0 }; - struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); - + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); struct reftable_iterator it = { 0 }; int j; - set_test_hash(want_hash, 4); + t_reftable_set_hash(want_hash, 4, GIT_SHA1_FORMAT_ID); for (i = 0; i < N; i++) { uint8_t hash[GIT_SHA1_RAWSZ]; @@ -552,8 +505,10 @@ static void t_table_refs_for(int indexed) ref.refname = name; ref.value_type = REFTABLE_REF_VAL2; - set_test_hash(ref.value.val2.value, i / 4); - set_test_hash(ref.value.val2.target_value, 3 + i / 4); + t_reftable_set_hash(ref.value.val2.value, i / 4, + GIT_SHA1_FORMAT_ID); + t_reftable_set_hash(ref.value.val2.target_value, 3 + i / 4, + GIT_SHA1_FORMAT_ID); /* 80 bytes / entry, so 3 entries per block. Yields 17 */ @@ -618,8 +573,7 @@ static void t_write_empty_table(void) { struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); struct reftable_block_source source = { 0 }; struct reftable_reader *rd = NULL; struct reftable_ref_record rec = { 0 }; @@ -657,8 +611,7 @@ static void t_write_object_id_min_length(void) .block_size = 75, }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); struct reftable_ref_record ref = { .update_index = 1, .value_type = REFTABLE_REF_VAL1, @@ -692,8 +645,7 @@ static void t_write_object_id_length(void) .block_size = 75, }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); struct reftable_ref_record ref = { .update_index = 1, .value_type = REFTABLE_REF_VAL1, @@ -726,8 +678,7 @@ static void t_write_empty_key(void) { struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); struct reftable_ref_record ref = { .refname = (char *) "", .update_index = 1, @@ -749,8 +700,7 @@ static void t_write_key_order(void) { struct reftable_write_options opts = { 0 }; struct strbuf buf = STRBUF_INIT; - struct reftable_writer *w = - reftable_new_writer(&strbuf_add_void, &noop_flush, &buf, &opts); + struct reftable_writer *w = t_reftable_strbuf_writer(&buf, &opts); struct reftable_ref_record refs[2] = { { .refname = (char *) "b", @@ -798,7 +748,7 @@ static void t_write_multiple_indices(void) struct reftable_reader *reader; int err, i; - writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts); + writer = t_reftable_strbuf_writer(&writer_buf, &opts); reftable_writer_set_limits(writer, 1, 1); for (i = 0; i < 100; i++) { struct reftable_ref_record ref = { @@ -876,7 +826,7 @@ static void t_write_multi_level_index(void) struct reftable_reader *reader; int err; - writer = reftable_new_writer(&strbuf_add_void, &noop_flush, &writer_buf, &opts); + writer = t_reftable_strbuf_writer(&writer_buf, &opts); reftable_writer_set_limits(writer, 1, 1); for (size_t i = 0; i < 200; i++) { struct reftable_ref_record ref = { diff --git a/t/unit-tests/t-reftable-stack.c b/t/unit-tests/t-reftable-stack.c index d62a9c1bed..65e513d5ec 100644 --- a/t/unit-tests/t-reftable-stack.c +++ b/t/unit-tests/t-reftable-stack.c @@ -7,17 +7,13 @@ https://developers.google.com/open-source/licenses/bsd */ #include "test-lib.h" +#include "lib-reftable.h" #include "reftable/merged.h" #include "reftable/reader.h" #include "reftable/reftable-error.h" #include "reftable/stack.h" #include -static void set_test_hash(uint8_t *p, int i) -{ - memset(p, (uint8_t)i, hash_size(GIT_SHA1_FORMAT_ID)); -} - static void clear_dir(const char *dirname) { struct strbuf path = STRBUF_INIT; @@ -125,7 +121,7 @@ static void write_n_ref_tables(struct reftable_stack *st, strbuf_reset(&buf); strbuf_addf(&buf, "refs/heads/branch-%04"PRIuMAX, (uintmax_t)i); ref.refname = buf.buf; - set_test_hash(ref.value.val1, i); + t_reftable_set_hash(ref.value.val1, i, GIT_SHA1_FORMAT_ID); err = reftable_stack_add(st, &write_test_ref, &ref); check(!err); @@ -470,13 +466,13 @@ static void t_reftable_stack_add(void) refs[i].refname = xstrdup(buf); refs[i].update_index = i + 1; refs[i].value_type = REFTABLE_REF_VAL1; - set_test_hash(refs[i].value.val1, i); + t_reftable_set_hash(refs[i].value.val1, i, GIT_SHA1_FORMAT_ID); logs[i].refname = xstrdup(buf); logs[i].update_index = N + i + 1; logs[i].value_type = REFTABLE_LOG_UPDATE; logs[i].value.update.email = xstrdup("identity@invalid"); - set_test_hash(logs[i].value.update.new_hash, i); + t_reftable_set_hash(logs[i].value.update.new_hash, i, GIT_SHA1_FORMAT_ID); } for (i = 0; i < N; i++) { @@ -562,14 +558,14 @@ static void t_reftable_stack_iterator(void) refs[i].refname = xstrfmt("branch%02"PRIuMAX, (uintmax_t)i); refs[i].update_index = i + 1; refs[i].value_type = REFTABLE_REF_VAL1; - set_test_hash(refs[i].value.val1, i); + t_reftable_set_hash(refs[i].value.val1, i, GIT_SHA1_FORMAT_ID); logs[i].refname = xstrfmt("branch%02"PRIuMAX, (uintmax_t)i); logs[i].update_index = i + 1; logs[i].value_type = REFTABLE_LOG_UPDATE; logs[i].value.update.email = xstrdup("johndoe@invalid"); logs[i].value.update.message = xstrdup("commit\n"); - set_test_hash(logs[i].value.update.new_hash, i); + t_reftable_set_hash(logs[i].value.update.new_hash, i, GIT_SHA1_FORMAT_ID); } for (i = 0; i < N; i++) { @@ -704,7 +700,8 @@ static void t_reftable_stack_tombstone(void) refs[i].update_index = i + 1; if (i % 2 == 0) { refs[i].value_type = REFTABLE_REF_VAL1; - set_test_hash(refs[i].value.val1, i); + t_reftable_set_hash(refs[i].value.val1, i, + GIT_SHA1_FORMAT_ID); } logs[i].refname = xstrdup(buf); @@ -712,7 +709,8 @@ static void t_reftable_stack_tombstone(void) logs[i].update_index = 42; if (i % 2 == 0) { logs[i].value_type = REFTABLE_LOG_UPDATE; - set_test_hash(logs[i].value.update.new_hash, i); + t_reftable_set_hash(logs[i].value.update.new_hash, i, + GIT_SHA1_FORMAT_ID); logs[i].value.update.email = xstrdup("identity@invalid"); } @@ -844,7 +842,8 @@ static void t_reflog_expire(void) logs[i].value_type = REFTABLE_LOG_UPDATE; logs[i].value.update.time = i; logs[i].value.update.email = xstrdup("identity@invalid"); - set_test_hash(logs[i].value.update.new_hash, i); + t_reftable_set_hash(logs[i].value.update.new_hash, i, + GIT_SHA1_FORMAT_ID); } for (i = 1; i <= N; i++) { From 0a148a8eda7da6f7ff039ff2ea62b143ceb57f2f Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Sep 2024 10:50:14 +0200 Subject: [PATCH 5/6] reftable/reader: make table iterator reseekable In 67ce50ba26 (Merge branch 'ps/reftable-reusable-iterator', 2024-05-30) we have refactored the interface of reftable iterators such that they can be reused in theory. This patch series only landed the required changes on the interface level, but didn't yet implement the actual logic to make iterators reusable. As it turns out almost all of the infrastructure already does support re-seeking. The only exception is the table iterator, which does not reset its `is_finished` bit. Do so and add a couple of tests that verify that we can re-seek iterators. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + reftable/reader.c | 1 + t/unit-tests/t-reftable-merged.c | 76 +++++++++++++++++++++++++ t/unit-tests/t-reftable-reader.c | 96 ++++++++++++++++++++++++++++++++ 4 files changed, 174 insertions(+) create mode 100644 t/unit-tests/t-reftable-reader.c diff --git a/Makefile b/Makefile index 9460a80d0d..4039e355b0 100644 --- a/Makefile +++ b/Makefile @@ -1346,6 +1346,7 @@ UNIT_TEST_PROGRAMS += t-reftable-basics UNIT_TEST_PROGRAMS += t-reftable-block UNIT_TEST_PROGRAMS += t-reftable-merged UNIT_TEST_PROGRAMS += t-reftable-pq +UNIT_TEST_PROGRAMS += t-reftable-reader UNIT_TEST_PROGRAMS += t-reftable-readwrite UNIT_TEST_PROGRAMS += t-reftable-record UNIT_TEST_PROGRAMS += t-reftable-stack diff --git a/reftable/reader.c b/reftable/reader.c index f877099087..6494ce2e32 100644 --- a/reftable/reader.c +++ b/reftable/reader.c @@ -328,6 +328,7 @@ static int table_iter_seek_to(struct table_iter *ti, uint64_t off, uint8_t typ) ti->typ = block_reader_type(&ti->br); ti->block_off = off; block_iter_seek_start(&ti->bi, &ti->br); + ti->is_finished = 0; return 0; } diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c index b8c92fdb36..19e54bdfb8 100644 --- a/t/unit-tests/t-reftable-merged.c +++ b/t/unit-tests/t-reftable-merged.c @@ -194,6 +194,81 @@ static void t_merged_refs(void) reftable_free(bs); } +static void t_merged_seek_multiple_times(void) +{ + struct reftable_ref_record r1[] = { + { + .refname = (char *) "a", + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 1 }, + }, + { + .refname = (char *) "c", + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 2 }, + } + }; + struct reftable_ref_record r2[] = { + { + .refname = (char *) "b", + .update_index = 2, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 3 }, + }, + { + .refname = (char *) "d", + .update_index = 2, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 4 }, + }, + }; + struct reftable_ref_record *refs[] = { + r1, r2, + }; + size_t sizes[] = { + ARRAY_SIZE(r1), ARRAY_SIZE(r2), + }; + struct strbuf bufs[] = { + STRBUF_INIT, STRBUF_INIT, + }; + struct reftable_block_source *sources = NULL; + struct reftable_reader **readers = NULL; + struct reftable_ref_record rec = { 0 }; + struct reftable_iterator it = { 0 }; + struct reftable_merged_table *mt; + + mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2); + merged_table_init_iter(mt, &it, BLOCK_TYPE_REF); + + for (size_t i = 0; i < 5; i++) { + int err = reftable_iterator_seek_ref(&it, "c"); + check(!err); + + err = reftable_iterator_next_ref(&it, &rec); + check(!err); + err = reftable_ref_record_equal(&rec, &r1[1], GIT_SHA1_RAWSZ); + check(err == 1); + + err = reftable_iterator_next_ref(&it, &rec); + check(!err); + err = reftable_ref_record_equal(&rec, &r2[1], GIT_SHA1_RAWSZ); + check(err == 1); + + err = reftable_iterator_next_ref(&it, &rec); + check(err > 0); + } + + for (size_t i = 0; i < ARRAY_SIZE(bufs); i++) + strbuf_release(&bufs[i]); + readers_destroy(readers, ARRAY_SIZE(refs)); + reftable_ref_record_release(&rec); + reftable_iterator_destroy(&it); + reftable_merged_table_free(mt); + reftable_free(sources); +} + static struct reftable_merged_table * merged_table_from_log_records(struct reftable_log_record **logs, struct reftable_block_source **source, @@ -383,6 +458,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED) TEST(t_default_write_opts(), "merged table with default write opts"); TEST(t_merged_logs(), "merged table with multiple log updates for same ref"); TEST(t_merged_refs(), "merged table with multiple updates to same ref"); + TEST(t_merged_seek_multiple_times(), "merged table can seek multiple times"); TEST(t_merged_single_record(), "ref ocurring in only one record can be fetched"); return test_done(); diff --git a/t/unit-tests/t-reftable-reader.c b/t/unit-tests/t-reftable-reader.c new file mode 100644 index 0000000000..eea86966c0 --- /dev/null +++ b/t/unit-tests/t-reftable-reader.c @@ -0,0 +1,96 @@ +#include "test-lib.h" +#include "lib-reftable.h" +#include "reftable/blocksource.h" +#include "reftable/reader.h" + +static int t_reader_seek_once(void) +{ + struct reftable_ref_record records[] = { + { + .refname = (char *) "refs/heads/main", + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 42 }, + }, + }; + struct reftable_block_source source = { 0 }; + struct reftable_ref_record ref = { 0 }; + struct reftable_iterator it = { 0 }; + struct reftable_reader *reader; + struct strbuf buf = STRBUF_INIT; + int ret; + + t_reftable_write_to_buf(&buf, records, ARRAY_SIZE(records), NULL, 0, NULL); + block_source_from_strbuf(&source, &buf); + + ret = reftable_reader_new(&reader, &source, "name"); + check(!ret); + + reftable_reader_init_ref_iterator(reader, &it); + ret = reftable_iterator_seek_ref(&it, ""); + check(!ret); + ret = reftable_iterator_next_ref(&it, &ref); + check(!ret); + + ret = reftable_ref_record_equal(&ref, &records[0], GIT_SHA1_RAWSZ); + check_int(ret, ==, 1); + + ret = reftable_iterator_next_ref(&it, &ref); + check_int(ret, ==, 1); + + reftable_ref_record_release(&ref); + reftable_iterator_destroy(&it); + reftable_reader_decref(reader); + strbuf_release(&buf); + return 0; +} + +static int t_reader_reseek(void) +{ + struct reftable_ref_record records[] = { + { + .refname = (char *) "refs/heads/main", + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 42 }, + }, + }; + struct reftable_block_source source = { 0 }; + struct reftable_ref_record ref = { 0 }; + struct reftable_iterator it = { 0 }; + struct reftable_reader *reader; + struct strbuf buf = STRBUF_INIT; + int ret; + + t_reftable_write_to_buf(&buf, records, ARRAY_SIZE(records), NULL, 0, NULL); + block_source_from_strbuf(&source, &buf); + + ret = reftable_reader_new(&reader, &source, "name"); + check(!ret); + + reftable_reader_init_ref_iterator(reader, &it); + + for (size_t i = 0; i < 5; i++) { + ret = reftable_iterator_seek_ref(&it, ""); + check(!ret); + ret = reftable_iterator_next_ref(&it, &ref); + check(!ret); + + ret = reftable_ref_record_equal(&ref, &records[0], GIT_SHA1_RAWSZ); + check_int(ret, ==, 1); + + ret = reftable_iterator_next_ref(&it, &ref); + check_int(ret, ==, 1); + } + + reftable_ref_record_release(&ref); + reftable_iterator_destroy(&it); + reftable_reader_decref(reader); + strbuf_release(&buf); + return 0; +} + +int cmd_main(int argc UNUSED, const char *argv[] UNUSED) +{ + TEST(t_reader_seek_once(), "reader can seek once"); + TEST(t_reader_reseek(), "reader can reseek multiple times"); + return test_done(); +} From 18695250667912d8278e76dce453706c3d488173 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 16 Sep 2024 10:50:16 +0200 Subject: [PATCH 6/6] refs/reftable: wire up support for exclude patterns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exclude patterns can be used by reference backends to skip over blocks of references that are uninteresting to the caller. Reference backends do not have to wire up support for them, and all callers are expected to behave as if the backend didn't support them. In fact, the only backend that supports exclude patterns right now is the "packed" backend. Exclude patterns can be quite an important performance optimization in repositories that have loads of references. The patterns are set up in case "transfer.hideRefs" and friends are configured during a fetch, so handling these patterns becomes important once there are lots of hidden refs in a served repository. Now that we have properly re-seekable reftable iterators we can also wire up support for these patterns in the "reftable" backend. Doing so is conceptually simple: once we hit a reference whose prefix matches the current exclude pattern we re-seek the iterator to the first reference that doesn't match the pattern anymore. This schema only works for trivial patterns that do not have any globbing characters in them, but this restriction also applies do the "packed" backend. This makes t1419 work with the "reftable" backend with some slight modifications. Of course it also speeds up listing of references with hidden refs. The following benchmark prints one reference with 1 million hidden references: Benchmark 1: HEAD~ Time (mean ± σ): 93.3 ms ± 2.1 ms [User: 90.3 ms, System: 2.5 ms] Range (min … max): 89.8 ms … 97.2 ms 33 runs Benchmark 2: HEAD Time (mean ± σ): 4.2 ms ± 0.6 ms [User: 2.2 ms, System: 1.8 ms] Range (min … max): 3.1 ms … 8.1 ms 765 runs Summary HEAD ran 22.15 ± 3.19 times faster than HEAD~ Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 133 +++++++++++++++++++++++++++++++++++++++- t/t1419-exclude-refs.sh | 49 ++++++++++++--- trace2.h | 1 + trace2/tr2_ctr.c | 5 ++ 4 files changed, 176 insertions(+), 12 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 1c4b19e737..3e63833ce4 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -21,6 +21,7 @@ #include "../reftable/reftable-iterator.h" #include "../setup.h" #include "../strmap.h" +#include "../trace2.h" #include "parse.h" #include "refs-internal.h" @@ -447,10 +448,81 @@ struct reftable_ref_iterator { const char *prefix; size_t prefix_len; + char **exclude_patterns; + size_t exclude_patterns_index; + size_t exclude_patterns_strlen; unsigned int flags; int err; }; +/* + * Handle exclude patterns. Returns either `1`, which tells the caller that the + * current reference shall not be shown. Or `0`, which indicates that it should + * be shown. + */ +static int should_exclude_current_ref(struct reftable_ref_iterator *iter) +{ + while (iter->exclude_patterns[iter->exclude_patterns_index]) { + const char *pattern = iter->exclude_patterns[iter->exclude_patterns_index]; + char *ref_after_pattern; + int cmp; + + /* + * Lazily cache the pattern length so that we don't have to + * recompute it every time this function is called. + */ + if (!iter->exclude_patterns_strlen) + iter->exclude_patterns_strlen = strlen(pattern); + + /* + * When the reference name is lexicographically bigger than the + * current exclude pattern we know that it won't ever match any + * of the following references, either. We thus advance to the + * next pattern and re-check whether it matches. + * + * Otherwise, if it's smaller, then we do not have a match and + * thus want to show the current reference. + */ + cmp = strncmp(iter->ref.refname, pattern, + iter->exclude_patterns_strlen); + if (cmp > 0) { + iter->exclude_patterns_index++; + iter->exclude_patterns_strlen = 0; + continue; + } + if (cmp < 0) + return 0; + + /* + * The reference shares a prefix with the exclude pattern and + * shall thus be omitted. We skip all references that match the + * pattern by seeking to the first reference after the block of + * matches. + * + * This is done by appending the highest possible character to + * the pattern. Consequently, all references that have the + * pattern as prefix and whose suffix starts with anything in + * the range [0x00, 0xfe] are skipped. And given that 0xff is a + * non-printable character that shouldn't ever be in a ref name, + * we'd not yield any such record, either. + * + * Note that the seeked-to reference may also be excluded. This + * is not handled here though, but the caller is expected to + * loop and re-verify the next reference for us. + */ + ref_after_pattern = xstrfmt("%s%c", pattern, 0xff); + iter->err = reftable_iterator_seek_ref(&iter->iter, ref_after_pattern); + iter->exclude_patterns_index++; + iter->exclude_patterns_strlen = 0; + trace2_counter_add(TRACE2_COUNTER_ID_REFTABLE_RESEEKS, 1); + + free(ref_after_pattern); + return 1; + } + + return 0; +} + static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) { struct reftable_ref_iterator *iter = @@ -481,6 +553,9 @@ static int reftable_ref_iterator_advance(struct ref_iterator *ref_iterator) break; } + if (iter->exclude_patterns && should_exclude_current_ref(iter)) + continue; + if (iter->flags & DO_FOR_EACH_PER_WORKTREE_ONLY && parse_worktree_ref(iter->ref.refname, NULL, NULL, NULL) != REF_WORKTREE_CURRENT) @@ -570,6 +645,11 @@ static int reftable_ref_iterator_abort(struct ref_iterator *ref_iterator) (struct reftable_ref_iterator *)ref_iterator; reftable_ref_record_release(&iter->ref); reftable_iterator_destroy(&iter->iter); + if (iter->exclude_patterns) { + for (size_t i = 0; iter->exclude_patterns[i]; i++) + free(iter->exclude_patterns[i]); + free(iter->exclude_patterns); + } free(iter); return ITER_DONE; } @@ -580,9 +660,53 @@ static struct ref_iterator_vtable reftable_ref_iterator_vtable = { .abort = reftable_ref_iterator_abort }; +static int qsort_strcmp(const void *va, const void *vb) +{ + const char *a = *(const char **)va; + const char *b = *(const char **)vb; + return strcmp(a, b); +} + +static char **filter_exclude_patterns(const char **exclude_patterns) +{ + size_t filtered_size = 0, filtered_alloc = 0; + char **filtered = NULL; + + if (!exclude_patterns) + return NULL; + + for (size_t i = 0; ; i++) { + const char *exclude_pattern = exclude_patterns[i]; + int has_glob = 0; + + if (!exclude_pattern) + break; + + for (const char *p = exclude_pattern; *p; p++) { + has_glob = is_glob_special(*p); + if (has_glob) + break; + } + if (has_glob) + continue; + + ALLOC_GROW(filtered, filtered_size + 1, filtered_alloc); + filtered[filtered_size++] = xstrdup(exclude_pattern); + } + + if (filtered_size) { + QSORT(filtered, filtered_size, qsort_strcmp); + ALLOC_GROW(filtered, filtered_size + 1, filtered_alloc); + filtered[filtered_size++] = NULL; + } + + return filtered; +} + static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_store *refs, struct reftable_stack *stack, const char *prefix, + const char **exclude_patterns, int flags) { struct reftable_ref_iterator *iter; @@ -595,6 +719,7 @@ static struct reftable_ref_iterator *ref_iterator_for_stack(struct reftable_ref_ iter->base.oid = &iter->oid; iter->flags = flags; iter->refs = refs; + iter->exclude_patterns = filter_exclude_patterns(exclude_patterns); ret = refs->err; if (ret) @@ -616,7 +741,7 @@ done: static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_store, const char *prefix, - const char **exclude_patterns UNUSED, + const char **exclude_patterns, unsigned int flags) { struct reftable_ref_iterator *main_iter, *worktree_iter; @@ -627,7 +752,8 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto required_flags |= REF_STORE_ODB; refs = reftable_be_downcast(ref_store, required_flags, "ref_iterator_begin"); - main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix, flags); + main_iter = ref_iterator_for_stack(refs, refs->main_stack, prefix, + exclude_patterns, flags); /* * The worktree stack is only set when we're in an actual worktree @@ -641,7 +767,8 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto * Otherwise we merge both the common and the per-worktree refs into a * single iterator. */ - worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix, flags); + worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix, + exclude_patterns, flags); return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base, ref_iterator_select, NULL); } diff --git a/t/t1419-exclude-refs.sh b/t/t1419-exclude-refs.sh index 1359574419..3256e4462f 100755 --- a/t/t1419-exclude-refs.sh +++ b/t/t1419-exclude-refs.sh @@ -8,12 +8,6 @@ export GIT_TEST_DEFAULT_INITIAL_BRANCH_NAME TEST_PASSES_SANITIZE_LEAK=true . ./test-lib.sh -if test_have_prereq !REFFILES -then - skip_all='skipping `git for-each-ref --exclude` tests; need files backend' - test_done -fi - for_each_ref__exclude () { GIT_TRACE2_PERF=1 test-tool ref-store main \ for-each-ref--exclude "$@" >actual.raw @@ -28,7 +22,14 @@ assert_jumps () { local nr="$1" local trace="$2" - grep -q "name:jumps_made value:$nr$" $trace + case "$GIT_DEFAULT_REF_FORMAT" in + files) + grep -q "name:jumps_made value:$nr$" $trace;; + reftable) + grep -q "name:reseeks_made value:$nr$" $trace;; + *) + BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";; + esac } assert_no_jumps () { @@ -89,7 +90,14 @@ test_expect_success 'adjacent, non-overlapping excluded regions' ' for_each_ref refs/heads/foo refs/heads/quux >expect && test_cmp expect actual && - assert_jumps 1 perf + case "$GIT_DEFAULT_REF_FORMAT" in + files) + assert_jumps 1 perf;; + reftable) + assert_jumps 2 perf;; + *) + BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";; + esac ' test_expect_success 'overlapping excluded regions' ' @@ -106,7 +114,30 @@ test_expect_success 'several overlapping excluded regions' ' for_each_ref refs/heads/quux >expect && test_cmp expect actual && - assert_jumps 1 perf + case "$GIT_DEFAULT_REF_FORMAT" in + files) + assert_jumps 1 perf;; + reftable) + assert_jumps 3 perf;; + *) + BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";; + esac +' + +test_expect_success 'unordered excludes' ' + for_each_ref__exclude refs/heads \ + refs/heads/foo refs/heads/baz >actual 2>perf && + for_each_ref refs/heads/bar refs/heads/quux >expect && + + test_cmp expect actual && + case "$GIT_DEFAULT_REF_FORMAT" in + files) + assert_jumps 1 perf;; + reftable) + assert_jumps 2 perf;; + *) + BUG "unhandled ref format $GIT_DEFAULT_REF_FORMAT";; + esac ' test_expect_success 'non-matching excluded section' ' diff --git a/trace2.h b/trace2.h index 19e04bf040..901f39253a 100644 --- a/trace2.h +++ b/trace2.h @@ -554,6 +554,7 @@ enum trace2_counter_id { TRACE2_COUNTER_ID_TEST2, /* emits summary and thread events */ TRACE2_COUNTER_ID_PACKED_REFS_JUMPS, /* counts number of jumps */ + TRACE2_COUNTER_ID_REFTABLE_RESEEKS, /* counts number of re-seeks */ /* counts number of fsyncs */ TRACE2_COUNTER_ID_FSYNC_WRITEOUT_ONLY, diff --git a/trace2/tr2_ctr.c b/trace2/tr2_ctr.c index d3a33715c1..036b643578 100644 --- a/trace2/tr2_ctr.c +++ b/trace2/tr2_ctr.c @@ -31,6 +31,11 @@ static struct tr2_counter_metadata tr2_counter_metadata[TRACE2_NUMBER_OF_COUNTER .name = "jumps_made", .want_per_thread_events = 0, }, + [TRACE2_COUNTER_ID_REFTABLE_RESEEKS] = { + .category = "reftable", + .name = "reseeks_made", + .want_per_thread_events = 0, + }, [TRACE2_COUNTER_ID_FSYNC_WRITEOUT_ONLY] = { .category = "fsync", .name = "writeout-only",