2021-10-08 04:25:02 +08:00
|
|
|
/*
|
|
|
|
Copyright 2020 Google LLC
|
|
|
|
|
|
|
|
Use of this source code is governed by a BSD-style
|
|
|
|
license that can be found in the LICENSE file or at
|
|
|
|
https://developers.google.com/open-source/licenses/bsd
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* record.c - methods for different types of records. */
|
|
|
|
|
|
|
|
#include "record.h"
|
|
|
|
|
|
|
|
#include "system.h"
|
|
|
|
#include "constants.h"
|
|
|
|
#include "reftable-error.h"
|
|
|
|
#include "basics.h"
|
|
|
|
|
2022-01-20 23:12:13 +08:00
|
|
|
static struct reftable_record_vtable *
|
|
|
|
reftable_record_vtable(struct reftable_record *rec);
|
|
|
|
static void *reftable_record_data(struct reftable_record *rec);
|
|
|
|
|
2021-10-08 04:25:02 +08:00
|
|
|
int get_var_int(uint64_t *dest, struct string_view *in)
|
|
|
|
{
|
|
|
|
int ptr = 0;
|
|
|
|
uint64_t val;
|
|
|
|
|
|
|
|
if (in->len == 0)
|
|
|
|
return -1;
|
|
|
|
val = in->buf[ptr] & 0x7f;
|
|
|
|
|
|
|
|
while (in->buf[ptr] & 0x80) {
|
|
|
|
ptr++;
|
|
|
|
if (ptr > in->len) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
val = (val + 1) << 7 | (uint64_t)(in->buf[ptr] & 0x7f);
|
|
|
|
}
|
|
|
|
|
|
|
|
*dest = val;
|
|
|
|
return ptr + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
int put_var_int(struct string_view *dest, uint64_t val)
|
|
|
|
{
|
|
|
|
uint8_t buf[10] = { 0 };
|
|
|
|
int i = 9;
|
|
|
|
int n = 0;
|
|
|
|
buf[i] = (uint8_t)(val & 0x7f);
|
|
|
|
i--;
|
|
|
|
while (1) {
|
|
|
|
val >>= 7;
|
|
|
|
if (!val) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
val--;
|
|
|
|
buf[i] = 0x80 | (uint8_t)(val & 0x7f);
|
|
|
|
i--;
|
|
|
|
}
|
|
|
|
|
|
|
|
n = sizeof(buf) - i - 1;
|
|
|
|
if (dest->len < n)
|
|
|
|
return -1;
|
|
|
|
memcpy(dest->buf, &buf[i + 1], n);
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_is_block_type(uint8_t typ)
|
|
|
|
{
|
|
|
|
switch (typ) {
|
|
|
|
case BLOCK_TYPE_REF:
|
|
|
|
case BLOCK_TYPE_LOG:
|
|
|
|
case BLOCK_TYPE_OBJ:
|
|
|
|
case BLOCK_TYPE_INDEX:
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2024-01-03 14:22:26 +08:00
|
|
|
const unsigned char *reftable_ref_record_val1(const struct reftable_ref_record *rec)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
switch (rec->value_type) {
|
|
|
|
case REFTABLE_REF_VAL1:
|
|
|
|
return rec->value.val1;
|
|
|
|
case REFTABLE_REF_VAL2:
|
|
|
|
return rec->value.val2.value;
|
|
|
|
default:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-03 14:22:26 +08:00
|
|
|
const unsigned char *reftable_ref_record_val2(const struct reftable_ref_record *rec)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
switch (rec->value_type) {
|
|
|
|
case REFTABLE_REF_VAL2:
|
|
|
|
return rec->value.val2.target_value;
|
|
|
|
default:
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Decode a length-prefixed string (varint length followed by that many
 * bytes) from `in` into `dest`. `dest` is reset first, so callers need not
 * clear it. Returns the number of input bytes consumed, or -1 on a
 * malformed or truncated prefix.
 */
static int decode_string(struct strbuf *dest, struct string_view in)
{
	int len_before = in.len;
	uint64_t payload_len = 0;
	int varint_len = get_var_int(&payload_len, &in);

	if (varint_len <= 0)
		return -1;
	string_view_consume(&in, varint_len);
	if (in.len < payload_len)
		return -1;

	strbuf_reset(dest);
	strbuf_add(dest, in.buf, payload_len);
	string_view_consume(&in, payload_len);

	return len_before - in.len;
}
|
|
|
|
|
2024-06-07 14:37:39 +08:00
|
|
|
static int encode_string(const char *str, struct string_view s)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct string_view start = s;
|
|
|
|
int l = strlen(str);
|
|
|
|
int n = put_var_int(&s, l);
|
|
|
|
if (n < 0)
|
|
|
|
return -1;
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
if (s.len < l)
|
|
|
|
return -1;
|
|
|
|
memcpy(s.buf, str, l);
|
|
|
|
string_view_consume(&s, l);
|
|
|
|
|
|
|
|
return start.len - s.len;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Write `key` into `dest` using prefix compression against `prev_key`:
 * varint prefix length, then varint (suffix length << 3 | extra), then the
 * raw suffix bytes. Sets *restart to 1 when the key shares no prefix with
 * its predecessor (i.e. it can serve as a restart point). Returns the
 * number of bytes written, or -1 if `dest` is too small.
 */
int reftable_encode_key(int *restart, struct string_view dest,
			struct strbuf prev_key, struct strbuf key,
			uint8_t extra)
{
	struct string_view start = dest;
	int prefix_len = common_prefix_size(&prev_key, &key);
	uint64_t suffix_len = key.len - prefix_len;
	int n = put_var_int(&dest, (uint64_t)prefix_len);
	if (n < 0)
		return -1;
	string_view_consume(&dest, n);

	/* A key with an empty shared prefix is usable as a restart point. */
	*restart = (prefix_len == 0);

	/* The low 3 bits of the suffix-length varint carry `extra`. */
	n = put_var_int(&dest, suffix_len << 3 | (uint64_t)extra);
	if (n < 0)
		return -1;
	string_view_consume(&dest, n);

	if (dest.len < suffix_len)
		return -1;
	memcpy(dest.buf, key.buf + prefix_len, suffix_len);
	string_view_consume(&dest, suffix_len);

	return start.len - dest.len;
}
|
|
|
|
|
2024-04-03 14:04:22 +08:00
|
|
|
/*
 * Decode the key header of a record: the varint prefix length, followed by
 * a varint whose upper bits are the suffix length and whose low 3 bits are
 * the record's `extra` value (the value type). Returns the number of bytes
 * consumed, or -1 on truncated input.
 */
int reftable_decode_keylen(struct string_view in,
			   uint64_t *prefix_len,
			   uint64_t *suffix_len,
			   uint8_t *extra)
{
	size_t start_len = in.len;
	int n;

	n = get_var_int(prefix_len, &in);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	/*
	 * NOTE(review): get_var_int() never returns 0, so `n <= 0` here and
	 * `n < 0` above are equivalent in practice.
	 */
	n = get_var_int(suffix_len, &in);
	if (n <= 0)
		return -1;
	string_view_consume(&in, n);

	/* Split the combined varint: low 3 bits are `extra`, rest is length. */
	*extra = (uint8_t)(*suffix_len & 0x7);
	*suffix_len >>= 3;

	return start_len - in.len;
}
|
|
|
|
|
|
|
|
/*
 * Decode a prefix-compressed key in place: `last_key` must hold the
 * previous record's key on entry and is rewritten to the current key by
 * trimming it to the shared prefix and appending the suffix read from
 * `in`. Stores the 3-bit value type in *extra. Returns the number of bytes
 * consumed from `in`, or -1 on malformed input.
 */
int reftable_decode_key(struct strbuf *last_key, uint8_t *extra,
			struct string_view in)
{
	int start_len = in.len;
	uint64_t prefix_len = 0;
	uint64_t suffix_len = 0;
	int n;

	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, extra);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	/*
	 * Reject records whose suffix exceeds the remaining input or whose
	 * claimed prefix is longer than the previous key actually was.
	 */
	if (in.len < suffix_len ||
	    prefix_len > last_key->len)
		return -1;

	/* Keep the shared prefix, then splice the new suffix onto it. */
	strbuf_setlen(last_key, prefix_len);
	strbuf_add(last_key, in.buf, suffix_len);
	string_view_consume(&in, suffix_len);

	return start_len - in.len;
}
|
|
|
|
|
|
|
|
static void reftable_ref_record_key(const void *r, struct strbuf *dest)
|
|
|
|
{
|
|
|
|
const struct reftable_ref_record *rec =
|
|
|
|
(const struct reftable_ref_record *)r;
|
|
|
|
strbuf_reset(dest);
|
|
|
|
strbuf_addstr(dest, rec->refname);
|
|
|
|
}
|
|
|
|
|
2024-10-02 18:55:40 +08:00
|
|
|
/*
 * vtable `copy_from` callback: deep-copy `src_rec` into `rec`, releasing
 * whatever `rec` previously held. The destination's refname allocation is
 * deliberately preserved across the release so it can be reused, avoiding
 * a reallocation per copy. Returns 0 on success or
 * REFTABLE_OUT_OF_MEMORY_ERROR.
 */
static int reftable_ref_record_copy_from(void *rec, const void *src_rec,
					 int hash_size)
{
	struct reftable_ref_record *ref = rec;
	const struct reftable_ref_record *src = src_rec;
	char *refname = NULL;
	size_t refname_cap = 0;
	int err;

	assert(hash_size > 0);

	/*
	 * Stow away the refname buffer, release the record (which zeroes
	 * it), then restore the buffer so it can be grown and reused.
	 */
	SWAP(refname, ref->refname);
	SWAP(refname_cap, ref->refname_cap);
	reftable_ref_record_release(ref);
	SWAP(ref->refname, refname);
	SWAP(ref->refname_cap, refname_cap);

	if (src->refname) {
		size_t refname_len = strlen(src->refname);

		REFTABLE_ALLOC_GROW(ref->refname, refname_len + 1,
				    ref->refname_cap);
		if (!ref->refname) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto out;
		}

		memcpy(ref->refname, src->refname, refname_len);
		ref->refname[refname_len] = 0;
	}

	ref->update_index = src->update_index;
	ref->value_type = src->value_type;
	switch (src->value_type) {
	case REFTABLE_REF_DELETION:
		break;
	case REFTABLE_REF_VAL1:
		memcpy(ref->value.val1, src->value.val1, hash_size);
		break;
	case REFTABLE_REF_VAL2:
		memcpy(ref->value.val2.value, src->value.val2.value, hash_size);
		memcpy(ref->value.val2.target_value,
		       src->value.val2.target_value, hash_size);
		break;
	case REFTABLE_REF_SYMREF:
		/* NOTE(review): assumes src->value.symref is non-NULL for
		 * symref records — confirm against callers. */
		ref->value.symref = reftable_strdup(src->value.symref);
		if (!ref->value.symref) {
			err = REFTABLE_OUT_OF_MEMORY_ERROR;
			goto out;
		}
		break;
	}

	err = 0;
out:
	return err;
}
|
|
|
|
|
|
|
|
/* vtable `release` callback: type-erased wrapper around the typed release. */
static void reftable_ref_record_release_void(void *rec)
{
	struct reftable_ref_record *ref = rec;

	reftable_ref_record_release(ref);
}
|
|
|
|
|
|
|
|
void reftable_ref_record_release(struct reftable_ref_record *ref)
|
|
|
|
{
|
|
|
|
switch (ref->value_type) {
|
|
|
|
case REFTABLE_REF_SYMREF:
|
|
|
|
reftable_free(ref->value.symref);
|
|
|
|
break;
|
|
|
|
case REFTABLE_REF_VAL2:
|
|
|
|
break;
|
|
|
|
case REFTABLE_REF_VAL1:
|
|
|
|
break;
|
|
|
|
case REFTABLE_REF_DELETION:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
reftable_free(ref->refname);
|
|
|
|
memset(ref, 0, sizeof(struct reftable_ref_record));
|
|
|
|
}
|
|
|
|
|
|
|
|
static uint8_t reftable_ref_record_val_type(const void *rec)
|
|
|
|
{
|
|
|
|
const struct reftable_ref_record *r =
|
|
|
|
(const struct reftable_ref_record *)rec;
|
|
|
|
return r->value_type;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * vtable `encode` callback: serialize the value portion of a ref record
 * into `s` — the update_index varint followed by a value-type-dependent
 * payload (nothing for deletions, one or two hashes, or a length-prefixed
 * symref target). The key is encoded separately by the caller. Returns the
 * number of bytes written, or -1 if `s` is too small. Aborts on a
 * corrupted value_type.
 */
static int reftable_ref_record_encode(const void *rec, struct string_view s,
				      int hash_size)
{
	const struct reftable_ref_record *r =
		(const struct reftable_ref_record *)rec;
	struct string_view start = s;
	int n = put_var_int(&s, r->update_index);
	assert(hash_size > 0);
	if (n < 0)
		return -1;
	string_view_consume(&s, n);

	switch (r->value_type) {
	case REFTABLE_REF_SYMREF:
		n = encode_string(r->value.symref, s);
		if (n < 0) {
			return -1;
		}
		string_view_consume(&s, n);
		break;
	case REFTABLE_REF_VAL2:
		/* Two hashes back to back: value, then peeled target. */
		if (s.len < 2 * hash_size) {
			return -1;
		}
		memcpy(s.buf, r->value.val2.value, hash_size);
		string_view_consume(&s, hash_size);
		memcpy(s.buf, r->value.val2.target_value, hash_size);
		string_view_consume(&s, hash_size);
		break;
	case REFTABLE_REF_VAL1:
		if (s.len < hash_size) {
			return -1;
		}
		memcpy(s.buf, r->value.val1, hash_size);
		string_view_consume(&s, hash_size);
		break;
	case REFTABLE_REF_DELETION:
		/* Deletions carry no payload beyond the update_index. */
		break;
	default:
		abort();
	}

	return start.len - s.len;
}
|
|
|
|
|
|
|
|
static int reftable_ref_record_decode(void *rec, struct strbuf key,
|
|
|
|
uint8_t val_type, struct string_view in,
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
int hash_size, struct strbuf *scratch)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct reftable_ref_record *r = rec;
|
|
|
|
struct string_view start = in;
|
|
|
|
uint64_t update_index = 0;
|
reftable/record: reuse refname when decoding
When decoding a reftable record we will first release the user-provided
record and then decode the new record into it. This is quite inefficient
as we basically need to reallocate at least the refname every time.
Refactor the function to start tracking the refname capacity. Like this,
we can stow away the refname, release, restore and then grow the refname
to the required number of bytes via `REFTABLE_ALLOC_GROW()`.
This refactoring is safe to do because all functions that assigning to
the refname will first call `reftable_ref_record_release()`, which will
zero out the complete record after releasing memory.
This change results in a nice speedup when iterating over 1 million
refs:
Benchmark 1: show-ref: single matching ref (revision = HEAD~)
Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.7 ms]
Range (min … max): 120.4 ms … 152.7 ms 1000 runs
Benchmark 2: show-ref: single matching ref (revision = HEAD)
Time (mean ± σ): 114.4 ms ± 3.7 ms [User: 111.5 ms, System: 2.7 ms]
Range (min … max): 111.0 ms … 152.1 ms 1000 runs
Summary
show-ref: single matching ref (revision = HEAD) ran
1.08 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)
Furthermore, with this change we now perform a mostly constant number of
allocations when iterating. Before this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 1,006,620 allocs, 1,006,495 frees, 25,398,363 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,623 allocs, 6,498 frees, 509,592 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-04 18:49:22 +08:00
|
|
|
const char *refname = NULL;
|
|
|
|
size_t refname_cap = 0;
|
2024-10-02 18:55:43 +08:00
|
|
|
int n, err;
|
reftable/record: reuse refname when decoding
When decoding a reftable record we will first release the user-provided
record and then decode the new record into it. This is quite inefficient
as we basically need to reallocate at least the refname every time.
Refactor the function to start tracking the refname capacity. Like this,
we can stow away the refname, release, restore and then grow the refname
to the required number of bytes via `REFTABLE_ALLOC_GROW()`.
This refactoring is safe to do because all functions that assigning to
the refname will first call `reftable_ref_record_release()`, which will
zero out the complete record after releasing memory.
This change results in a nice speedup when iterating over 1 million
refs:
Benchmark 1: show-ref: single matching ref (revision = HEAD~)
Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.7 ms]
Range (min … max): 120.4 ms … 152.7 ms 1000 runs
Benchmark 2: show-ref: single matching ref (revision = HEAD)
Time (mean ± σ): 114.4 ms ± 3.7 ms [User: 111.5 ms, System: 2.7 ms]
Range (min … max): 111.0 ms … 152.1 ms 1000 runs
Summary
show-ref: single matching ref (revision = HEAD) ran
1.08 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)
Furthermore, with this change we now perform a mostly constant number of
allocations when iterating. Before this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 1,006,620 allocs, 1,006,495 frees, 25,398,363 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,623 allocs, 6,498 frees, 509,592 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-04 18:49:22 +08:00
|
|
|
|
|
|
|
assert(hash_size > 0);
|
|
|
|
|
|
|
|
n = get_var_int(&update_index, &in);
|
2021-10-08 04:25:02 +08:00
|
|
|
if (n < 0)
|
|
|
|
return n;
|
|
|
|
string_view_consume(&in, n);
|
|
|
|
|
reftable/record: reuse refname when decoding
When decoding a reftable record we will first release the user-provided
record and then decode the new record into it. This is quite inefficient
as we basically need to reallocate at least the refname every time.
Refactor the function to start tracking the refname capacity. Like this,
we can stow away the refname, release, restore and then grow the refname
to the required number of bytes via `REFTABLE_ALLOC_GROW()`.
This refactoring is safe to do because all functions that assigning to
the refname will first call `reftable_ref_record_release()`, which will
zero out the complete record after releasing memory.
This change results in a nice speedup when iterating over 1 million
refs:
Benchmark 1: show-ref: single matching ref (revision = HEAD~)
Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.7 ms]
Range (min … max): 120.4 ms … 152.7 ms 1000 runs
Benchmark 2: show-ref: single matching ref (revision = HEAD)
Time (mean ± σ): 114.4 ms ± 3.7 ms [User: 111.5 ms, System: 2.7 ms]
Range (min … max): 111.0 ms … 152.1 ms 1000 runs
Summary
show-ref: single matching ref (revision = HEAD) ran
1.08 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)
Furthermore, with this change we now perform a mostly constant number of
allocations when iterating. Before this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 1,006,620 allocs, 1,006,495 frees, 25,398,363 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,623 allocs, 6,498 frees, 509,592 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-04 18:49:22 +08:00
|
|
|
SWAP(refname, r->refname);
|
|
|
|
SWAP(refname_cap, r->refname_cap);
|
2021-10-08 04:25:02 +08:00
|
|
|
reftable_ref_record_release(r);
|
reftable/record: reuse refname when decoding
When decoding a reftable record we will first release the user-provided
record and then decode the new record into it. This is quite inefficient
as we basically need to reallocate at least the refname every time.
Refactor the function to start tracking the refname capacity. Like this,
we can stow away the refname, release, restore and then grow the refname
to the required number of bytes via `REFTABLE_ALLOC_GROW()`.
This refactoring is safe to do because all functions that assigning to
the refname will first call `reftable_ref_record_release()`, which will
zero out the complete record after releasing memory.
This change results in a nice speedup when iterating over 1 million
refs:
Benchmark 1: show-ref: single matching ref (revision = HEAD~)
Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.7 ms]
Range (min … max): 120.4 ms … 152.7 ms 1000 runs
Benchmark 2: show-ref: single matching ref (revision = HEAD)
Time (mean ± σ): 114.4 ms ± 3.7 ms [User: 111.5 ms, System: 2.7 ms]
Range (min … max): 111.0 ms … 152.1 ms 1000 runs
Summary
show-ref: single matching ref (revision = HEAD) ran
1.08 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)
Furthermore, with this change we now perform a mostly constant number of
allocations when iterating. Before this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 1,006,620 allocs, 1,006,495 frees, 25,398,363 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,623 allocs, 6,498 frees, 509,592 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-04 18:49:22 +08:00
|
|
|
SWAP(r->refname, refname);
|
|
|
|
SWAP(r->refname_cap, refname_cap);
|
2021-10-08 04:25:02 +08:00
|
|
|
|
reftable/record: reuse refname when decoding
When decoding a reftable record we will first release the user-provided
record and then decode the new record into it. This is quite inefficient
as we basically need to reallocate at least the refname every time.
Refactor the function to start tracking the refname capacity. Like this,
we can stow away the refname, release, restore and then grow the refname
to the required number of bytes via `REFTABLE_ALLOC_GROW()`.
This refactoring is safe to do because all functions that assigning to
the refname will first call `reftable_ref_record_release()`, which will
zero out the complete record after releasing memory.
This change results in a nice speedup when iterating over 1 million
refs:
Benchmark 1: show-ref: single matching ref (revision = HEAD~)
Time (mean ± σ): 124.0 ms ± 3.9 ms [User: 121.1 ms, System: 2.7 ms]
Range (min … max): 120.4 ms … 152.7 ms 1000 runs
Benchmark 2: show-ref: single matching ref (revision = HEAD)
Time (mean ± σ): 114.4 ms ± 3.7 ms [User: 111.5 ms, System: 2.7 ms]
Range (min … max): 111.0 ms … 152.1 ms 1000 runs
Summary
show-ref: single matching ref (revision = HEAD) ran
1.08 ± 0.05 times faster than show-ref: single matching ref (revision = HEAD~)
Furthermore, with this change we now perform a mostly constant number of
allocations when iterating. Before this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 1,006,620 allocs, 1,006,495 frees, 25,398,363 bytes allocated
After this change:
HEAP SUMMARY:
in use at exit: 13,603 bytes in 125 blocks
total heap usage: 6,623 allocs, 6,498 frees, 509,592 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-04 18:49:22 +08:00
|
|
|
REFTABLE_ALLOC_GROW(r->refname, key.len + 1, r->refname_cap);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (!r->refname) {
|
|
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
goto done;
|
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
memcpy(r->refname, key.buf, key.len);
|
|
|
|
r->refname[key.len] = 0;
|
2024-02-12 16:32:53 +08:00
|
|
|
|
|
|
|
r->update_index = update_index;
|
2021-10-08 04:25:02 +08:00
|
|
|
r->value_type = val_type;
|
|
|
|
switch (val_type) {
|
|
|
|
case REFTABLE_REF_VAL1:
|
|
|
|
if (in.len < hash_size) {
|
2024-10-02 18:55:43 +08:00
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
|
|
|
goto done;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(r->value.val1, in.buf, hash_size);
|
|
|
|
string_view_consume(&in, hash_size);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case REFTABLE_REF_VAL2:
|
|
|
|
if (in.len < 2 * hash_size) {
|
2024-10-02 18:55:43 +08:00
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
|
|
|
goto done;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
memcpy(r->value.val2.value, in.buf, hash_size);
|
|
|
|
string_view_consume(&in, hash_size);
|
|
|
|
|
|
|
|
memcpy(r->value.val2.target_value, in.buf, hash_size);
|
|
|
|
string_view_consume(&in, hash_size);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case REFTABLE_REF_SYMREF: {
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
int n = decode_string(scratch, in);
|
2021-10-08 04:25:02 +08:00
|
|
|
if (n < 0) {
|
2024-10-02 18:55:43 +08:00
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
|
|
|
goto done;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
string_view_consume(&in, n);
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
r->value.symref = strbuf_detach(scratch, NULL);
|
2021-10-08 04:25:02 +08:00
|
|
|
} break;
|
|
|
|
|
|
|
|
case REFTABLE_REF_DELETION:
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return start.len - in.len;
|
2024-10-02 18:55:43 +08:00
|
|
|
|
|
|
|
done:
|
|
|
|
return err;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* vtable `is_deletion` callback: type-erased wrapper around the typed check. */
static int reftable_ref_record_is_deletion_void(const void *p)
{
	const struct reftable_ref_record *ref = p;

	return reftable_ref_record_is_deletion(ref);
}
|
|
|
|
|
2022-01-20 23:12:11 +08:00
|
|
|
/*
 * vtable `equal` callback: type-erased wrapper; the const casts are needed
 * because reftable_ref_record_equal() takes non-const pointers.
 */
static int reftable_ref_record_equal_void(const void *a,
					  const void *b, int hash_size)
{
	return reftable_ref_record_equal((struct reftable_ref_record *)a,
					 (struct reftable_ref_record *)b,
					 hash_size);
}
|
|
|
|
|
2024-02-12 16:32:25 +08:00
|
|
|
static int reftable_ref_record_cmp_void(const void *_a, const void *_b)
|
|
|
|
{
|
|
|
|
const struct reftable_ref_record *a = _a;
|
|
|
|
const struct reftable_ref_record *b = _b;
|
|
|
|
return strcmp(a->refname, b->refname);
|
|
|
|
}
|
|
|
|
|
2021-10-08 04:25:02 +08:00
|
|
|
static struct reftable_record_vtable reftable_ref_record_vtable = {
|
|
|
|
.key = &reftable_ref_record_key,
|
|
|
|
.type = BLOCK_TYPE_REF,
|
|
|
|
.copy_from = &reftable_ref_record_copy_from,
|
|
|
|
.val_type = &reftable_ref_record_val_type,
|
|
|
|
.encode = &reftable_ref_record_encode,
|
|
|
|
.decode = &reftable_ref_record_decode,
|
|
|
|
.release = &reftable_ref_record_release_void,
|
|
|
|
.is_deletion = &reftable_ref_record_is_deletion_void,
|
2022-01-20 23:12:11 +08:00
|
|
|
.equal = &reftable_ref_record_equal_void,
|
2024-02-12 16:32:25 +08:00
|
|
|
.cmp = &reftable_ref_record_cmp_void,
|
2021-10-08 04:25:02 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static void reftable_obj_record_key(const void *r, struct strbuf *dest)
|
|
|
|
{
|
|
|
|
const struct reftable_obj_record *rec =
|
|
|
|
(const struct reftable_obj_record *)r;
|
|
|
|
strbuf_reset(dest);
|
|
|
|
strbuf_add(dest, rec->hash_prefix, rec->hash_prefix_len);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void reftable_obj_record_release(void *rec)
|
|
|
|
{
|
|
|
|
struct reftable_obj_record *obj = rec;
|
2024-10-02 18:56:36 +08:00
|
|
|
REFTABLE_FREE_AND_NULL(obj->hash_prefix);
|
|
|
|
REFTABLE_FREE_AND_NULL(obj->offsets);
|
2021-10-08 04:25:02 +08:00
|
|
|
memset(obj, 0, sizeof(struct reftable_obj_record));
|
|
|
|
}
|
|
|
|
|
2024-10-02 18:55:40 +08:00
|
|
|
static int reftable_obj_record_copy_from(void *rec, const void *src_rec,
|
|
|
|
int hash_size UNUSED)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct reftable_obj_record *obj = rec;
|
2024-10-02 18:55:40 +08:00
|
|
|
const struct reftable_obj_record *src = src_rec;
|
2021-10-08 04:25:02 +08:00
|
|
|
|
|
|
|
reftable_obj_record_release(obj);
|
2024-02-06 14:35:27 +08:00
|
|
|
|
|
|
|
REFTABLE_ALLOC_ARRAY(obj->hash_prefix, src->hash_prefix_len);
|
2024-10-02 18:55:40 +08:00
|
|
|
if (!obj->hash_prefix)
|
|
|
|
return REFTABLE_OUT_OF_MEMORY_ERROR;
|
2022-01-20 23:12:13 +08:00
|
|
|
obj->hash_prefix_len = src->hash_prefix_len;
|
|
|
|
if (src->hash_prefix_len)
|
|
|
|
memcpy(obj->hash_prefix, src->hash_prefix, obj->hash_prefix_len);
|
2021-10-08 04:25:02 +08:00
|
|
|
|
2024-02-06 14:35:27 +08:00
|
|
|
REFTABLE_ALLOC_ARRAY(obj->offsets, src->offset_len);
|
2024-10-02 18:55:40 +08:00
|
|
|
if (!obj->offsets)
|
|
|
|
return REFTABLE_OUT_OF_MEMORY_ERROR;
|
2022-01-20 23:12:13 +08:00
|
|
|
obj->offset_len = src->offset_len;
|
|
|
|
COPY_ARRAY(obj->offsets, src->offsets, src->offset_len);
|
2024-10-02 18:55:40 +08:00
|
|
|
|
|
|
|
return 0;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static uint8_t reftable_obj_record_val_type(const void *rec)
|
|
|
|
{
|
|
|
|
const struct reftable_obj_record *r = rec;
|
|
|
|
if (r->offset_len > 0 && r->offset_len < 8)
|
|
|
|
return r->offset_len;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reftable_obj_record_encode(const void *rec, struct string_view s,
|
2024-08-17 16:24:36 +08:00
|
|
|
int hash_size UNUSED)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
const struct reftable_obj_record *r = rec;
|
|
|
|
struct string_view start = s;
|
|
|
|
int i = 0;
|
|
|
|
int n = 0;
|
|
|
|
uint64_t last = 0;
|
|
|
|
if (r->offset_len == 0 || r->offset_len >= 8) {
|
|
|
|
n = put_var_int(&s, r->offset_len);
|
|
|
|
if (n < 0) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
}
|
|
|
|
if (r->offset_len == 0)
|
|
|
|
return start.len - s.len;
|
|
|
|
n = put_var_int(&s, r->offsets[0]);
|
|
|
|
if (n < 0)
|
|
|
|
return -1;
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
|
|
|
|
last = r->offsets[0];
|
|
|
|
for (i = 1; i < r->offset_len; i++) {
|
|
|
|
int n = put_var_int(&s, r->offsets[i] - last);
|
|
|
|
if (n < 0) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
last = r->offsets[i];
|
|
|
|
}
|
|
|
|
return start.len - s.len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reftable_obj_record_decode(void *rec, struct strbuf key,
|
|
|
|
uint8_t val_type, struct string_view in,
|
2024-08-17 16:24:36 +08:00
|
|
|
int hash_size UNUSED,
|
|
|
|
struct strbuf *scratch UNUSED)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct string_view start = in;
|
|
|
|
struct reftable_obj_record *r = rec;
|
|
|
|
uint64_t count = val_type;
|
|
|
|
int n = 0;
|
|
|
|
uint64_t last;
|
|
|
|
int j;
|
2024-02-06 14:35:27 +08:00
|
|
|
|
2024-03-08 04:35:58 +08:00
|
|
|
reftable_obj_record_release(r);
|
|
|
|
|
2024-02-06 14:35:27 +08:00
|
|
|
REFTABLE_ALLOC_ARRAY(r->hash_prefix, key.len);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (!r->hash_prefix)
|
|
|
|
return REFTABLE_OUT_OF_MEMORY_ERROR;
|
2021-10-08 04:25:02 +08:00
|
|
|
memcpy(r->hash_prefix, key.buf, key.len);
|
|
|
|
r->hash_prefix_len = key.len;
|
|
|
|
|
|
|
|
if (val_type == 0) {
|
|
|
|
n = get_var_int(&count, &in);
|
|
|
|
if (n < 0) {
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
|
|
|
string_view_consume(&in, n);
|
|
|
|
}
|
|
|
|
|
|
|
|
r->offsets = NULL;
|
|
|
|
r->offset_len = 0;
|
|
|
|
if (count == 0)
|
|
|
|
return start.len - in.len;
|
|
|
|
|
2024-02-06 14:35:27 +08:00
|
|
|
REFTABLE_ALLOC_ARRAY(r->offsets, count);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (!r->offsets)
|
|
|
|
return REFTABLE_OUT_OF_MEMORY_ERROR;
|
2021-10-08 04:25:02 +08:00
|
|
|
r->offset_len = count;
|
|
|
|
|
|
|
|
n = get_var_int(&r->offsets[0], &in);
|
|
|
|
if (n < 0)
|
|
|
|
return n;
|
|
|
|
string_view_consume(&in, n);
|
|
|
|
|
|
|
|
last = r->offsets[0];
|
|
|
|
j = 1;
|
|
|
|
while (j < count) {
|
|
|
|
uint64_t delta = 0;
|
|
|
|
int n = get_var_int(&delta, &in);
|
|
|
|
if (n < 0) {
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
string_view_consume(&in, n);
|
|
|
|
|
|
|
|
last = r->offsets[j] = (delta + last);
|
|
|
|
j++;
|
|
|
|
}
|
|
|
|
return start.len - in.len;
|
|
|
|
}
|
|
|
|
|
2024-08-17 16:24:36 +08:00
|
|
|
/* Obj and index records never represent a deletion. */
static int not_a_deletion(const void *p UNUSED)
{
	return 0;
}
|
|
|
|
|
2024-08-17 16:24:36 +08:00
|
|
|
static int reftable_obj_record_equal_void(const void *a, const void *b,
|
|
|
|
int hash_size UNUSED)
|
2022-01-20 23:12:11 +08:00
|
|
|
{
|
|
|
|
struct reftable_obj_record *ra = (struct reftable_obj_record *) a;
|
|
|
|
struct reftable_obj_record *rb = (struct reftable_obj_record *) b;
|
|
|
|
|
|
|
|
if (ra->hash_prefix_len != rb->hash_prefix_len
|
|
|
|
|| ra->offset_len != rb->offset_len)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (ra->hash_prefix_len &&
|
|
|
|
memcmp(ra->hash_prefix, rb->hash_prefix, ra->hash_prefix_len))
|
|
|
|
return 0;
|
|
|
|
if (ra->offset_len &&
|
|
|
|
memcmp(ra->offsets, rb->offsets, ra->offset_len * sizeof(uint64_t)))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2024-02-12 16:32:25 +08:00
|
|
|
static int reftable_obj_record_cmp_void(const void *_a, const void *_b)
|
|
|
|
{
|
|
|
|
const struct reftable_obj_record *a = _a;
|
|
|
|
const struct reftable_obj_record *b = _b;
|
|
|
|
int cmp;
|
|
|
|
|
|
|
|
cmp = memcmp(a->hash_prefix, b->hash_prefix,
|
|
|
|
a->hash_prefix_len > b->hash_prefix_len ?
|
|
|
|
a->hash_prefix_len : b->hash_prefix_len);
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When the prefix is the same then the object record that is longer is
|
|
|
|
* considered to be bigger.
|
|
|
|
*/
|
|
|
|
return a->hash_prefix_len - b->hash_prefix_len;
|
|
|
|
}
|
|
|
|
|
2021-10-08 04:25:02 +08:00
|
|
|
static struct reftable_record_vtable reftable_obj_record_vtable = {
|
|
|
|
.key = &reftable_obj_record_key,
|
|
|
|
.type = BLOCK_TYPE_OBJ,
|
|
|
|
.copy_from = &reftable_obj_record_copy_from,
|
|
|
|
.val_type = &reftable_obj_record_val_type,
|
|
|
|
.encode = &reftable_obj_record_encode,
|
|
|
|
.decode = &reftable_obj_record_decode,
|
|
|
|
.release = &reftable_obj_record_release,
|
2022-01-20 23:12:11 +08:00
|
|
|
.is_deletion = ¬_a_deletion,
|
|
|
|
.equal = &reftable_obj_record_equal_void,
|
2024-02-12 16:32:25 +08:00
|
|
|
.cmp = &reftable_obj_record_cmp_void,
|
2021-10-08 04:25:02 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static void reftable_log_record_key(const void *r, struct strbuf *dest)
|
|
|
|
{
|
|
|
|
const struct reftable_log_record *rec =
|
|
|
|
(const struct reftable_log_record *)r;
|
|
|
|
int len = strlen(rec->refname);
|
|
|
|
uint8_t i64[8];
|
|
|
|
uint64_t ts = 0;
|
|
|
|
strbuf_reset(dest);
|
|
|
|
strbuf_add(dest, (uint8_t *)rec->refname, len + 1);
|
|
|
|
|
|
|
|
ts = (~ts) - rec->update_index;
|
|
|
|
put_be64(&i64[0], ts);
|
|
|
|
strbuf_add(dest, i64, sizeof(i64));
|
|
|
|
}
|
|
|
|
|
2024-10-02 18:55:40 +08:00
|
|
|
static int reftable_log_record_copy_from(void *rec, const void *src_rec,
|
|
|
|
int hash_size)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct reftable_log_record *dst = rec;
|
|
|
|
const struct reftable_log_record *src =
|
|
|
|
(const struct reftable_log_record *)src_rec;
|
2024-10-02 18:55:40 +08:00
|
|
|
int ret;
|
2021-10-08 04:25:02 +08:00
|
|
|
|
|
|
|
reftable_log_record_release(dst);
|
|
|
|
*dst = *src;
|
2024-10-02 18:55:40 +08:00
|
|
|
|
2021-10-08 04:25:02 +08:00
|
|
|
if (dst->refname) {
|
2024-10-02 18:55:40 +08:00
|
|
|
dst->refname = reftable_strdup(dst->refname);
|
|
|
|
if (!dst->refname) {
|
|
|
|
ret = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
goto out;
|
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
2024-10-02 18:55:40 +08:00
|
|
|
|
2021-10-08 04:25:02 +08:00
|
|
|
switch (dst->value_type) {
|
|
|
|
case REFTABLE_LOG_DELETION:
|
|
|
|
break;
|
|
|
|
case REFTABLE_LOG_UPDATE:
|
2024-10-02 18:55:40 +08:00
|
|
|
if (dst->value.update.email)
|
2021-10-08 04:25:02 +08:00
|
|
|
dst->value.update.email =
|
2024-10-02 18:55:40 +08:00
|
|
|
reftable_strdup(dst->value.update.email);
|
|
|
|
if (dst->value.update.name)
|
2021-10-08 04:25:02 +08:00
|
|
|
dst->value.update.name =
|
2024-10-02 18:55:40 +08:00
|
|
|
reftable_strdup(dst->value.update.name);
|
|
|
|
if (dst->value.update.message)
|
2021-10-08 04:25:02 +08:00
|
|
|
dst->value.update.message =
|
2024-10-02 18:55:40 +08:00
|
|
|
reftable_strdup(dst->value.update.message);
|
|
|
|
|
|
|
|
if (!dst->value.update.email ||
|
|
|
|
!dst->value.update.name ||
|
|
|
|
!dst->value.update.message) {
|
|
|
|
ret = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
goto out;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
reftable/record: convert old and new object IDs to arrays
In 7af607c58d (reftable/record: store "val1" hashes as static arrays,
2024-01-03) and b31e3cc620 (reftable/record: store "val2" hashes as
static arrays, 2024-01-03) we have converted ref records to store their
object IDs in a static array. Convert log records to do the same so that
their old and new object IDs are arrays, too.
This change results in two allocations less per log record that we're
iterating over. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 8,068,495 allocs, 8,068,373 frees, 401,011,862 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:10:59 +08:00
|
|
|
memcpy(dst->value.update.new_hash,
|
|
|
|
src->value.update.new_hash, hash_size);
|
|
|
|
memcpy(dst->value.update.old_hash,
|
|
|
|
src->value.update.old_hash, hash_size);
|
2021-10-08 04:25:02 +08:00
|
|
|
break;
|
|
|
|
}
|
2024-10-02 18:55:40 +08:00
|
|
|
|
|
|
|
ret = 0;
|
|
|
|
out:
|
|
|
|
return ret;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Type-erased adapter around reftable_log_record_release() for the
 * vtable. */
static void reftable_log_record_release_void(void *rec)
{
	reftable_log_record_release((struct reftable_log_record *)rec);
}
|
|
|
|
|
|
|
|
void reftable_log_record_release(struct reftable_log_record *r)
|
|
|
|
{
|
|
|
|
reftable_free(r->refname);
|
|
|
|
switch (r->value_type) {
|
|
|
|
case REFTABLE_LOG_DELETION:
|
|
|
|
break;
|
|
|
|
case REFTABLE_LOG_UPDATE:
|
|
|
|
reftable_free(r->value.update.name);
|
|
|
|
reftable_free(r->value.update.email);
|
|
|
|
reftable_free(r->value.update.message);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
memset(r, 0, sizeof(struct reftable_log_record));
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Value type of a log record: 0 for deletions, 1 for updates. */
static uint8_t reftable_log_record_val_type(const void *rec)
{
	const struct reftable_log_record *log = rec;

	return reftable_log_record_is_deletion(log) ? 0 : 1;
}
|
|
|
|
|
|
|
|
static int reftable_log_record_encode(const void *rec, struct string_view s,
|
|
|
|
int hash_size)
|
|
|
|
{
|
|
|
|
const struct reftable_log_record *r = rec;
|
|
|
|
struct string_view start = s;
|
|
|
|
int n = 0;
|
|
|
|
if (reftable_log_record_is_deletion(r))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (s.len < 2 * hash_size)
|
|
|
|
return -1;
|
|
|
|
|
reftable/record: convert old and new object IDs to arrays
In 7af607c58d (reftable/record: store "val1" hashes as static arrays,
2024-01-03) and b31e3cc620 (reftable/record: store "val2" hashes as
static arrays, 2024-01-03) we have converted ref records to store their
object IDs in a static array. Convert log records to do the same so that
their old and new object IDs are arrays, too.
This change results in two allocations less per log record that we're
iterating over. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 8,068,495 allocs, 8,068,373 frees, 401,011,862 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:10:59 +08:00
|
|
|
memcpy(s.buf, r->value.update.old_hash, hash_size);
|
|
|
|
memcpy(s.buf + hash_size, r->value.update.new_hash, hash_size);
|
2021-10-08 04:25:02 +08:00
|
|
|
string_view_consume(&s, 2 * hash_size);
|
|
|
|
|
|
|
|
n = encode_string(r->value.update.name ? r->value.update.name : "", s);
|
|
|
|
if (n < 0)
|
|
|
|
return -1;
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
|
|
|
|
n = encode_string(r->value.update.email ? r->value.update.email : "",
|
|
|
|
s);
|
|
|
|
if (n < 0)
|
|
|
|
return -1;
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
|
|
|
|
n = put_var_int(&s, r->value.update.time);
|
|
|
|
if (n < 0)
|
|
|
|
return -1;
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
|
|
|
|
if (s.len < 2)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
put_be16(s.buf, r->value.update.tz_offset);
|
|
|
|
string_view_consume(&s, 2);
|
|
|
|
|
|
|
|
n = encode_string(
|
|
|
|
r->value.update.message ? r->value.update.message : "", s);
|
|
|
|
if (n < 0)
|
|
|
|
return -1;
|
|
|
|
string_view_consume(&s, n);
|
|
|
|
|
|
|
|
return start.len - s.len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reftable_log_record_decode(void *rec, struct strbuf key,
|
|
|
|
uint8_t val_type, struct string_view in,
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drops some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
int hash_size, struct strbuf *scratch)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct string_view start = in;
|
|
|
|
struct reftable_log_record *r = rec;
|
|
|
|
uint64_t max = 0;
|
|
|
|
uint64_t ts = 0;
|
2024-10-02 18:55:43 +08:00
|
|
|
int err, n;
|
2021-10-08 04:25:02 +08:00
|
|
|
|
|
|
|
if (key.len <= 9 || key.buf[key.len - 9] != 0)
|
|
|
|
return REFTABLE_FORMAT_ERROR;
|
|
|
|
|
reftable/record: reuse refnames when decoding log records
When decoding a log record we always reallocate their refname arrays.
This results in quite a lot of needless allocation churn.
Refactor the code to grow the array as required only. Like this, we
should usually only end up reallocating the array a small handful of
times when iterating over many refs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 4,068,487 allocs, 4,068,365 frees, 332,011,793 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 3,068,488 allocs, 3,068,366 frees, 307,122,961 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:07 +08:00
|
|
|
REFTABLE_ALLOC_GROW(r->refname, key.len - 8, r->refname_cap);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (!r->refname) {
|
|
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2021-10-08 04:25:02 +08:00
|
|
|
memcpy(r->refname, key.buf, key.len - 8);
|
|
|
|
ts = get_be64(key.buf + key.len - 8);
|
|
|
|
|
|
|
|
r->update_index = (~max) - ts;
|
|
|
|
|
|
|
|
if (val_type != r->value_type) {
|
|
|
|
switch (r->value_type) {
|
|
|
|
case REFTABLE_LOG_UPDATE:
|
2024-10-02 18:56:36 +08:00
|
|
|
REFTABLE_FREE_AND_NULL(r->value.update.message);
|
reftable/record: reuse message when decoding log records
Same as the preceding commit we can allocate log messages as needed when
decoding log records, thus further reducing the number of allocations.
Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 3,068,488 allocs, 3,068,366 frees, 307,122,961 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:12 +08:00
|
|
|
r->value.update.message_cap = 0;
|
2024-10-02 18:56:36 +08:00
|
|
|
REFTABLE_FREE_AND_NULL(r->value.update.email);
|
|
|
|
REFTABLE_FREE_AND_NULL(r->value.update.name);
|
2021-10-08 04:25:02 +08:00
|
|
|
break;
|
|
|
|
case REFTABLE_LOG_DELETION:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
r->value_type = val_type;
|
|
|
|
if (val_type == REFTABLE_LOG_DELETION)
|
|
|
|
return 0;
|
|
|
|
|
2024-10-02 18:55:43 +08:00
|
|
|
if (in.len < 2 * hash_size) {
|
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
|
|
|
goto done;
|
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
|
|
|
|
memcpy(r->value.update.old_hash, in.buf, hash_size);
|
|
|
|
memcpy(r->value.update.new_hash, in.buf + hash_size, hash_size);
|
|
|
|
|
|
|
|
string_view_consume(&in, 2 * hash_size);
|
|
|
|
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
n = decode_string(scratch, in);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (n < 0) {
|
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
2021-10-08 04:25:02 +08:00
|
|
|
goto done;
|
2024-10-02 18:55:43 +08:00
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
string_view_consume(&in, n);
|
|
|
|
|
reftable/record: avoid copying author info
Each reflog entry contains information regarding the authorship of who
has made the change. This authorship information is not the same as that
of any of the commits that the reflog entry references, but instead
corresponds to the local user that has executed the command. Thus, it is
almost always the case that all reflog entries have the same author.
We can make use of this fact when decoding reftable records: instead of
freeing and then reallocating the authorship information of log records,
we can special-case when the next record during an iteration has the
exact same authorship as the preceding record. If so, then there is no
need to reallocate the respective fields.
This change results in two allocations less per log record that we're
iterating over in the most common case. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 4,068,487 allocs, 4,068,365 frees, 332,011,793 bytes allocated
An alternative would be to store the capacity of both name and email and
then use `REFTABLE_ALLOC_GROW()` to conditionally reallocate the array.
But reftable records are copied around quite a lot, and thus we need to
be a bit mindful of the overall record size. Furthermore, a memory
comparison should also be more efficient than having to copy over memory
even if we wouldn't have to allocate a new array every time.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:03 +08:00
|
|
|
/*
|
|
|
|
* In almost all cases we can expect the reflog name to not change for
|
|
|
|
* reflog entries as they are tied to the local identity, not to the
|
|
|
|
* target commits. As an optimization for this common case we can thus
|
|
|
|
* skip copying over the name in case it's accurate already.
|
|
|
|
*/
|
|
|
|
if (!r->value.update.name ||
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
strcmp(r->value.update.name, scratch->buf)) {
|
2024-10-02 18:55:43 +08:00
|
|
|
char *name = reftable_realloc(r->value.update.name, scratch->len + 1);
|
|
|
|
if (!name) {
|
|
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
r->value.update.name = name;
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
memcpy(r->value.update.name, scratch->buf, scratch->len);
|
|
|
|
r->value.update.name[scratch->len] = 0;
|
reftable/record: avoid copying author info
Each reflog entry contains information regarding the authorship of who
has made the change. This authorship information is not the same as that
of any of the commits that the reflog entry references, but instead
corresponds to the local user that has executed the command. Thus, it is
almost always the case that all reflog entries have the same author.
We can make use of this fact when decoding reftable records: instead of
freeing and then reallocating the authorship information of log records,
we can special-case when the next record during an iteration has the
exact same authorship as the preceding record. If so, then there is no
need to reallocate the respective fields.
This change results in two allocations less per log record that we're
iterating over in the most common case. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 4,068,487 allocs, 4,068,365 frees, 332,011,793 bytes allocated
An alternative would be to store the capacity of both name and email and
then use `REFTABLE_ALLOC_GROW()` to conditionally reallocate the array.
But reftable records are copied around quite a lot, and thus we need to
be a bit mindful of the overall record size. Furthermore, a memory
comparison should also be more efficient than having to copy over memory
even if we wouldn't have to allocate a new array every time.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:03 +08:00
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
n = decode_string(scratch, in);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (n < 0) {
|
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
2021-10-08 04:25:02 +08:00
|
|
|
goto done;
|
2024-10-02 18:55:43 +08:00
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
string_view_consume(&in, n);
|
|
|
|
|
reftable/record: avoid copying author info
Each reflog entry contains information regarding the authorship of who
has made the change. This authorship information is not the same as that
of any of the commits that the reflog entry references, but instead
corresponds to the local user that has executed the command. Thus, it is
almost always the case that all reflog entries have the same author.
We can make use of this fact when decoding reftable records: instead of
freeing and then reallocating the authorship information of log records,
we can special-case when the next record during an iteration has the
exact same authorship as the preceding record. If so, then there is no
need to reallocate the respective fields.
This change results in two allocations less per log record that we're
iterating over in the most common case. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 4,068,487 allocs, 4,068,365 frees, 332,011,793 bytes allocated
An alternative would be to store the capacity of both name and email and
then use `REFTABLE_ALLOC_GROW()` to conditionally reallocate the array.
But reftable records are copied around quite a lot, and thus we need to
be a bit mindful of the overall record size. Furthermore, a memory
comparison should also be more efficient than having to copy over memory
even if we wouldn't have to allocate a new array every time.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:03 +08:00
|
|
|
/* Same as above, but for the reflog email. */
|
|
|
|
if (!r->value.update.email ||
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
strcmp(r->value.update.email, scratch->buf)) {
|
2024-10-02 18:55:43 +08:00
|
|
|
char *email = reftable_realloc(r->value.update.email, scratch->len + 1);
|
|
|
|
if (!email) {
|
|
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
|
|
|
r->value.update.email = email;
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drops some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
memcpy(r->value.update.email, scratch->buf, scratch->len);
|
|
|
|
r->value.update.email[scratch->len] = 0;
|
reftable/record: avoid copying author info
Each reflog entry contains information regarding the authorship of who
has made the change. This authorship information is not the same as that
of any of the commits that the reflog entry references, but instead
corresponds to the local user that has executed the command. Thus, it is
almost always the case that all reflog entries have the same author.
We can make use of this fact when decoding reftable records: instead of
freeing and then reallocating the authorship information of log records,
we can special-case when the next record during an iteration has the
exact same authorship as the preceding record. If so, then there is no
need to reallocate the respective fields.
This change results in two allocations less per log record that we're
iterating over in the most common case. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 4,068,487 allocs, 4,068,365 frees, 332,011,793 bytes allocated
An alternative would be to store the capacity of both name and email and
then use `REFTABLE_ALLOC_GROW()` to conditionally reallocate the array.
But reftable records are copied around quite a lot, and thus we need to
be a bit mindful of the overall record size. Furthermore, a memory
comparison should also be more efficient than having to copy over memory
even if we wouldn't have to allocate a new array every time.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:03 +08:00
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
|
|
|
|
ts = 0;
|
|
|
|
n = get_var_int(&ts, &in);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (n < 0) {
|
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
2021-10-08 04:25:02 +08:00
|
|
|
goto done;
|
2024-10-02 18:55:43 +08:00
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
string_view_consume(&in, n);
|
|
|
|
r->value.update.time = ts;
|
2024-10-02 18:55:43 +08:00
|
|
|
if (in.len < 2) {
|
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
2021-10-08 04:25:02 +08:00
|
|
|
goto done;
|
2024-10-02 18:55:43 +08:00
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
|
|
|
|
r->value.update.tz_offset = get_be16(in.buf);
|
|
|
|
string_view_consume(&in, 2);
|
|
|
|
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
n = decode_string(scratch, in);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (n < 0) {
|
|
|
|
err = REFTABLE_FORMAT_ERROR;
|
2021-10-08 04:25:02 +08:00
|
|
|
goto done;
|
2024-10-02 18:55:43 +08:00
|
|
|
}
|
2021-10-08 04:25:02 +08:00
|
|
|
string_view_consume(&in, n);
|
|
|
|
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
REFTABLE_ALLOC_GROW(r->value.update.message, scratch->len + 1,
|
reftable/record: reuse message when decoding log records
Same as the preceding commit we can allocate log messages as needed when
decoding log records, thus further reducing the number of allocations.
Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 3,068,488 allocs, 3,068,366 frees, 307,122,961 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:12 +08:00
|
|
|
r->value.update.message_cap);
|
2024-10-02 18:55:43 +08:00
|
|
|
if (!r->value.update.message) {
|
|
|
|
err = REFTABLE_OUT_OF_MEMORY_ERROR;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
memcpy(r->value.update.message, scratch->buf, scratch->len);
|
|
|
|
r->value.update.message[scratch->len] = 0;
|
2021-10-08 04:25:02 +08:00
|
|
|
|
|
|
|
return start.len - in.len;
|
|
|
|
|
|
|
|
done:
|
2024-10-02 18:55:43 +08:00
|
|
|
return err;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
2024-06-07 14:37:39 +08:00
|
|
|
/*
 * Compare two strings for equality, treating a NULL pointer as
 * equivalent to the empty string. Returns 1 on equality, 0 otherwise.
 */
static int null_streq(const char *a, const char *b)
{
	return !strcmp(a ? a : "", b ? b : "");
}
|
|
|
|
|
2022-01-20 23:12:11 +08:00
|
|
|
/* Adapter so the generic record vtable can call the typed log equality check. */
static int reftable_log_record_equal_void(const void *a,
					  const void *b, int hash_size)
{
	struct reftable_log_record *la = (struct reftable_log_record *)a;
	struct reftable_log_record *lb = (struct reftable_log_record *)b;
	return reftable_log_record_equal(la, lb, hash_size);
}
|
|
|
|
|
2024-02-12 16:32:25 +08:00
|
|
|
static int reftable_log_record_cmp_void(const void *_a, const void *_b)
|
|
|
|
{
|
|
|
|
const struct reftable_log_record *a = _a;
|
|
|
|
const struct reftable_log_record *b = _b;
|
|
|
|
int cmp = strcmp(a->refname, b->refname);
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Note that the comparison here is reversed. This is because the
|
|
|
|
* update index is reversed when comparing keys. For reference, see how
|
|
|
|
* we handle this in reftable_log_record_key()`.
|
|
|
|
*/
|
|
|
|
return b->update_index - a->update_index;
|
|
|
|
}
|
|
|
|
|
2022-01-20 23:12:10 +08:00
|
|
|
int reftable_log_record_equal(const struct reftable_log_record *a,
|
|
|
|
const struct reftable_log_record *b, int hash_size)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
if (!(null_streq(a->refname, b->refname) &&
|
|
|
|
a->update_index == b->update_index &&
|
|
|
|
a->value_type == b->value_type))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
switch (a->value_type) {
|
|
|
|
case REFTABLE_LOG_DELETION:
|
|
|
|
return 1;
|
|
|
|
case REFTABLE_LOG_UPDATE:
|
|
|
|
return null_streq(a->value.update.name, b->value.update.name) &&
|
|
|
|
a->value.update.time == b->value.update.time &&
|
|
|
|
a->value.update.tz_offset == b->value.update.tz_offset &&
|
|
|
|
null_streq(a->value.update.email,
|
|
|
|
b->value.update.email) &&
|
|
|
|
null_streq(a->value.update.message,
|
|
|
|
b->value.update.message) &&
|
reftable/record: convert old and new object IDs to arrays
In 7af607c58d (reftable/record: store "val1" hashes as static arrays,
2024-01-03) and b31e3cc620 (reftable/record: store "val2" hashes as
static arrays, 2024-01-03) we have converted ref records to store their
object IDs in a static array. Convert log records to do the same so that
their old and new object IDs are arrays, too.
This change results in two allocations less per log record that we're
iterating over. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 8,068,495 allocs, 8,068,373 frees, 401,011,862 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 6,068,489 allocs, 6,068,367 frees, 361,011,822 bytes allocated
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:10:59 +08:00
|
|
|
!memcmp(a->value.update.old_hash,
|
|
|
|
b->value.update.old_hash, hash_size) &&
|
|
|
|
!memcmp(a->value.update.new_hash,
|
|
|
|
b->value.update.new_hash, hash_size);
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Adapter so the generic record vtable can test log records for deletion. */
static int reftable_log_record_is_deletion_void(const void *p)
{
	const struct reftable_log_record *log = p;
	return reftable_log_record_is_deletion(log);
}
|
|
|
|
|
|
|
|
static struct reftable_record_vtable reftable_log_record_vtable = {
|
|
|
|
.key = &reftable_log_record_key,
|
|
|
|
.type = BLOCK_TYPE_LOG,
|
|
|
|
.copy_from = &reftable_log_record_copy_from,
|
|
|
|
.val_type = &reftable_log_record_val_type,
|
|
|
|
.encode = &reftable_log_record_encode,
|
|
|
|
.decode = &reftable_log_record_decode,
|
|
|
|
.release = &reftable_log_record_release_void,
|
|
|
|
.is_deletion = &reftable_log_record_is_deletion_void,
|
2022-01-20 23:12:14 +08:00
|
|
|
.equal = &reftable_log_record_equal_void,
|
2024-02-12 16:32:25 +08:00
|
|
|
.cmp = &reftable_log_record_cmp_void,
|
2021-10-08 04:25:02 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
static void reftable_index_record_key(const void *r, struct strbuf *dest)
|
|
|
|
{
|
|
|
|
const struct reftable_index_record *rec = r;
|
|
|
|
strbuf_reset(dest);
|
|
|
|
strbuf_addbuf(dest, &rec->last_key);
|
|
|
|
}
|
|
|
|
|
2024-10-02 18:55:40 +08:00
|
|
|
static int reftable_index_record_copy_from(void *rec, const void *src_rec,
|
|
|
|
int hash_size UNUSED)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct reftable_index_record *dst = rec;
|
|
|
|
const struct reftable_index_record *src = src_rec;
|
|
|
|
|
|
|
|
strbuf_reset(&dst->last_key);
|
|
|
|
strbuf_addbuf(&dst->last_key, &src->last_key);
|
|
|
|
dst->offset = src->offset;
|
2024-10-02 18:55:40 +08:00
|
|
|
|
|
|
|
return 0;
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void reftable_index_record_release(void *rec)
|
|
|
|
{
|
|
|
|
struct reftable_index_record *idx = rec;
|
|
|
|
strbuf_release(&idx->last_key);
|
|
|
|
}
|
|
|
|
|
2024-08-17 16:24:36 +08:00
|
|
|
/* Index records have no value-type bits in their encoding; always 0. */
static uint8_t reftable_index_record_val_type(const void *rec UNUSED)
{
	return 0;
}
|
|
|
|
|
|
|
|
static int reftable_index_record_encode(const void *rec, struct string_view out,
|
2024-08-17 16:24:36 +08:00
|
|
|
int hash_size UNUSED)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
const struct reftable_index_record *r =
|
|
|
|
(const struct reftable_index_record *)rec;
|
|
|
|
struct string_view start = out;
|
|
|
|
|
|
|
|
int n = put_var_int(&out, r->offset);
|
|
|
|
if (n < 0)
|
|
|
|
return n;
|
|
|
|
|
|
|
|
string_view_consume(&out, n);
|
|
|
|
|
|
|
|
return start.len - out.len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reftable_index_record_decode(void *rec, struct strbuf key,
|
2024-08-17 16:24:36 +08:00
|
|
|
uint8_t val_type UNUSED,
|
|
|
|
struct string_view in,
|
|
|
|
int hash_size UNUSED,
|
|
|
|
struct strbuf *scratch UNUSED)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
struct string_view start = in;
|
|
|
|
struct reftable_index_record *r = rec;
|
|
|
|
int n = 0;
|
|
|
|
|
|
|
|
strbuf_reset(&r->last_key);
|
|
|
|
strbuf_addbuf(&r->last_key, &key);
|
|
|
|
|
|
|
|
n = get_var_int(&r->offset, &in);
|
|
|
|
if (n < 0)
|
|
|
|
return n;
|
|
|
|
|
|
|
|
string_view_consume(&in, n);
|
|
|
|
return start.len - in.len;
|
|
|
|
}
|
|
|
|
|
2024-08-17 16:24:36 +08:00
|
|
|
static int reftable_index_record_equal(const void *a, const void *b,
|
|
|
|
int hash_size UNUSED)
|
2022-01-20 23:12:11 +08:00
|
|
|
{
|
|
|
|
struct reftable_index_record *ia = (struct reftable_index_record *) a;
|
|
|
|
struct reftable_index_record *ib = (struct reftable_index_record *) b;
|
|
|
|
|
|
|
|
return ia->offset == ib->offset && !strbuf_cmp(&ia->last_key, &ib->last_key);
|
|
|
|
}
|
|
|
|
|
2024-02-12 16:32:25 +08:00
|
|
|
static int reftable_index_record_cmp(const void *_a, const void *_b)
|
|
|
|
{
|
|
|
|
const struct reftable_index_record *a = _a;
|
|
|
|
const struct reftable_index_record *b = _b;
|
|
|
|
return strbuf_cmp(&a->last_key, &b->last_key);
|
|
|
|
}
|
|
|
|
|
2021-10-08 04:25:02 +08:00
|
|
|
static struct reftable_record_vtable reftable_index_record_vtable = {
|
|
|
|
.key = &reftable_index_record_key,
|
|
|
|
.type = BLOCK_TYPE_INDEX,
|
|
|
|
.copy_from = &reftable_index_record_copy_from,
|
|
|
|
.val_type = &reftable_index_record_val_type,
|
|
|
|
.encode = &reftable_index_record_encode,
|
|
|
|
.decode = &reftable_index_record_decode,
|
|
|
|
.release = &reftable_index_record_release,
|
|
|
|
.is_deletion = ¬_a_deletion,
|
2022-01-20 23:12:11 +08:00
|
|
|
.equal = &reftable_index_record_equal,
|
2024-02-12 16:32:25 +08:00
|
|
|
.cmp = &reftable_index_record_cmp,
|
2021-10-08 04:25:02 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
void reftable_record_key(struct reftable_record *rec, struct strbuf *dest)
|
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
reftable_record_vtable(rec)->key(reftable_record_data(rec), dest);
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_record_encode(struct reftable_record *rec, struct string_view dest,
|
|
|
|
int hash_size)
|
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
return reftable_record_vtable(rec)->encode(reftable_record_data(rec),
|
|
|
|
dest, hash_size);
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
2024-10-02 18:55:40 +08:00
|
|
|
int reftable_record_copy_from(struct reftable_record *rec,
|
2021-10-08 04:25:02 +08:00
|
|
|
struct reftable_record *src, int hash_size)
|
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
assert(src->type == rec->type);
|
2021-10-08 04:25:02 +08:00
|
|
|
|
2024-10-02 18:55:40 +08:00
|
|
|
return reftable_record_vtable(rec)->copy_from(reftable_record_data(rec),
|
|
|
|
reftable_record_data(src),
|
|
|
|
hash_size);
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
uint8_t reftable_record_val_type(struct reftable_record *rec)
|
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
return reftable_record_vtable(rec)->val_type(reftable_record_data(rec));
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_record_decode(struct reftable_record *rec, struct strbuf key,
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
uint8_t extra, struct string_view src, int hash_size,
|
|
|
|
struct strbuf *scratch)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
return reftable_record_vtable(rec)->decode(reftable_record_data(rec),
|
reftable/record: use scratch buffer when decoding records
When decoding log records we need a temporary buffer to decode the
reflog entry's name, mail address and message. As this buffer is local
to the function we thus have to reallocate it for every single log
record which we're about to decode, which is inefficient.
Refactor the code such that callers need to pass in a scratch buffer,
which allows us to reuse it for multiple decodes. This reduces the
number of allocations when iterating through reflogs. Before:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 2,068,487 allocs, 2,068,365 frees, 305,122,946 bytes allocated
After:
HEAP SUMMARY:
in use at exit: 13,473 bytes in 122 blocks
total heap usage: 1,068,485 allocs, 1,068,363 frees, 281,122,886 bytes allocated
Note that this commit also drop some redundant calls to `strbuf_reset()`
right before calling `decode_string()`. The latter already knows to
reset the buffer, so there is no need for these.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-03-05 20:11:16 +08:00
|
|
|
key, extra, src, hash_size,
|
|
|
|
scratch);
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void reftable_record_release(struct reftable_record *rec)
|
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
reftable_record_vtable(rec)->release(reftable_record_data(rec));
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_record_is_deletion(struct reftable_record *rec)
|
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
return reftable_record_vtable(rec)->is_deletion(
|
|
|
|
reftable_record_data(rec));
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
2024-02-12 16:32:25 +08:00
|
|
|
int reftable_record_cmp(struct reftable_record *a, struct reftable_record *b)
|
|
|
|
{
|
|
|
|
if (a->type != b->type)
|
|
|
|
BUG("cannot compare reftable records of different type");
|
|
|
|
return reftable_record_vtable(a)->cmp(
|
|
|
|
reftable_record_data(a), reftable_record_data(b));
|
|
|
|
}
|
|
|
|
|
2022-01-20 23:12:11 +08:00
|
|
|
int reftable_record_equal(struct reftable_record *a, struct reftable_record *b, int hash_size)
|
|
|
|
{
|
2022-01-20 23:12:13 +08:00
|
|
|
if (a->type != b->type)
|
2022-01-20 23:12:11 +08:00
|
|
|
return 0;
|
2022-01-20 23:12:13 +08:00
|
|
|
return reftable_record_vtable(a)->equal(
|
|
|
|
reftable_record_data(a), reftable_record_data(b), hash_size);
|
2021-10-08 04:25:02 +08:00
|
|
|
}
|
|
|
|
|
2024-01-03 14:22:26 +08:00
|
|
|
/*
 * Compare two hashes of `hash_size` bytes; NULL compares equal only to
 * NULL. Returns 1 on equality, 0 otherwise.
 */
static int hash_equal(const unsigned char *a, const unsigned char *b, int hash_size)
{
	if (!a || !b)
		return a == b;
	return memcmp(a, b, hash_size) == 0;
}
|
|
|
|
|
2022-01-20 23:12:10 +08:00
|
|
|
int reftable_ref_record_equal(const struct reftable_ref_record *a,
|
|
|
|
const struct reftable_ref_record *b, int hash_size)
|
2021-10-08 04:25:02 +08:00
|
|
|
{
|
|
|
|
assert(hash_size > 0);
|
2022-01-20 23:12:09 +08:00
|
|
|
if (!null_streq(a->refname, b->refname))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (a->update_index != b->update_index ||
|
|
|
|
a->value_type != b->value_type)
|
2021-10-08 04:25:02 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
switch (a->value_type) {
|
|
|
|
case REFTABLE_REF_SYMREF:
|
|
|
|
return !strcmp(a->value.symref, b->value.symref);
|
|
|
|
case REFTABLE_REF_VAL2:
|
|
|
|
return hash_equal(a->value.val2.value, b->value.val2.value,
|
|
|
|
hash_size) &&
|
|
|
|
hash_equal(a->value.val2.target_value,
|
|
|
|
b->value.val2.target_value, hash_size);
|
|
|
|
case REFTABLE_REF_VAL1:
|
|
|
|
return hash_equal(a->value.val1, b->value.val1, hash_size);
|
|
|
|
case REFTABLE_REF_DELETION:
|
|
|
|
return 1;
|
|
|
|
default:
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_ref_record_compare_name(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
return strcmp(((struct reftable_ref_record *)a)->refname,
|
|
|
|
((struct reftable_ref_record *)b)->refname);
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_ref_record_is_deletion(const struct reftable_ref_record *ref)
|
|
|
|
{
|
|
|
|
return ref->value_type == REFTABLE_REF_DELETION;
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_log_record_compare_key(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
const struct reftable_log_record *la = a;
|
|
|
|
const struct reftable_log_record *lb = b;
|
|
|
|
|
|
|
|
int cmp = strcmp(la->refname, lb->refname);
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
if (la->update_index > lb->update_index)
|
|
|
|
return -1;
|
|
|
|
return (la->update_index < lb->update_index) ? 1 : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
int reftable_log_record_is_deletion(const struct reftable_log_record *log)
|
|
|
|
{
|
|
|
|
return (log->value_type == REFTABLE_LOG_DELETION);
|
|
|
|
}
|
|
|
|
|
2022-01-20 23:12:13 +08:00
|
|
|
static void *reftable_record_data(struct reftable_record *rec)
|
|
|
|
{
|
|
|
|
switch (rec->type) {
|
|
|
|
case BLOCK_TYPE_REF:
|
|
|
|
return &rec->u.ref;
|
|
|
|
case BLOCK_TYPE_LOG:
|
|
|
|
return &rec->u.log;
|
|
|
|
case BLOCK_TYPE_INDEX:
|
|
|
|
return &rec->u.idx;
|
|
|
|
case BLOCK_TYPE_OBJ:
|
|
|
|
return &rec->u.obj;
|
|
|
|
}
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct reftable_record_vtable *
|
|
|
|
reftable_record_vtable(struct reftable_record *rec)
|
|
|
|
{
|
|
|
|
switch (rec->type) {
|
|
|
|
case BLOCK_TYPE_REF:
|
|
|
|
return &reftable_ref_record_vtable;
|
|
|
|
case BLOCK_TYPE_LOG:
|
|
|
|
return &reftable_log_record_vtable;
|
|
|
|
case BLOCK_TYPE_INDEX:
|
|
|
|
return &reftable_index_record_vtable;
|
|
|
|
case BLOCK_TYPE_OBJ:
|
|
|
|
return &reftable_obj_record_vtable;
|
|
|
|
}
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
|
2024-02-06 14:35:59 +08:00
|
|
|
void reftable_record_init(struct reftable_record *rec, uint8_t typ)
|
2022-01-20 23:12:13 +08:00
|
|
|
{
|
2024-02-06 14:35:59 +08:00
|
|
|
memset(rec, 0, sizeof(*rec));
|
|
|
|
rec->type = typ;
|
2022-01-20 23:12:13 +08:00
|
|
|
|
|
|
|
switch (typ) {
|
|
|
|
case BLOCK_TYPE_REF:
|
|
|
|
case BLOCK_TYPE_LOG:
|
2024-02-06 14:35:59 +08:00
|
|
|
case BLOCK_TYPE_OBJ:
|
|
|
|
return;
|
|
|
|
case BLOCK_TYPE_INDEX:
|
|
|
|
strbuf_init(&rec->u.idx.last_key, 0);
|
|
|
|
return;
|
|
|
|
default:
|
|
|
|
BUG("unhandled record type");
|
2022-01-20 23:12:13 +08:00
|
|
|
}
|
|
|
|
}
|