git/pseudo-merge.c
Patrick Steinhardt d0ab6630a7 pseudo-merge: fix leaking strmap keys
When creating a new pseudo-merge group we collect a set of matching
commits and put them into a string map. This strmap is initialized such
that it does not allocate its keys, and instead we try to pass ownership
of the keys to it via `strmap_put()`. This isn't how it works though:
the strmap will never try to release these keys, and consequently they
end up leaking.

Fix this leak by initializing the strmap as duplicating its keys and not
trying to hand over ownership.

The leak is exposed by t5333, but plugging it does not yet make the full
test suite pass.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2024-09-30 11:23:06 -07:00

785 lines
20 KiB
C

#define USE_THE_REPOSITORY_VARIABLE
#include "git-compat-util.h"
#include "pseudo-merge.h"
#include "date.h"
#include "oid-array.h"
#include "strbuf.h"
#include "config.h"
#include "string-list.h"
#include "refs.h"
#include "pack-bitmap.h"
#include "commit.h"
#include "alloc.h"
#include "progress.h"
#include "hex.h"
/*
 * Per-group defaults installed by pseudo_merge_group_init(). Each value can
 * be overridden by the corresponding "bitmappseudomerge.<name>.*" key parsed
 * in pseudo_merge_config(), which also falls back to these defaults when a
 * configured value is out of range.
 *
 * The two threshold defaults expand to approxidate() calls, so they are
 * evaluated each time the macro is used.
 */
#define DEFAULT_PSEUDO_MERGE_DECAY 1.0
#define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64
#define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1
#define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")
#define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")
#define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512
/*
 * Raise `base` to the integer power `exp` by repeated squaring.
 *
 * Callers are expected to pass a non-negative `exp`; the loop only
 * terminates once every set bit of `exp` has been shifted out.
 */
static double gitexp(double base, int exp)
{
	/* Multiplying 1 by `base` is exact, so seed with the low bit's factor. */
	double acc = (exp % 2) ? base : 1;

	for (exp >>= 1; exp; exp >>= 1) {
		base *= base;
		if (exp % 2)
			acc *= base;
	}
	return acc;
}
/*
 * Compute how many unstable commits the i-th (zero-based) pseudo-merge of
 * `group` should hold, given the bucket of candidates in `matches`.
 */
static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group,
					const struct pseudo_merge_matches *matches,
					uint32_t i)
{
	double series_sum = 0.0f;
	double scale;
	uint32_t n = 0;

	/*
	 * Pseudo-merge sizes follow a decaying power series:
	 *
	 *   f(n) = C * n^-k
	 *
	 * with 'n' the position of the pseudo-merge, 'f(n)' its size, 'k'
	 * the decay rate and 'C' a scaling factor.
	 *
	 * C is chosen from the number of groups M, the decay rate, and the
	 * number of commits N so that all sizes add up to N:
	 *
	 *   N = f(0) + f(1) + ... f(M-1)
	 *
	 * Solving for C:
	 *
	 *   N = \sum_{n=1}^M C / n^k
	 *
	 *   N / C = \sum_{n=1}^M n^-k
	 *
	 *   C = N / \sum_{n=1}^M n^-k
	 *
	 * As an illustration, with k = 1.5, N = 10,000 commits and M = 6
	 * groups, the rounded sizes are:
	 *
	 *   { 5469, 1934, 1053, 684, 489, 372 }
	 *
	 * and with M = 10 groups they scale to:
	 *
	 *   { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
	 *
	 * NOTE(review): gitexp() takes an `int` exponent, so group->decay is
	 * implicitly truncated toward zero here; a fractional decay such as
	 * 1.5 would be evaluated as 1. Confirm whether that is intended.
	 */
	while (n < group->max_merges) {
		n++;
		series_sum += 1.0 / gitexp(n, group->decay);
	}
	scale = matches->unstable_nr / series_sum;

	/* Adding 0.5 before the truncating cast rounds to nearest. */
	return (uint32_t)((scale / gitexp(i + 1, group->decay)) + 0.5);
}
/*
 * Reset `group` to an all-zero state, fill in the built-in defaults for
 * every tunable, and prepare its `matches` map. The map is initialized
 * with its final argument set to 1 (per the accompanying commit message,
 * so that the map duplicates its keys).
 */
static void pseudo_merge_group_init(struct pseudo_merge_group *group)
{
	memset(group, 0, sizeof(*group));

	group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
	group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
	group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;

	group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD;
	group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD;
	group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;

	strmap_init_with_options(&group->matches, NULL, 1);
}
/*
 * Release everything owned by `group`: the compiled pattern, every
 * per-bucket match record (and its stable/unstable arrays), the match map
 * itself, and the array of pseudo-merge commits. The `group` structure
 * itself is not freed.
 */
void pseudo_merge_group_release(struct pseudo_merge_group *group)
{
	struct hashmap_iter iter;
	struct strmap_entry *e;

	/*
	 * A group that was initialized but never given a pattern still has
	 * pattern == NULL; regfree() must not be handed a NULL pointer.
	 */
	if (group->pattern) {
		regfree(group->pattern);
		free(group->pattern);
	}

	strmap_for_each_entry(&group->matches, &iter, e) {
		struct pseudo_merge_matches *matches = e->value;

		free(matches->stable);
		free(matches->unstable);
		free(matches);
	}
	/* Values were freed above, so ask the map not to free them again. */
	strmap_clear(&group->matches, 0);

	free(group->merges);
}
static int pseudo_merge_config(const char *var, const char *value,
const struct config_context *ctx,
void *cb_data)
{
struct string_list *list = cb_data;
struct string_list_item *item;
struct pseudo_merge_group *group;
struct strbuf buf = STRBUF_INIT;
const char *sub, *key;
size_t sub_len;
int ret = 0;
if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key))
goto done;
if (!sub_len)
goto done;
strbuf_add(&buf, sub, sub_len);
item = string_list_lookup(list, buf.buf);
if (!item) {
item = string_list_insert(list, buf.buf);
item->util = xmalloc(sizeof(struct pseudo_merge_group));
pseudo_merge_group_init(item->util);
}
group = item->util;
if (!strcmp(key, "pattern")) {
struct strbuf re = STRBUF_INIT;
free(group->pattern);
if (*value != '^')
strbuf_addch(&re, '^');
strbuf_addstr(&re, value);
group->pattern = xcalloc(1, sizeof(regex_t));
if (regcomp(group->pattern, re.buf, REG_EXTENDED))
die(_("failed to load pseudo-merge regex for %s: '%s'"),
sub, re.buf);
strbuf_release(&re);
} else if (!strcmp(key, "decay")) {
group->decay = git_config_double(var, value, ctx->kvi);
if (group->decay < 0) {
warning(_("%s must be non-negative, using default"), var);
group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
}
} else if (!strcmp(key, "samplerate")) {
group->sample_rate = git_config_double(var, value, ctx->kvi);
if (!(0 <= group->sample_rate && group->sample_rate <= 1)) {
warning(_("%s must be between 0 and 1, using default"), var);
group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
}
} else if (!strcmp(key, "threshold")) {
if (git_config_expiry_date(&group->threshold, var, value)) {
ret = -1;
goto done;
}
} else if (!strcmp(key, "maxmerges")) {
group->max_merges = git_config_int(var, value, ctx->kvi);
if (group->max_merges < 0) {
warning(_("%s must be non-negative, using default"), var);
group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
}
} else if (!strcmp(key, "stablethreshold")) {
if (git_config_expiry_date(&group->stable_threshold, var, value)) {
ret = -1;
goto done;
}
} else if (!strcmp(key, "stablesize")) {
group->stable_size = git_config_int(var, value, ctx->kvi);
if (group->stable_size <= 0) {
warning(_("%s must be positive, using default"), var);
group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;
}
}
done:
strbuf_release(&buf);
return ret;
}
void load_pseudo_merges_from_config(struct repository *r,
struct string_list *list)
{
struct string_list_item *item;
repo_config(r, pseudo_merge_config, list);
for_each_string_list_item(item, list) {
struct pseudo_merge_group *group = item->util;
if (!group->pattern)
die(_("pseudo-merge group '%s' missing required pattern"),
item->string);
if (group->threshold < group->stable_threshold)
die(_("pseudo-merge group '%s' has unstable threshold "
"before stable one"), item->string);
}
}
/*
 * Ref-iteration callback: file the commit that `refname` resolves to under
 * every configured pseudo-merge group whose pattern matches the refname.
 *
 * `_data` is the bitmap_writer. Refs whose (peeled) object is not a commit
 * known to lookup_commit(), or is not part of the pack being written, are
 * skipped. Always returns 0 so that iteration continues.
 */
static int find_pseudo_merge_group_for_ref(const char *refname,
					   const char *referent UNUSED,
					   const struct object_id *oid,
					   int flags UNUSED,
					   void *_data)
{
	struct bitmap_writer *writer = _data;
	struct object_id peeled;
	struct commit *commit;
	int already_bitmapped;
	uint32_t g;

	/* Work with the peeled OID whenever peeling succeeds. */
	if (!peel_iterated_oid(the_repository, oid, &peeled))
		oid = &peeled;

	commit = lookup_commit(the_repository, oid);
	if (!commit || !packlist_find(writer->to_pack, oid))
		return 0;

	already_bitmapped = bitmap_writer_has_bitmapped_object_id(writer, oid);

	for (g = 0; g < writer->pseudo_merge_groups.nr; g++) {
		struct pseudo_merge_group *group =
			writer->pseudo_merge_groups.items[g].util;
		struct pseudo_merge_matches *bucket;
		struct strbuf bucket_key = STRBUF_INIT;
		regmatch_t captures[16];
		const size_t ncaptures = ARRAY_SIZE(captures);
		size_t k;

		if (regexec(group->pattern, refname, ncaptures, captures, 0))
			continue;

		/* The last slot being filled means the regex has more groups than fit. */
		if (captures[ncaptures - 1].rm_so != -1)
			warning(_("pseudo-merge regex from config has too many capture "
				  "groups (max=%"PRIuMAX")"),
				(uintmax_t)ARRAY_SIZE(captures) - 2);

		/*
		 * The bucket key is the '-'-joined text of the matched capture
		 * groups. Slot 0 (the whole match) is only used when the
		 * pattern declares no capture groups at all.
		 */
		k = group->pattern->re_nsub ? 1 : 0;
		while (k < ncaptures) {
			const regmatch_t *m = &captures[k++];

			if (m->rm_so == -1)
				continue;

			if (bucket_key.len)
				strbuf_addch(&bucket_key, '-');

			strbuf_add(&bucket_key, refname + m->rm_so,
				   m->rm_eo - m->rm_so);
		}

		bucket = strmap_get(&group->matches, bucket_key.buf);
		if (!bucket) {
			bucket = xcalloc(1, sizeof(*bucket));
			strmap_put(&group->matches, bucket_key.buf, bucket);
		}

		if (commit->date <= group->stable_threshold) {
			/* At or before the stable threshold: stable set. */
			ALLOC_GROW(bucket->stable, bucket->stable_nr + 1,
				   bucket->stable_alloc);
			bucket->stable[bucket->stable_nr++] = commit;
		} else if (!already_bitmapped && commit->date <= group->threshold) {
			/* Between the two thresholds and not yet bitmapped. */
			ALLOC_GROW(bucket->unstable, bucket->unstable_nr + 1,
				   bucket->unstable_alloc);
			bucket->unstable[bucket->unstable_nr++] = commit;
		}

		strbuf_release(&bucket_key);
	}

	return 0;
}
/*
 * Allocate a fresh commit node, mark it as an already-parsed pseudo-merge,
 * append it to group->merges and return it. The caller fills in parents.
 */
static struct commit *push_pseudo_merge(struct pseudo_merge_group *group)
{
	struct commit *node = alloc_commit_node(the_repository);

	node->object.parsed = 1;
	node->object.flags |= BITMAP_PSEUDO_MERGE;

	ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc);
	group->merges[group->merges_nr] = node;
	group->merges_nr++;

	return node;
}
/*
 * Return the pseudo_merge_commit_idx record stored for `oid` in
 * `pseudo_merge_commits`, inserting a zero-initialized record first if the
 * map has no entry for that object yet.
 */
static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits,
							const struct object_id *oid)
{
	struct pseudo_merge_commit_idx *record;
	int inserted;
	khiter_t slot = kh_put_oid_map(pseudo_merge_commits, *oid, &inserted);

	if (!inserted)
		return kh_value(pseudo_merge_commits, slot);

	CALLOC_ARRAY(record, 1);
	kh_value(pseudo_merge_commits, slot) = record;

	return record;
}
/*
 * When the computed size of an unstable pseudo-merge drops below this
 * value, that pseudo-merge takes every remaining unstable commit instead
 * (see the computation of `end` in select_pseudo_merges_1()).
 */
#define MIN_PSEUDO_MERGE_SIZE 8
/*
 * Create the pseudo-merge commits for one bucket (`matches`) of `group`.
 *
 * Stable commits are packed into consecutive pseudo-merges of at most
 * group->stable_size parents each. Unstable commits are spread over up to
 * group->max_merges pseudo-merges whose sizes come from
 * pseudo_merge_group_size(), subject to the group's sample rate.
 *
 * Every pseudo-merge that ends up with at least one parent is handed to
 * bitmap_writer_push_commit() and numbered by writer->pseudo_merges_nr;
 * each parent's entry in writer->pseudo_merge_commits records that number.
 */
static void select_pseudo_merges_1(struct bitmap_writer *writer,
struct pseudo_merge_group *group,
struct pseudo_merge_matches *matches)
{
uint32_t i, j;
uint32_t stable_merges_nr;
if (!matches->stable_nr && !matches->unstable_nr)
return; /* nothing was collected for this bucket */
/* ceil(stable_nr / stable_size) */
stable_merges_nr = matches->stable_nr / group->stable_size;
if (matches->stable_nr % group->stable_size)
stable_merges_nr++;
/* make stable_merges_nr pseudo merges for stable commits */
for (i = 0, j = 0; i < stable_merges_nr; i++) {
struct commit *merge;
struct commit_list **p;
merge = push_pseudo_merge(group);
p = &merge->parents;
/*
 * For each pseudo-merge created above, add parents to the
 * allocated commit node from the stable set of commits
 * (those dated at or before the group's stable threshold).
 * `j` walks the stable array across iterations of the outer
 * loop; the inner loop stops once it has consumed a multiple
 * of stable_size commits or the array is exhausted.
 */
do {
struct commit *c;
struct pseudo_merge_commit_idx *pmc;
if (j >= matches->stable_nr)
break;
c = matches->stable[j++];
/*
 * Here and below, make sure that we keep our mapping of
 * commits -> pseudo-merge(s) which include the key'd
 * commit up-to-date.
 */
pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
&c->object.oid);
ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);
pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
p = commit_list_append(c, p);
} while (j % group->stable_size);
/* Only pseudo-merges that received parents are registered. */
if (merge->parents) {
bitmap_writer_push_commit(writer, merge, 1);
writer->pseudo_merges_nr++;
}
}
/* make up to group->max_merges pseudo merges for unstable commits */
for (i = 0, j = 0; i < group->max_merges; i++) {
struct commit *merge;
struct commit_list **p;
uint32_t size, end;
merge = push_pseudo_merge(group);
p = &merge->parents;
size = pseudo_merge_group_size(group, matches, i);
/* Too-small groups swallow the rest of the unstable commits. */
end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size;
/*
 * For each pseudo-merge commit created above, add parents to
 * the allocated commit node from the unstable set of commits
 * (un-bitmapped, dated after the stable threshold but no later
 * than the group's threshold).
 *
 * Account for the sample rate, since not every candidate from
 * the set of unstable commits will be included as a
 * pseudo-merge parent: only indices that are a multiple of
 * (uint32_t)(1.0 / sample_rate) are kept.
 *
 * NOTE(review): pseudo_merge_config() accepts a sample rate of
 * 0, for which the divisor below is not a finite value; confirm
 * how that case is meant to behave.
 */
for (; j < end && j < matches->unstable_nr; j++) {
struct commit *c = matches->unstable[j];
struct pseudo_merge_commit_idx *pmc;
if (j % (uint32_t)(1.0 / group->sample_rate))
continue;
pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
&c->object.oid);
ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);
pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
p = commit_list_append(c, p);
}
if (merge->parents) {
bitmap_writer_push_commit(writer, merge, 1);
writer->pseudo_merges_nr++; }
/* All unstable commits have been considered. */
if (end >= matches->unstable_nr)
break;
}
}
/*
 * qsort-style comparator over `struct commit *` elements, ordering them
 * by ascending commit date. Returns -1, 0 or 1.
 */
static int commit_date_cmp(const void *va, const void *vb)
{
	const struct commit *lhs = *(const struct commit **)va;
	const struct commit *rhs = *(const struct commit **)vb;

	return (lhs->date > rhs->date) - (lhs->date < rhs->date);
}
/*
 * Sort both commit arrays of `matches` (unstable and stable) in place by
 * ascending commit date.
 */
static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches)
{
	/* The two arrays are independent, so the order of the sorts is irrelevant. */
	QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp);
	QSORT(matches->stable, matches->stable_nr, commit_date_cmp);
}
/*
 * Entry point for pseudo-merge selection. Walks all refs to collect
 * candidate commits per configured group, then date-sorts each bucket and
 * creates its pseudo-merge commits. Does nothing when no groups are
 * configured on `writer`.
 */
void select_pseudo_merges(struct bitmap_writer *writer)
{
	struct progress *progress = NULL;
	uint32_t done = 0;

	if (!writer->pseudo_merge_groups.nr)
		return;

	if (writer->show_progress)
		progress = start_progress("Selecting pseudo-merge commits",
					  writer->pseudo_merge_groups.nr);

	refs_for_each_ref(get_main_ref_store(the_repository),
			  find_pseudo_merge_group_for_ref, writer);

	while (done < writer->pseudo_merge_groups.nr) {
		struct pseudo_merge_group *group =
			writer->pseudo_merge_groups.items[done].util;
		struct hashmap_iter iter;
		struct strmap_entry *entry;

		strmap_for_each_entry(&group->matches, &iter, entry) {
			struct pseudo_merge_matches *bucket = entry->value;

			sort_pseudo_merge_matches(bucket);
			select_pseudo_merges_1(writer, group, bucket);
		}

		/* Progress counts completed groups, hence the pre-increment. */
		display_progress(progress, ++done);
	}

	stop_progress(&progress);
}
void free_pseudo_merge_map(struct pseudo_merge_map *pm)
{
uint32_t i;
for (i = 0; i < pm->nr; i++) {
ewah_pool_free(pm->v[i].commits);
ewah_pool_free(pm->v[i].bitmap);
}
free(pm->v);
}
/*
 * View of one extended lookup entry inside the mapped pseudo-merge data,
 * as filled in by pseudo_merge_ext_at().
 */
struct pseudo_merge_commit_ext {
/* Entry count, decoded from the big-endian 32-bit word at the entry start. */
uint32_t nr;
/* First byte after that count; indexed in 8-byte steps by nth_pseudo_merge_ext(). */
const unsigned char *ptr;
};
/*
 * Point `ext` at the extended lookup entry located `at` bytes into pm->map.
 *
 * Returns 0 on success. If `at`, or the end of the 4-byte count that
 * starts there, falls outside the map, reports an error and returns its
 * (negative) result without touching `ext`.
 */
static int pseudo_merge_ext_at(const struct pseudo_merge_map *pm,
			       struct pseudo_merge_commit_ext *ext, size_t at)
{
	const unsigned char *entry;

	if (at >= pm->map_size)
		return error(_("extended pseudo-merge read out-of-bounds "
			       "(%"PRIuMAX" >= %"PRIuMAX")"),
			     (uintmax_t)at, (uintmax_t)pm->map_size);

	if (at + 4 >= pm->map_size)
		return error(_("extended pseudo-merge entry is too short "
			       "(%"PRIuMAX" >= %"PRIuMAX")"),
			     (uintmax_t)(at + 4), (uintmax_t)pm->map_size);

	entry = pm->map + at;
	ext->nr = get_be32(entry);
	ext->ptr = entry + sizeof(uint32_t);

	return 0;
}
/*
 * Return the object bitmap of `merge`, reading it from pm->map at
 * merge->bitmap_at on first use and caching it afterwards.
 *
 * The pseudo-merge must already have had its commits loaded (see
 * use_pseudo_merge()); calling this on an unloaded one is a BUG.
 */
struct ewah_bitmap *pseudo_merge_bitmap(const struct pseudo_merge_map *pm,
					struct pseudo_merge *merge)
{
	size_t pos;

	if (!merge->loaded_commits)
		BUG("cannot use unloaded pseudo-merge bitmap");

	if (merge->loaded_bitmap)
		return merge->bitmap;

	pos = merge->bitmap_at;
	merge->bitmap = read_bitmap(pm->map, pm->map_size, &pos);
	merge->loaded_bitmap = 1;

	return merge->bitmap;
}
/*
 * Make sure the commits bitmap of `merge` is loaded and return `merge`.
 *
 * On first use the commits bitmap is read from pm->map starting at
 * merge->at, and the position just past it is remembered in
 * merge->bitmap_at so that pseudo_merge_bitmap() can later read the
 * object bitmap from there.
 */
struct pseudo_merge *use_pseudo_merge(const struct pseudo_merge_map *pm,
				      struct pseudo_merge *merge)
{
	size_t cursor;

	if (merge->loaded_commits)
		return merge;

	cursor = merge->at;
	merge->commits = read_bitmap(pm->map, pm->map_size, &cursor);
	merge->bitmap_at = cursor;
	merge->loaded_commits = 1;

	return merge;
}
/*
 * Binary-search pm->v for the pseudo-merge whose `at` offset equals `want`
 * and return it with its commits bitmap loaded via use_pseudo_merge().
 *
 * The search relies on pm->v being ordered by ascending `at` (presumably
 * the order in which the pseudo-merges are laid out in the bitmap file;
 * verify against the code that populates pm->v).
 *
 * `oid` is only used to name the commit in the warning that is emitted
 * when no pseudo-merge starts at `want`; NULL is returned in that case.
 */
static struct pseudo_merge *pseudo_merge_at(const struct pseudo_merge_map *pm,
					    struct object_id *oid,
					    size_t want)
{
	size_t lo = 0;
	size_t hi = pm->nr;

	while (lo < hi) {
		size_t mi = lo + (hi - lo) / 2;
		size_t got = pm->v[mi].at;

		if (got == want)
			return use_pseudo_merge(pm, &pm->v[mi]);
		else if (got > want)
			hi = mi;	/* target, if present, lies to the left */
		else
			lo = mi + 1;	/* target, if present, lies to the right */
	}

	warning(_("could not find pseudo-merge for commit %s at offset %"PRIuMAX),
		oid_to_hex(oid), (uintmax_t)want);

	return NULL;
}
/*
 * Decoded form of one commit lookup record, as produced by
 * read_pseudo_merge_commit_at().
 */
struct pseudo_merge_commit {
/* Key of the record; find_pseudo_merge() searches by this value. */
uint32_t commit_pos;
/*
 * Offset associated with the commit. apply_pseudo_merges_for_commit()
 * treats a set most-significant bit as "the remaining bits locate an
 * extended entry" rather than a single pseudo-merge.
 */
uint64_t pseudo_merge_ofs;
};
/*
 * Width in bytes of one serialized record (a 32-bit field followed by a
 * 64-bit field, without padding); this is NOT sizeof(struct
 * pseudo_merge_commit).
 */
#define PSEUDO_MERGE_COMMIT_RAWSZ (sizeof(uint32_t)+sizeof(uint64_t))
/*
 * Decode the serialized record starting at `at` into `merge`: a
 * big-endian 32-bit commit position immediately followed by a big-endian
 * 64-bit offset.
 */
static void read_pseudo_merge_commit_at(struct pseudo_merge_commit *merge,
					const unsigned char *at)
{
	const unsigned char *ofs_field = at + sizeof(uint32_t);

	merge->commit_pos = get_be32(at);
	merge->pseudo_merge_ofs = get_be64(ofs_field);
}
/*
 * Decode into `merge` the record referenced by the n-th 8-byte offset of
 * the extended entry `ext`.
 *
 * Returns 0 on success. Reports an error (and returns its negative
 * result) when `n` is not below ext->nr or when the stored offset points
 * at or past the end of pm->map.
 */
static int nth_pseudo_merge_ext(const struct pseudo_merge_map *pm,
				struct pseudo_merge_commit_ext *ext,
				struct pseudo_merge_commit *merge,
				uint32_t n)
{
	const unsigned char *slot;
	size_t ofs;

	if (n >= ext->nr)
		return error(_("extended pseudo-merge lookup out-of-bounds "
			       "(%"PRIu32" >= %"PRIu32")"), n, ext->nr);

	slot = ext->ptr + st_mult(n, sizeof(uint64_t));
	ofs = get_be64(slot);

	if (ofs >= pm->map_size)
		return error(_("out-of-bounds read: (%"PRIuMAX" >= %"PRIuMAX")"),
			     (uintmax_t)ofs, (uintmax_t)pm->map_size);

	read_pseudo_merge_commit_at(merge, pm->map + ofs);

	return 0;
}
/*
 * Apply `merge` if it has not been applied yet and all of its commits are
 * already present in `roots` (when given) or otherwise in `result`.
 *
 * Applying ORs the pseudo-merge's object bitmap into `result`, and into
 * `roots` too when it is non-NULL, and marks the pseudo-merge satisfied.
 * Returns 1 if the pseudo-merge was applied, 0 otherwise.
 */
static unsigned apply_pseudo_merge(const struct pseudo_merge_map *pm,
				   struct pseudo_merge *merge,
				   struct bitmap *result,
				   struct bitmap *roots)
{
	struct bitmap *have = roots ? roots : result;
	struct ewah_bitmap *objects;

	if (merge->satisfied || !ewah_bitmap_is_subset(merge->commits, have))
		return 0;

	/* pseudo_merge_bitmap() caches its result, so one lookup serves both ORs. */
	objects = pseudo_merge_bitmap(pm, merge);
	bitmap_or_ewah(result, objects);
	if (roots)
		bitmap_or_ewah(roots, objects);

	merge->satisfied = 1;
	return 1;
}
static int pseudo_merge_commit_cmp(const void *va, const void *vb)
{
struct pseudo_merge_commit merge;
uint32_t key = *(uint32_t*)va;
read_pseudo_merge_commit_at(&merge, vb);
if (key < merge.commit_pos)
return -1;
if (key > merge.commit_pos)
return 1;
return 0;
}
/*
 * Look up the lookup record for commit position `pos` in pm->commits.
 * Returns NULL when the table is empty or has no record for `pos`.
 *
 * NOTE(review): the table is searched in PSEUDO_MERGE_COMMIT_RAWSZ-byte
 * strides and the comparator decodes each element with
 * read_pseudo_merge_commit_at(), so the pointer returned here addresses a
 * serialized record inside pm->commits, not a decoded
 * `struct pseudo_merge_commit`, despite the return type.
 */
static struct pseudo_merge_commit *find_pseudo_merge(const struct pseudo_merge_map *pm,
						     uint32_t pos)
{
	return pm->commits_nr
		? bsearch(&pos, pm->commits, pm->commits_nr,
			  PSEUDO_MERGE_COMMIT_RAWSZ, pseudo_merge_commit_cmp)
		: NULL;
}
int apply_pseudo_merges_for_commit(const struct pseudo_merge_map *pm,
struct bitmap *result,
struct commit *commit, uint32_t commit_pos)
{
struct pseudo_merge *merge;
struct pseudo_merge_commit *merge_commit;
int ret = 0;
merge_commit = find_pseudo_merge(pm, commit_pos);
if (!merge_commit)
return 0;
if (merge_commit->pseudo_merge_ofs & ((uint64_t)1<<63)) {
struct pseudo_merge_commit_ext ext = { 0 };
off_t ofs = merge_commit->pseudo_merge_ofs & ~((uint64_t)1<<63);
uint32_t i;
if (pseudo_merge_ext_at(pm, &ext, ofs) < -1) {
warning(_("could not read extended pseudo-merge table "
"for commit %s"),
oid_to_hex(&commit->object.oid));
return ret;
}
for (i = 0; i < ext.nr; i++) {
if (nth_pseudo_merge_ext(pm, &ext, merge_commit, i) < 0)
return ret;
merge = pseudo_merge_at(pm, &commit->object.oid,
merge_commit->pseudo_merge_ofs);
if (!merge)
return ret;
if (apply_pseudo_merge(pm, merge, result, NULL))
ret++;
}
} else {
merge = pseudo_merge_at(pm, &commit->object.oid,
merge_commit->pseudo_merge_ofs);
if (!merge)
return ret;
if (apply_pseudo_merge(pm, merge, result, NULL))
ret++;
}
if (ret)
cascade_pseudo_merges(pm, result, NULL);
return ret;
}
/*
 * Repeatedly sweep over all pseudo-merges in `pm`, applying each one that
 * has become applicable, until a full sweep applies none. Returns the
 * total number of pseudo-merges applied.
 */
int cascade_pseudo_merges(const struct pseudo_merge_map *pm,
			  struct bitmap *result,
			  struct bitmap *roots)
{
	int applied = 0;
	unsigned progressed;

	do {
		uint32_t idx;

		progressed = 0;

		for (idx = 0; idx < pm->nr; idx++) {
			struct pseudo_merge *merge =
				use_pseudo_merge(pm, &pm->v[idx]);

			if (!apply_pseudo_merge(pm, merge, result, roots))
				continue;

			/* Applying one may unlock others; sweep again afterwards. */
			progressed = 1;
			applied++;
		}
	} while (progressed);

	return applied;
}
/*
 * Find the first not-yet-satisfied pseudo-merge whose commits bitmap is
 * exactly `parents`, mark it satisfied and return it. Returns NULL when
 * there is no such pseudo-merge (including when `pm` holds none).
 */
struct pseudo_merge *pseudo_merge_for_parents(const struct pseudo_merge_map *pm,
					      struct bitmap *parents)
{
	size_t idx;

	/*
	 * NOTE: in the worst case (no pseudo-merge matches) this scan is
	 * quadratic, which is acceptable in practice because:
	 *
	 *   - Non-matching pseudo-merges are rejected quickly, since
	 *     `bitmap_equals_ewah()` bails out as soon as it knows the two
	 *     bitmaps differ.
	 *
	 *   - Pseudo-merges that were already matched (tracked through
	 *     their `->satisfied` bit) are never reconsidered.
	 *
	 *   - There should only be a small number of pseudo-merges (in the
	 *     hundreds for most repositories).
	 *
	 * Should this semi-quadratic behavior ever become a problem, an
	 * alternative is to track which pseudo-merges remain "viable" while
	 * enumerating the pseudo-merge commit's parents:
	 *
	 *   - A pseudo-merge stops being viable as soon as the bit for one
	 *     or more parents of the given commit is not set in that
	 *     candidate's commits bitmap.
	 *
	 *   - Once all bits are processed, walk the remaining viable
	 *     pseudo-merges and check that their popcount() equals the
	 *     number of parents of the given commit.
	 */
	for (idx = 0; idx < pm->nr; idx++) {
		struct pseudo_merge *candidate = use_pseudo_merge(pm, &pm->v[idx]);

		if (candidate && !candidate->satisfied &&
		    bitmap_equals_ewah(parents, candidate->commits)) {
			candidate->satisfied = 1;
			return candidate;
		}
	}

	return NULL;
}