2024-05-24 05:26:26 +08:00
|
|
|
#include "git-compat-util.h"
|
|
|
|
#include "pseudo-merge.h"
|
2024-05-24 05:26:42 +08:00
|
|
|
#include "date.h"
|
|
|
|
#include "oid-array.h"
|
|
|
|
#include "strbuf.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "string-list.h"
|
|
|
|
#include "refs.h"
|
|
|
|
#include "pack-bitmap.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "alloc.h"
|
|
|
|
#include "progress.h"
|
|
|
|
|
|
|
|
#define DEFAULT_PSEUDO_MERGE_DECAY 1.0
|
|
|
|
#define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64
|
|
|
|
#define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1
|
|
|
|
#define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")
|
|
|
|
#define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")
|
|
|
|
#define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512
|
|
|
|
|
|
|
|
/*
 * Raise 'base' to the integer power 'exp' by repeated squaring.
 *
 * Negative exponents yield the reciprocal of the corresponding positive
 * power. The loop runs over the unsigned magnitude of 'exp': right-shifting
 * a negative signed value is implementation-defined and, with an arithmetic
 * shift, never reaches zero.
 */
static double gitexp(double base, int exp)
{
	/* "0u - (unsigned)exp" negates safely even when exp is INT_MIN */
	unsigned int remaining = exp < 0 ? 0u - (unsigned int)exp
					 : (unsigned int)exp;
	double result = 1;

	while (remaining) {
		if (remaining & 1)
			result *= base;
		remaining >>= 1;
		/* only square when another bit is still to be consumed */
		if (remaining)
			base *= base;
	}

	return exp < 0 ? 1.0 / result : result;
}
|
|
|
|
|
|
|
|
static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group,
|
|
|
|
const struct pseudo_merge_matches *matches,
|
|
|
|
uint32_t i)
|
|
|
|
{
|
|
|
|
double C = 0.0f;
|
|
|
|
uint32_t n;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The size of pseudo-merge groups decays according to a power series,
|
|
|
|
* which looks like:
|
|
|
|
*
|
|
|
|
* f(n) = C * n^-k
|
|
|
|
*
|
|
|
|
* , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k'
|
|
|
|
* is the decay rate, and 'C' is a scaling value.
|
|
|
|
*
|
|
|
|
* The value of C depends on the number of groups, decay rate, and total
|
|
|
|
* number of commits. It is computed such that if there are M and N
|
|
|
|
* total groups and commits, respectively, that:
|
|
|
|
*
|
|
|
|
* N = f(0) + f(1) + ... f(M-1)
|
|
|
|
*
|
|
|
|
* Rearranging to isolate C, we get:
|
|
|
|
*
|
|
|
|
* N = \sum_{n=1}^M C / n^k
|
|
|
|
*
|
|
|
|
* N / C = \sum_{n=1}^M n^-k
|
|
|
|
*
|
|
|
|
* C = N / \sum_{n=1}^M n^-k
|
|
|
|
*
|
|
|
|
* For example, if we have a decay rate of 'k' being equal to 1.5, 'N'
|
|
|
|
* total commits equal to 10,000, and 'M' being equal to 6 groups, then
|
|
|
|
* the (rounded) group sizes are:
|
|
|
|
*
|
|
|
|
* { 5469, 1934, 1053, 684, 489, 372 }
|
|
|
|
*
|
|
|
|
* increasing the number of total groups, say to 10, scales the group
|
|
|
|
* sizes appropriately:
|
|
|
|
*
|
|
|
|
* { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
|
|
|
|
*/
|
|
|
|
for (n = 0; n < group->max_merges; n++)
|
|
|
|
C += 1.0 / gitexp(n + 1, group->decay);
|
|
|
|
C = matches->unstable_nr / C;
|
|
|
|
|
|
|
|
return (uint32_t)((C / gitexp(i + 1, group->decay)) + 0.5);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void pseudo_merge_group_init(struct pseudo_merge_group *group)
|
|
|
|
{
|
|
|
|
memset(group, 0, sizeof(struct pseudo_merge_group));
|
|
|
|
|
|
|
|
strmap_init_with_options(&group->matches, NULL, 0);
|
|
|
|
|
|
|
|
group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
|
|
|
|
group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
|
|
|
|
group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
|
|
|
|
group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD;
|
|
|
|
group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD;
|
|
|
|
group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int pseudo_merge_config(const char *var, const char *value,
|
|
|
|
const struct config_context *ctx,
|
|
|
|
void *cb_data)
|
|
|
|
{
|
|
|
|
struct string_list *list = cb_data;
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct pseudo_merge_group *group;
|
|
|
|
struct strbuf buf = STRBUF_INIT;
|
|
|
|
const char *sub, *key;
|
|
|
|
size_t sub_len;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key))
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
if (!sub_len)
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
strbuf_add(&buf, sub, sub_len);
|
|
|
|
|
|
|
|
item = string_list_lookup(list, buf.buf);
|
|
|
|
if (!item) {
|
|
|
|
item = string_list_insert(list, buf.buf);
|
|
|
|
|
|
|
|
item->util = xmalloc(sizeof(struct pseudo_merge_group));
|
|
|
|
pseudo_merge_group_init(item->util);
|
|
|
|
}
|
|
|
|
|
|
|
|
group = item->util;
|
|
|
|
|
|
|
|
if (!strcmp(key, "pattern")) {
|
|
|
|
struct strbuf re = STRBUF_INIT;
|
|
|
|
|
|
|
|
free(group->pattern);
|
|
|
|
if (*value != '^')
|
|
|
|
strbuf_addch(&re, '^');
|
|
|
|
strbuf_addstr(&re, value);
|
|
|
|
|
|
|
|
group->pattern = xcalloc(1, sizeof(regex_t));
|
|
|
|
if (regcomp(group->pattern, re.buf, REG_EXTENDED))
|
|
|
|
die(_("failed to load pseudo-merge regex for %s: '%s'"),
|
|
|
|
sub, re.buf);
|
|
|
|
|
|
|
|
strbuf_release(&re);
|
|
|
|
} else if (!strcmp(key, "decay")) {
|
|
|
|
group->decay = git_config_double(var, value, ctx->kvi);
|
|
|
|
if (group->decay < 0) {
|
|
|
|
warning(_("%s must be non-negative, using default"), var);
|
|
|
|
group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
|
|
|
|
}
|
|
|
|
} else if (!strcmp(key, "samplerate")) {
|
|
|
|
group->sample_rate = git_config_double(var, value, ctx->kvi);
|
|
|
|
if (!(0 <= group->sample_rate && group->sample_rate <= 1)) {
|
|
|
|
warning(_("%s must be between 0 and 1, using default"), var);
|
|
|
|
group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
|
|
|
|
}
|
|
|
|
} else if (!strcmp(key, "threshold")) {
|
|
|
|
if (git_config_expiry_date(&group->threshold, var, value)) {
|
|
|
|
ret = -1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
} else if (!strcmp(key, "maxmerges")) {
|
|
|
|
group->max_merges = git_config_int(var, value, ctx->kvi);
|
|
|
|
if (group->max_merges < 0) {
|
|
|
|
warning(_("%s must be non-negative, using default"), var);
|
|
|
|
group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
|
|
|
|
}
|
|
|
|
} else if (!strcmp(key, "stablethreshold")) {
|
|
|
|
if (git_config_expiry_date(&group->stable_threshold, var, value)) {
|
|
|
|
ret = -1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
} else if (!strcmp(key, "stablesize")) {
|
|
|
|
group->stable_size = git_config_int(var, value, ctx->kvi);
|
|
|
|
if (group->stable_size <= 0) {
|
|
|
|
warning(_("%s must be positive, using default"), var);
|
|
|
|
group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
strbuf_release(&buf);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
void load_pseudo_merges_from_config(struct string_list *list)
|
|
|
|
{
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
git_config(pseudo_merge_config, list);
|
|
|
|
|
|
|
|
for_each_string_list_item(item, list) {
|
|
|
|
struct pseudo_merge_group *group = item->util;
|
|
|
|
if (!group->pattern)
|
|
|
|
die(_("pseudo-merge group '%s' missing required pattern"),
|
|
|
|
item->string);
|
|
|
|
if (group->threshold < group->stable_threshold)
|
|
|
|
die(_("pseudo-merge group '%s' has unstable threshold "
|
|
|
|
"before stable one"), item->string);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int find_pseudo_merge_group_for_ref(const char *refname,
|
|
|
|
const struct object_id *oid,
|
|
|
|
int flags UNUSED,
|
|
|
|
void *_data)
|
|
|
|
{
|
|
|
|
struct bitmap_writer *writer = _data;
|
|
|
|
struct object_id peeled;
|
|
|
|
struct commit *c;
|
|
|
|
uint32_t i;
|
|
|
|
int has_bitmap;
|
|
|
|
|
|
|
|
if (!peel_iterated_oid(oid, &peeled))
|
|
|
|
oid = &peeled;
|
|
|
|
|
|
|
|
c = lookup_commit(the_repository, oid);
|
|
|
|
if (!c)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid);
|
|
|
|
|
|
|
|
for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {
|
|
|
|
struct pseudo_merge_group *group;
|
|
|
|
struct pseudo_merge_matches *matches;
|
|
|
|
struct strbuf group_name = STRBUF_INIT;
|
|
|
|
regmatch_t captures[16];
|
|
|
|
size_t j;
|
|
|
|
|
|
|
|
group = writer->pseudo_merge_groups.items[i].util;
|
|
|
|
if (regexec(group->pattern, refname, ARRAY_SIZE(captures),
|
|
|
|
captures, 0))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (captures[ARRAY_SIZE(captures) - 1].rm_so != -1)
|
|
|
|
warning(_("pseudo-merge regex from config has too many capture "
|
|
|
|
"groups (max=%"PRIuMAX")"),
|
|
|
|
(uintmax_t)ARRAY_SIZE(captures) - 2);
|
|
|
|
|
|
|
|
for (j = !!group->pattern->re_nsub; j < ARRAY_SIZE(captures); j++) {
|
|
|
|
regmatch_t *match = &captures[j];
|
|
|
|
if (match->rm_so == -1)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (group_name.len)
|
|
|
|
strbuf_addch(&group_name, '-');
|
|
|
|
|
|
|
|
strbuf_add(&group_name, refname + match->rm_so,
|
|
|
|
match->rm_eo - match->rm_so);
|
|
|
|
}
|
|
|
|
|
|
|
|
matches = strmap_get(&group->matches, group_name.buf);
|
|
|
|
if (!matches) {
|
|
|
|
matches = xcalloc(1, sizeof(*matches));
|
|
|
|
strmap_put(&group->matches, strbuf_detach(&group_name, NULL),
|
|
|
|
matches);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (c->date <= group->stable_threshold) {
|
|
|
|
ALLOC_GROW(matches->stable, matches->stable_nr + 1,
|
|
|
|
matches->stable_alloc);
|
|
|
|
matches->stable[matches->stable_nr++] = c;
|
|
|
|
} else if (c->date <= group->threshold && !has_bitmap) {
|
|
|
|
ALLOC_GROW(matches->unstable, matches->unstable_nr + 1,
|
|
|
|
matches->unstable_alloc);
|
|
|
|
matches->unstable[matches->unstable_nr++] = c;
|
|
|
|
}
|
|
|
|
|
|
|
|
strbuf_release(&group_name);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct commit *push_pseudo_merge(struct pseudo_merge_group *group)
|
|
|
|
{
|
|
|
|
struct commit *merge;
|
|
|
|
|
|
|
|
ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc);
|
|
|
|
|
|
|
|
merge = alloc_commit_node(the_repository);
|
|
|
|
merge->object.parsed = 1;
|
|
|
|
merge->object.flags |= BITMAP_PSEUDO_MERGE;
|
|
|
|
|
|
|
|
group->merges[group->merges_nr++] = merge;
|
|
|
|
|
|
|
|
return merge;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Look up the entry for 'oid' in 'pseudo_merge_commits', inserting a
 * zero-initialized one when kh_put_oid_map() reports that the key was not
 * already present. Returns the (possibly new) entry.
 */
static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits,
							const struct object_id *oid)
{
	struct pseudo_merge_commit_idx *entry;
	int put_status;
	khiter_t slot = kh_put_oid_map(pseudo_merge_commits, *oid,
				       &put_status);

	/* a zero status means the key already had a value */
	if (!put_status)
		return kh_value(pseudo_merge_commits, slot);

	CALLOC_ARRAY(entry, 1);
	kh_value(pseudo_merge_commits, slot) = entry;

	return entry;
}
|
|
|
|
|
|
|
|
#define MIN_PSEUDO_MERGE_SIZE 8
|
|
|
|
|
|
|
|
static void select_pseudo_merges_1(struct bitmap_writer *writer,
|
|
|
|
struct pseudo_merge_group *group,
|
|
|
|
struct pseudo_merge_matches *matches)
|
|
|
|
{
|
|
|
|
uint32_t i, j;
|
|
|
|
uint32_t stable_merges_nr;
|
|
|
|
|
|
|
|
if (!matches->stable_nr && !matches->unstable_nr)
|
|
|
|
return; /* all tips in this group already have bitmaps */
|
|
|
|
|
|
|
|
stable_merges_nr = matches->stable_nr / group->stable_size;
|
|
|
|
if (matches->stable_nr % group->stable_size)
|
|
|
|
stable_merges_nr++;
|
|
|
|
|
|
|
|
/* make stable_merges_nr pseudo merges for stable commits */
|
|
|
|
for (i = 0, j = 0; i < stable_merges_nr; i++) {
|
|
|
|
struct commit *merge;
|
|
|
|
struct commit_list **p;
|
|
|
|
|
|
|
|
merge = push_pseudo_merge(group);
|
|
|
|
p = &merge->parents;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For each pseudo-merge created above, add parents to the
|
|
|
|
* allocated commit node from the stable set of commits
|
|
|
|
* (un-bitmapped, newer than the stable threshold).
|
|
|
|
*/
|
|
|
|
do {
|
|
|
|
struct commit *c;
|
|
|
|
struct pseudo_merge_commit_idx *pmc;
|
|
|
|
|
|
|
|
if (j >= matches->stable_nr)
|
|
|
|
break;
|
|
|
|
|
|
|
|
c = matches->stable[j++];
|
|
|
|
/*
|
|
|
|
* Here and below, make sure that we keep our mapping of
|
|
|
|
* commits -> pseudo-merge(s) which include the key'd
|
|
|
|
* commit up-to-date.
|
|
|
|
*/
|
|
|
|
pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
|
|
|
|
&c->object.oid);
|
|
|
|
|
|
|
|
ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);
|
|
|
|
|
|
|
|
pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
|
|
|
|
p = commit_list_append(c, p);
|
|
|
|
} while (j % group->stable_size);
|
|
|
|
|
|
|
|
bitmap_writer_push_commit(writer, merge, 1);
|
|
|
|
writer->pseudo_merges_nr++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* make up to group->max_merges pseudo merges for unstable commits */
|
|
|
|
for (i = 0, j = 0; i < group->max_merges; i++) {
|
|
|
|
struct commit *merge;
|
|
|
|
struct commit_list **p;
|
|
|
|
uint32_t size, end;
|
|
|
|
|
|
|
|
merge = push_pseudo_merge(group);
|
|
|
|
p = &merge->parents;
|
|
|
|
|
|
|
|
size = pseudo_merge_group_size(group, matches, i);
|
|
|
|
end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For each pseudo-merge commit created above, add parents to
|
|
|
|
* the allocated commit node from the unstable set of commits
|
|
|
|
* (newer than the stable threshold).
|
|
|
|
*
|
|
|
|
* Account for the sample rate, since not every candidate from
|
|
|
|
* the set of stable commits will be included as a pseudo-merge
|
|
|
|
* parent.
|
|
|
|
*/
|
|
|
|
for (; j < end && j < matches->unstable_nr; j++) {
|
|
|
|
struct commit *c = matches->unstable[j];
|
|
|
|
struct pseudo_merge_commit_idx *pmc;
|
|
|
|
|
|
|
|
if (j % (uint32_t)(1.0 / group->sample_rate))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
|
|
|
|
&c->object.oid);
|
|
|
|
|
|
|
|
ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);
|
|
|
|
|
|
|
|
pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
|
|
|
|
p = commit_list_append(c, p);
|
|
|
|
}
|
|
|
|
|
|
|
|
bitmap_writer_push_commit(writer, merge, 1);
|
|
|
|
writer->pseudo_merges_nr++;
|
|
|
|
if (end >= matches->unstable_nr)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int commit_date_cmp(const void *va, const void *vb)
|
|
|
|
{
|
|
|
|
timestamp_t a = (*(const struct commit **)va)->date;
|
|
|
|
timestamp_t b = (*(const struct commit **)vb)->date;
|
|
|
|
|
|
|
|
if (a < b)
|
|
|
|
return -1;
|
|
|
|
else if (a > b)
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches)
|
|
|
|
{
|
|
|
|
QSORT(matches->stable, matches->stable_nr, commit_date_cmp);
|
|
|
|
QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp);
|
|
|
|
}
|
|
|
|
|
|
|
|
void select_pseudo_merges(struct bitmap_writer *writer,
|
|
|
|
struct commit **commits, size_t commits_nr)
|
|
|
|
{
|
|
|
|
struct progress *progress = NULL;
|
|
|
|
uint32_t i;
|
|
|
|
|
|
|
|
if (!writer->pseudo_merge_groups.nr)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (writer->show_progress)
|
|
|
|
progress = start_progress("Selecting pseudo-merge commits",
|
|
|
|
writer->pseudo_merge_groups.nr);
|
|
|
|
|
|
|
|
for_each_ref(find_pseudo_merge_group_for_ref, writer);
|
|
|
|
|
|
|
|
for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {
|
|
|
|
struct pseudo_merge_group *group;
|
|
|
|
struct hashmap_iter iter;
|
|
|
|
struct strmap_entry *e;
|
|
|
|
|
|
|
|
group = writer->pseudo_merge_groups.items[i].util;
|
|
|
|
strmap_for_each_entry(&group->matches, &iter, e) {
|
|
|
|
struct pseudo_merge_matches *matches = e->value;
|
|
|
|
|
|
|
|
sort_pseudo_merge_matches(matches);
|
|
|
|
|
|
|
|
select_pseudo_merges_1(writer, group, matches);
|
|
|
|
}
|
|
|
|
|
|
|
|
display_progress(progress, i + 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
stop_progress(&progress);
|
|
|
|
}
|