#include "git-compat-util.h"
#include "pseudo-merge.h"
#include "date.h"
#include "oid-array.h"
#include "strbuf.h"
#include "config.h"
#include "string-list.h"
#include "refs.h"
#include "pack-bitmap.h"
#include "commit.h"
#include "alloc.h"
#include "progress.h"

#define DEFAULT_PSEUDO_MERGE_DECAY 1.0
#define DEFAULT_PSEUDO_MERGE_MAX_MERGES 64
#define DEFAULT_PSEUDO_MERGE_SAMPLE_RATE 1
#define DEFAULT_PSEUDO_MERGE_THRESHOLD approxidate("1.week.ago")
#define DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD approxidate("1.month.ago")
#define DEFAULT_PSEUDO_MERGE_STABLE_SIZE 512

/*
 * Raise 'base' to the integer power 'exp' using repeated squaring.
 *
 * 'exp' must be non-negative; the loop is not written to terminate
 * reliably for negative exponents.
 *
 * NOTE(review): callers in this file pass 'group->decay', which is
 * parsed with git_config_double(). Because 'exp' is an int, any
 * fractional part of the decay is dropped by the implicit conversion
 * at the call site -- confirm that this is intended.
 */
static double gitexp(double base, int exp)
{
	double result = 1;
	while (1) {
		if (exp % 2)
			result *= base;
		exp >>= 1;
		if (!exp)
			break;
		base *= base;
	}
	return result;
}

/*
 * Return the number of unstable commits that the i-th (0-indexed)
 * pseudo-merge of 'group' should cover, given the candidates collected
 * in 'matches'. The result is rounded to the nearest integer.
 *
 * Must only be called with group->max_merges > 0, otherwise the scaling
 * value below is computed by dividing by zero.
 */
static uint32_t pseudo_merge_group_size(const struct pseudo_merge_group *group,
					const struct pseudo_merge_matches *matches,
					uint32_t i)
{
	double C = 0.0f;
	uint32_t n;

	/*
	 * The size of pseudo-merge groups decays according to a power series,
	 * which looks like:
	 *
	 *   f(n) = C * n^-k
	 *
	 * , where 'n' is the n-th pseudo-merge group, 'f(n)' is its size, 'k'
	 * is the decay rate, and 'C' is a scaling value.
	 *
	 * The value of C depends on the number of groups, decay rate, and total
	 * number of commits. It is computed such that if there are M and N
	 * total groups and commits, respectively, that:
	 *
	 *   N = f(1) + f(2) + ... + f(M)
	 *
	 * Rearranging to isolate C, we get:
	 *
	 *   N = \sum_{n=1}^M C / n^k
	 *
	 *   N / C = \sum_{n=1}^M n^-k
	 *
	 *   C = N / \sum_{n=1}^M n^-k
	 *
	 * For example, if we have a decay rate of 'k' being equal to 1.5, 'N'
	 * total commits equal to 10,000, and 'M' being equal to 6 groups, then
	 * the (rounded) group sizes are:
	 *
	 *   { 5469, 1934, 1053, 684, 489, 372 }
	 *
	 * increasing the number of total groups, say to 10, scales the group
	 * sizes appropriately:
	 *
	 *   { 5012, 1772, 964, 626, 448, 341, 271, 221, 186, 158 }
	 */
	for (n = 0; n < group->max_merges; n++)
		C += 1.0 / gitexp(n + 1, group->decay);
	C = matches->unstable_nr / C;

	return (uint32_t)((C / gitexp(i + 1, group->decay)) + 0.5);
}

/*
 * Zero 'group', initialize its map of matches, and fill in the default
 * value of every tunable.
 */
static void pseudo_merge_group_init(struct pseudo_merge_group *group)
{
	memset(group, 0, sizeof(struct pseudo_merge_group));

	strmap_init_with_options(&group->matches, NULL, 0);

	group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
	group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
	group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
	group->threshold = DEFAULT_PSEUDO_MERGE_THRESHOLD;
	group->stable_threshold = DEFAULT_PSEUDO_MERGE_STABLE_THRESHOLD;
	group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;
}

/*
 * Config callback for "bitmappseudomerge.<name>.<key>" entries.
 *
 * 'cb_data' is the string_list of groups, keyed by <name>; a group is
 * created (with defaults) the first time its name is seen. Keys outside
 * of the "bitmappseudomerge" section, or without a subsection, are
 * ignored.
 *
 * Returns 0 on success and a negative value when a value cannot be
 * parsed. An uncompilable pattern is fatal.
 */
static int pseudo_merge_config(const char *var, const char *value,
			       const struct config_context *ctx,
			       void *cb_data)
{
	struct string_list *list = cb_data;
	struct string_list_item *item;
	struct pseudo_merge_group *group;
	struct strbuf buf = STRBUF_INIT;
	const char *sub, *key;
	size_t sub_len;
	int ret = 0;

	if (parse_config_key(var, "bitmappseudomerge", &sub, &sub_len, &key))
		goto done;

	if (!sub_len)
		goto done;

	/*
	 * 'sub' points into 'var' and is not terminated at 'sub_len', so
	 * keep a proper NUL-terminated copy of the group name around.
	 */
	strbuf_add(&buf, sub, sub_len);

	item = string_list_lookup(list, buf.buf);
	if (!item) {
		item = string_list_insert(list, buf.buf);

		item->util = xmalloc(sizeof(struct pseudo_merge_group));
		pseudo_merge_group_init(item->util);
	}

	group = item->util;

	if (!strcmp(key, "pattern")) {
		struct strbuf re = STRBUF_INIT;

		/* A key given without "= value" is passed in as NULL. */
		if (!value) {
			ret = config_error_nonbool(var);
			goto done;
		}

		/*
		 * A later entry overrides an earlier one. Release the
		 * compiled state of the old regex, not just its container.
		 */
		if (group->pattern) {
			regfree(group->pattern);
			free(group->pattern);
		}

		/* Patterns are always anchored at the start of the refname. */
		if (*value != '^')
			strbuf_addch(&re, '^');
		strbuf_addstr(&re, value);

		group->pattern = xcalloc(1, sizeof(regex_t));
		if (regcomp(group->pattern, re.buf, REG_EXTENDED))
			die(_("failed to load pseudo-merge regex for %s: '%s'"),
			    buf.buf, re.buf);

		strbuf_release(&re);
	} else if (!strcmp(key, "decay")) {
		group->decay = git_config_double(var, value, ctx->kvi);
		if (group->decay < 0) {
			warning(_("%s must be non-negative, using default"), var);
			group->decay = DEFAULT_PSEUDO_MERGE_DECAY;
		}
	} else if (!strcmp(key, "samplerate")) {
		group->sample_rate = git_config_double(var, value, ctx->kvi);
		if (!(0 <= group->sample_rate && group->sample_rate <= 1)) {
			warning(_("%s must be between 0 and 1, using default"), var);
			group->sample_rate = DEFAULT_PSEUDO_MERGE_SAMPLE_RATE;
		}
	} else if (!strcmp(key, "threshold")) {
		if (git_config_expiry_date(&group->threshold, var, value)) {
			ret = -1;
			goto done;
		}
	} else if (!strcmp(key, "maxmerges")) {
		group->max_merges = git_config_int(var, value, ctx->kvi);
		if (group->max_merges < 0) {
			warning(_("%s must be non-negative, using default"), var);
			group->max_merges = DEFAULT_PSEUDO_MERGE_MAX_MERGES;
		}
	} else if (!strcmp(key, "stablethreshold")) {
		if (git_config_expiry_date(&group->stable_threshold, var, value)) {
			ret = -1;
			goto done;
		}
	} else if (!strcmp(key, "stablesize")) {
		group->stable_size = git_config_int(var, value, ctx->kvi);
		if (group->stable_size <= 0) {
			warning(_("%s must be positive, using default"), var);
			group->stable_size = DEFAULT_PSEUDO_MERGE_STABLE_SIZE;
		}
	}

done:
	strbuf_release(&buf);

	return ret;
}

/*
 * Populate 'list' with one pseudo_merge_group per configured group name
 * (stored in each item's 'util'), then validate the result.
 *
 * Dies if a group has no pattern, or if its threshold is earlier than
 * its stable threshold.
 */
void load_pseudo_merges_from_config(struct string_list *list)
{
	struct string_list_item *item;

	git_config(pseudo_merge_config, list);

	for_each_string_list_item(item, list) {
		struct pseudo_merge_group *group = item->util;
		if (!group->pattern)
			die(_("pseudo-merge group '%s' missing required pattern"),
			    item->string);
		if (group->threshold < group->stable_threshold)
			die(_("pseudo-merge group '%s' has unstable threshold "
			      "before stable one"), item->string);
	}
}

/*
 * Ref iteration callback ('_data' is the bitmap_writer): file the commit
 * that 'refname' points at (after peeling, when peeling succeeds) under
 * every pseudo-merge group whose pattern matches the refname.
 *
 * Within a group, commits are bucketed by the text captured by the
 * pattern, and then split into a "stable" and an "unstable" set based on
 * the commit date. Always returns 0 so that iteration continues.
 */
static int find_pseudo_merge_group_for_ref(const char *refname,
					   const struct object_id *oid,
					   int flags UNUSED,
					   void *_data)
{
	struct bitmap_writer *writer = _data;
	struct object_id peeled;
	struct commit *c;
	uint32_t i;
	int has_bitmap;

	if (!peel_iterated_oid(oid, &peeled))
		oid = &peeled;

	c = lookup_commit(the_repository, oid);
	if (!c)
		return 0;

	has_bitmap = bitmap_writer_has_bitmapped_object_id(writer, oid);

	for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {
		struct pseudo_merge_group *group;
		struct pseudo_merge_matches *matches;
		struct strbuf group_name = STRBUF_INIT;
		regmatch_t captures[16];
		size_t j;

		group = writer->pseudo_merge_groups.items[i].util;
		if (regexec(group->pattern, refname, ARRAY_SIZE(captures),
			    captures, 0))
			continue;

		/* The last slot being filled means captures may be cut off. */
		if (captures[ARRAY_SIZE(captures) - 1].rm_so != -1)
			warning(_("pseudo-merge regex from config has too many capture "
				  "groups (max=%"PRIuMAX")"),
				(uintmax_t)ARRAY_SIZE(captures) - 2);

		/*
		 * Build the bucket name by joining the captured text with
		 * '-'. Slot 0 holds the whole match, and is only used when
		 * the pattern has no sub-expressions of its own.
		 */
		for (j = !!group->pattern->re_nsub; j < ARRAY_SIZE(captures); j++) {
			regmatch_t *match = &captures[j];
			if (match->rm_so == -1)
				continue;

			if (group_name.len)
				strbuf_addch(&group_name, '-');

			strbuf_add(&group_name, refname + match->rm_so,
				   match->rm_eo - match->rm_so);
		}

		matches = strmap_get(&group->matches, group_name.buf);
		if (!matches) {
			matches = xcalloc(1, sizeof(*matches));
			/* The detached name is handed to the map as its key. */
			strmap_put(&group->matches,
				   strbuf_detach(&group_name, NULL),
				   matches);
		}

		if (c->date <= group->stable_threshold) {
			/* At or before the stable threshold. */
			ALLOC_GROW(matches->stable, matches->stable_nr + 1,
				   matches->stable_alloc);
			matches->stable[matches->stable_nr++] = c;
		} else if (c->date <= group->threshold && !has_bitmap) {
			/*
			 * Between the two thresholds, and not already
			 * covered by a bitmap of its own.
			 */
			ALLOC_GROW(matches->unstable, matches->unstable_nr + 1,
				   matches->unstable_alloc);
			matches->unstable[matches->unstable_nr++] = c;
		}

		strbuf_release(&group_name);
	}

	return 0;
}

/*
 * Allocate a new commit node flagged as a pseudo-merge, mark it as
 * parsed, append it to group->merges, and return it. The returned commit
 * has no parents yet.
 */
static struct commit *push_pseudo_merge(struct pseudo_merge_group *group)
{
	struct commit *merge;

	ALLOC_GROW(group->merges, group->merges_nr + 1, group->merges_alloc);

	merge = alloc_commit_node(the_repository);
	merge->object.parsed = 1;
	merge->object.flags |= BITMAP_PSEUDO_MERGE;

	group->merges[group->merges_nr++] = merge;

	return merge;
}

/*
 * Look up the entry for 'oid' in the commit -> pseudo-merge(s) map,
 * inserting a zero-initialized entry if there is none yet.
 */
static struct pseudo_merge_commit_idx *pseudo_merge_idx(kh_oid_map_t *pseudo_merge_commits,
							const struct object_id *oid)
{
	struct pseudo_merge_commit_idx *pmc;
	int hash_ret;
	khiter_t hash_pos = kh_put_oid_map(pseudo_merge_commits, *oid,
					   &hash_ret);

	if (hash_ret) {
		CALLOC_ARRAY(pmc, 1);
		kh_value(pseudo_merge_commits, hash_pos) = pmc;
	} else {
		pmc = kh_value(pseudo_merge_commits, hash_pos);
	}

	return pmc;
}

#define MIN_PSEUDO_MERGE_SIZE 8

/*
 * Translate the group's sample rate into a stride: only every stride-th
 * unstable candidate is selected.
 *
 * Returns 0 when the rate is not positive (i.e., nothing is to be
 * sampled). Otherwise the stride is clamped to the range
 * [1, UINT32_MAX], so that neither the conversion to an integer nor a
 * later modulo by the stride can misbehave.
 */
static uint32_t pseudo_merge_sample_stride(const struct pseudo_merge_group *group)
{
	double stride;

	if (!(group->sample_rate > 0))
		return 0;

	stride = 1.0 / group->sample_rate;
	if (stride >= (double)UINT32_MAX)
		return UINT32_MAX;
	if (stride < 1)
		return 1;

	return (uint32_t)stride;
}

/*
 * Create the pseudo-merges for one bucket ('matches') of 'group',
 * register each of them with 'writer', and record for every selected
 * commit which pseudo-merge(s) it is a parent of.
 *
 * Expects both arrays in 'matches' to be in the order in which they
 * should be consumed (the caller sorts them by commit date).
 */
static void select_pseudo_merges_1(struct bitmap_writer *writer,
				   struct pseudo_merge_group *group,
				   struct pseudo_merge_matches *matches)
{
	uint32_t i, j;
	uint32_t stable_merges_nr;
	uint32_t stride;

	if (!matches->stable_nr && !matches->unstable_nr)
		return; /* all tips in this group already have bitmaps */

	/* One pseudo-merge per (possibly partial) chunk of stable_size. */
	stable_merges_nr = matches->stable_nr / group->stable_size;
	if (matches->stable_nr % group->stable_size)
		stable_merges_nr++;

	/* make stable_merges_nr pseudo merges for stable commits */
	for (i = 0, j = 0; i < stable_merges_nr; i++) {
		struct commit *merge;
		struct commit_list **p;

		merge = push_pseudo_merge(group);
		p = &merge->parents;

		/*
		 * For each pseudo-merge created above, add parents to the
		 * allocated commit node from the stable set of commits
		 * (those dated at or before the stable threshold), taking
		 * up to stable_size of them at a time.
		 */
		do {
			struct commit *c;
			struct pseudo_merge_commit_idx *pmc;

			if (j >= matches->stable_nr)
				break;

			c = matches->stable[j++];
			/*
			 * Here and below, make sure that we keep our mapping of
			 * commits -> pseudo-merge(s) which include the key'd
			 * commit up-to-date.
			 */
			pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
					       &c->object.oid);

			ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);

			pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
			p = commit_list_append(c, p);
		} while (j % group->stable_size);

		bitmap_writer_push_commit(writer, merge, 1);
		writer->pseudo_merges_nr++;
	}

	/*
	 * A sample rate of zero selects none of the unstable commits, so
	 * there is nothing to build pseudo-merges from.
	 */
	stride = pseudo_merge_sample_stride(group);
	if (!stride)
		return;

	/* make up to group->max_merges pseudo merges for unstable commits */
	for (i = 0, j = 0; i < group->max_merges; i++) {
		struct commit *merge;
		struct commit_list **p;
		uint32_t size, end;

		merge = push_pseudo_merge(group);
		p = &merge->parents;

		/*
		 * Once the computed size drops below the minimum, fold all
		 * of the remaining candidates into this pseudo-merge.
		 */
		size = pseudo_merge_group_size(group, matches, i);
		end = size < MIN_PSEUDO_MERGE_SIZE ? matches->unstable_nr : j + size;

		/*
		 * For each pseudo-merge commit created above, add parents to
		 * the allocated commit node from the unstable set of commits
		 * (newer than the stable threshold).
		 *
		 * Account for the sample rate, since not every candidate from
		 * the set of unstable commits will be included as a
		 * pseudo-merge parent.
		 */
		for (; j < end && j < matches->unstable_nr; j++) {
			struct commit *c = matches->unstable[j];
			struct pseudo_merge_commit_idx *pmc;

			if (j % stride)
				continue;

			pmc = pseudo_merge_idx(writer->pseudo_merge_commits,
					       &c->object.oid);

			ALLOC_GROW(pmc->pseudo_merge, pmc->nr + 1, pmc->alloc);

			pmc->pseudo_merge[pmc->nr++] = writer->pseudo_merges_nr;
			p = commit_list_append(c, p);
		}

		bitmap_writer_push_commit(writer, merge, 1);
		writer->pseudo_merges_nr++;
		if (end >= matches->unstable_nr)
			break;
	}
}

/* qsort-style comparator ordering commits by ascending date. */
static int commit_date_cmp(const void *va, const void *vb)
{
	timestamp_t a = (*(const struct commit **)va)->date;
	timestamp_t b = (*(const struct commit **)vb)->date;

	if (a < b)
		return -1;
	else if (a > b)
		return 1;
	return 0;
}

/* Sort both the stable and unstable candidates by ascending date. */
static void sort_pseudo_merge_matches(struct pseudo_merge_matches *matches)
{
	QSORT(matches->stable, matches->stable_nr, commit_date_cmp);
	QSORT(matches->unstable, matches->unstable_nr, commit_date_cmp);
}

/*
 * Entry point: walk all refs to collect pseudo-merge candidates for
 * every configured group, then build the pseudo-merges for each bucket
 * of each group. Does nothing when the writer has no groups configured.
 *
 * 'commits' and 'commits_nr' are not used by this function.
 */
void select_pseudo_merges(struct bitmap_writer *writer,
			  struct commit **commits, size_t commits_nr)
{
	struct progress *progress = NULL;
	uint32_t i;

	if (!writer->pseudo_merge_groups.nr)
		return;

	if (writer->show_progress)
		progress = start_progress("Selecting pseudo-merge commits",
					  writer->pseudo_merge_groups.nr);

	for_each_ref(find_pseudo_merge_group_for_ref, writer);

	for (i = 0; i < writer->pseudo_merge_groups.nr; i++) {
		struct pseudo_merge_group *group;
		struct hashmap_iter iter;
		struct strmap_entry *e;

		group = writer->pseudo_merge_groups.items[i].util;
		strmap_for_each_entry(&group->matches, &iter, e) {
			struct pseudo_merge_matches *matches = e->value;

			sort_pseudo_merge_matches(matches);

			select_pseudo_merges_1(writer, group, matches);
		}

		display_progress(progress, i + 1);
	}

	stop_progress(&progress);
}