Merge branch 'jt/fetch-negotiator-skipping'

Add a server-side knob to skip commits in exponential/fibbonacci
stride in an attempt to cover wider swath of history with a smaller
number of iterations, potentially accepting a larger packfile
transfer, instead of going back one commit a time during common
ancestor discovery during the "git fetch" transaction.

* jt/fetch-negotiator-skipping:
  negotiator/skipping: skip commits during fetch
This commit is contained in:
Junio C Hamano 2018-08-02 15:30:46 -07:00
commit 7c85ee6c58
8 changed files with 461 additions and 4 deletions

View File

@ -1531,6 +1531,15 @@ fetch.output::
`full` and `compact`. Default value is `full`. See section
OUTPUT in linkgit:git-fetch[1] for detail.
fetch.negotiationAlgorithm::
Control how information about the commits in the local repository is
sent when negotiating the contents of the packfile to be sent by the
server. Set to "skipping" to use an algorithm that skips commits in an
effort to converge faster, but may result in a larger-than-necessary
packfile; any other value instructs Git to use the default algorithm
that never skips commits (unless the server has acknowledged it or one
of its descendants).
format.attach::
Enable multipart/mixed attachments as the default for
'format-patch'. The value can also be a double quoted string

View File

@ -893,6 +893,7 @@ LIB_OBJS += merge-recursive.o
LIB_OBJS += mergesort.o
LIB_OBJS += name-hash.o
LIB_OBJS += negotiator/default.o
LIB_OBJS += negotiator/skipping.o
LIB_OBJS += notes.o
LIB_OBJS += notes-cache.o
LIB_OBJS += notes-merge.o

View File

@ -1,8 +1,14 @@
#include "git-compat-util.h"
#include "fetch-negotiator.h"
#include "negotiator/default.h"
#include "negotiator/skipping.h"
void fetch_negotiator_init(struct fetch_negotiator *negotiator)
void fetch_negotiator_init(struct fetch_negotiator *negotiator,
const char *algorithm)
{
if (algorithm && !strcmp(algorithm, "skipping")) {
skipping_negotiator_init(negotiator);
return;
}
default_negotiator_init(negotiator);
}

View File

@ -52,6 +52,7 @@ struct fetch_negotiator {
void *data;
};
void fetch_negotiator_init(struct fetch_negotiator *negotiator);
void fetch_negotiator_init(struct fetch_negotiator *negotiator,
const char *algorithm);
#endif

View File

@ -35,6 +35,7 @@ static int agent_supported;
static int server_supports_filtering;
static struct lock_file shallow_lock;
static const char *alternate_shallow_file;
static char *negotiation_algorithm;
/* Remember to update object flag allocation in object.h */
#define COMPLETE (1U << 0)
@ -913,7 +914,7 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
const char *agent_feature;
int agent_len;
struct fetch_negotiator negotiator;
fetch_negotiator_init(&negotiator);
fetch_negotiator_init(&negotiator, negotiation_algorithm);
sort_ref_list(&ref, ref_compare_name);
QSORT(sought, nr_sought, cmp_ref_by_name);
@ -1324,7 +1325,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
int in_vain = 0;
int haves_to_send = INITIAL_FLUSH;
struct fetch_negotiator negotiator;
fetch_negotiator_init(&negotiator);
fetch_negotiator_init(&negotiator, negotiation_algorithm);
packet_reader_init(&reader, fd[0], NULL, 0,
PACKET_READ_CHOMP_NEWLINE);
@ -1406,6 +1407,8 @@ static void fetch_pack_config(void)
git_config_get_bool("repack.usedeltabaseoffset", &prefer_ofs_delta);
git_config_get_bool("fetch.fsckobjects", &fetch_fsck_objects);
git_config_get_bool("transfer.fsckobjects", &transfer_fsck_objects);
git_config_get_string("fetch.negotiationalgorithm",
&negotiation_algorithm);
git_config(git_default_config, NULL);
}

250
negotiator/skipping.c Normal file
View File

@ -0,0 +1,250 @@
#include "cache.h"
#include "skipping.h"
#include "../commit.h"
#include "../fetch-negotiator.h"
#include "../prio-queue.h"
#include "../refs.h"
#include "../tag.h"
/* Remember to update object flag allocation in object.h */
/*
* Both us and the server know that both parties have this object.
*/
#define COMMON (1U << 2)
/*
* The server has told us that it has this object. We still need to tell the
* server that we have this object (or one of its descendants), but since we are
* going to do that, we do not need to tell the server about its ancestors.
*/
#define ADVERTISED (1U << 3)
/*
* This commit has entered the priority queue.
*/
#define SEEN (1U << 4)
/*
* This commit has left the priority queue.
*/
#define POPPED (1U << 5)
static int marked;
/*
* An entry in the priority queue.
*/
struct entry {
struct commit *commit;
/*
* Used only if commit is not COMMON.
*/
uint16_t original_ttl;
uint16_t ttl;
};
struct data {
struct prio_queue rev_list;
/*
* The number of non-COMMON commits in rev_list.
*/
int non_common_revs;
};
static int compare(const void *a_, const void *b_, void *unused)
{
const struct entry *a = a_;
const struct entry *b = b_;
return compare_commits_by_commit_date(a->commit, b->commit, NULL);
}
static struct entry *rev_list_push(struct data *data, struct commit *commit, int mark)
{
struct entry *entry;
commit->object.flags |= mark | SEEN;
entry = xcalloc(1, sizeof(*entry));
entry->commit = commit;
prio_queue_put(&data->rev_list, entry);
if (!(mark & COMMON))
data->non_common_revs++;
return entry;
}
static int clear_marks(const char *refname, const struct object_id *oid,
int flag, void *cb_data)
{
struct object *o = deref_tag(the_repository, parse_object(the_repository, oid), refname, 0);
if (o && o->type == OBJ_COMMIT)
clear_commit_marks((struct commit *)o,
COMMON | ADVERTISED | SEEN | POPPED);
return 0;
}
/*
* Mark this SEEN commit and all its SEEN ancestors as COMMON.
*/
static void mark_common(struct data *data, struct commit *c)
{
struct commit_list *p;
if (c->object.flags & COMMON)
return;
c->object.flags |= COMMON;
if (!(c->object.flags & POPPED))
data->non_common_revs--;
if (!c->object.parsed)
return;
for (p = c->parents; p; p = p->next) {
if (p->item->object.flags & SEEN)
mark_common(data, p->item);
}
}
/*
* Ensure that the priority queue has an entry for to_push, and ensure that the
* entry has the correct flags and ttl.
*
* This function returns 1 if an entry was found or created, and 0 otherwise
* (because the entry for this commit had already been popped).
*/
static int push_parent(struct data *data, struct entry *entry,
struct commit *to_push)
{
struct entry *parent_entry;
if (to_push->object.flags & SEEN) {
int i;
if (to_push->object.flags & POPPED)
/*
* The entry for this commit has already been popped,
* due to clock skew. Pretend that this parent does not
* exist.
*/
return 0;
/*
* Find the existing entry and use it.
*/
for (i = 0; i < data->rev_list.nr; i++) {
parent_entry = data->rev_list.array[i].data;
if (parent_entry->commit == to_push)
goto parent_found;
}
BUG("missing parent in priority queue");
parent_found:
;
} else {
parent_entry = rev_list_push(data, to_push, 0);
}
if (entry->commit->object.flags & (COMMON | ADVERTISED)) {
mark_common(data, to_push);
} else {
uint16_t new_original_ttl = entry->ttl
? entry->original_ttl : entry->original_ttl * 3 / 2 + 1;
uint16_t new_ttl = entry->ttl
? entry->ttl - 1 : new_original_ttl;
if (parent_entry->original_ttl < new_original_ttl) {
parent_entry->original_ttl = new_original_ttl;
parent_entry->ttl = new_ttl;
}
}
return 1;
}
static const struct object_id *get_rev(struct data *data)
{
struct commit *to_send = NULL;
while (to_send == NULL) {
struct entry *entry;
struct commit *commit;
struct commit_list *p;
int parent_pushed = 0;
if (data->rev_list.nr == 0 || data->non_common_revs == 0)
return NULL;
entry = prio_queue_get(&data->rev_list);
commit = entry->commit;
commit->object.flags |= POPPED;
if (!(commit->object.flags & COMMON))
data->non_common_revs--;
if (!(commit->object.flags & COMMON) && !entry->ttl)
to_send = commit;
parse_commit(commit);
for (p = commit->parents; p; p = p->next)
parent_pushed |= push_parent(data, entry, p->item);
if (!(commit->object.flags & COMMON) && !parent_pushed)
/*
* This commit has no parents, or all of its parents
* have already been popped (due to clock skew), so send
* it anyway.
*/
to_send = commit;
free(entry);
}
return &to_send->object.oid;
}
static void known_common(struct fetch_negotiator *n, struct commit *c)
{
if (c->object.flags & SEEN)
return;
rev_list_push(n->data, c, ADVERTISED);
}
static void add_tip(struct fetch_negotiator *n, struct commit *c)
{
n->known_common = NULL;
if (c->object.flags & SEEN)
return;
rev_list_push(n->data, c, 0);
}
static const struct object_id *next(struct fetch_negotiator *n)
{
n->known_common = NULL;
n->add_tip = NULL;
return get_rev(n->data);
}
static int ack(struct fetch_negotiator *n, struct commit *c)
{
int known_to_be_common = !!(c->object.flags & COMMON);
if (!(c->object.flags & SEEN))
die("received ack for commit %s not sent as 'have'\n",
oid_to_hex(&c->object.oid));
mark_common(n->data, c);
return known_to_be_common;
}
static void release(struct fetch_negotiator *n)
{
clear_prio_queue(&((struct data *)n->data)->rev_list);
FREE_AND_NULL(n->data);
}
void skipping_negotiator_init(struct fetch_negotiator *negotiator)
{
struct data *data;
negotiator->known_common = known_common;
negotiator->add_tip = add_tip;
negotiator->next = next;
negotiator->ack = ack;
negotiator->release = release;
negotiator->data = data = xcalloc(1, sizeof(*data));
data->rev_list.compare = compare;
if (marked)
for_each_ref(clear_marks, NULL);
marked = 1;
}

8
negotiator/skipping.h Normal file
View File

@ -0,0 +1,8 @@
#ifndef NEGOTIATOR_SKIPPING_H
#define NEGOTIATOR_SKIPPING_H
struct fetch_negotiator;
void skipping_negotiator_init(struct fetch_negotiator *negotiator);
#endif

View File

@ -0,0 +1,179 @@
#!/bin/sh
test_description='test skipping fetch negotiator'
. ./test-lib.sh
have_sent () {
while test "$#" -ne 0
do
grep "fetch> have $(git -C client rev-parse $1)" trace
if test $? -ne 0
then
echo "No have $(git -C client rev-parse $1) ($1)"
return 1
fi
shift
done
}
have_not_sent () {
while test "$#" -ne 0
do
grep "fetch> have $(git -C client rev-parse $1)" trace
if test $? -eq 0
then
return 1
fi
shift
done
}
test_expect_success 'commits with no parents are sent regardless of skip distance' '
git init server &&
test_commit -C server to_fetch &&
git init client &&
for i in $(seq 7)
do
test_commit -C client c$i
done &&
# We send: "c7" (skip 1) "c5" (skip 2) "c2" (skip 4). After that, since
# "c1" has no parent, it is still sent as "have" even though it would
# normally be skipped.
test_config -C client fetch.negotiationalgorithm skipping &&
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
have_sent c7 c5 c2 c1 &&
have_not_sent c6 c4 c3
'
test_expect_success 'when two skips collide, favor the larger one' '
rm -rf server client trace &&
git init server &&
test_commit -C server to_fetch &&
git init client &&
for i in $(seq 11)
do
test_commit -C client c$i
done &&
git -C client checkout c5 &&
test_commit -C client c5side &&
# Before reaching c5, we send "c5side" (skip 1) and "c11" (skip 1) "c9"
# (skip 2) "c6" (skip 4). The larger skip (skip 4) takes precedence, so
# the next "have" sent will be "c1" (from "c6" skip 4) and not "c4"
# (from "c5side" skip 1).
test_config -C client fetch.negotiationalgorithm skipping &&
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
have_sent c5side c11 c9 c6 c1 &&
have_not_sent c10 c8 c7 c5 c4 c3 c2
'
test_expect_success 'use ref advertisement to filter out commits' '
rm -rf server client trace &&
git init server &&
test_commit -C server c1 &&
test_commit -C server c2 &&
test_commit -C server c3 &&
git -C server tag -d c1 c2 c3 &&
git clone server client &&
test_commit -C client c4 &&
test_commit -C client c5 &&
git -C client checkout c4^^ &&
test_commit -C client c2side &&
git -C server checkout --orphan anotherbranch &&
test_commit -C server to_fetch &&
# The server advertising "c3" (as "refs/heads/master") means that we do
# not need to send any ancestors of "c3", but we still need to send "c3"
# itself.
test_config -C client fetch.negotiationalgorithm skipping &&
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch origin to_fetch &&
have_sent c5 c4^ c2side &&
have_not_sent c4 c4^^ c4^^^
'
test_expect_success 'handle clock skew' '
rm -rf server client trace &&
git init server &&
test_commit -C server to_fetch &&
git init client &&
# 2 regular commits
test_tick=2000000000 &&
test_commit -C client c1 &&
test_commit -C client c2 &&
# 4 old commits
test_tick=1000000000 &&
git -C client checkout c1 &&
test_commit -C client old1 &&
test_commit -C client old2 &&
test_commit -C client old3 &&
test_commit -C client old4 &&
# "c2" and "c1" are popped first, then "old4" to "old1". "old1" would
# normally be skipped, but is treated as a commit without a parent here
# and sent, because (due to clock skew) its only parent has already been
# popped off the priority queue.
test_config -C client fetch.negotiationalgorithm skipping &&
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" &&
have_sent c2 c1 old4 old2 old1 &&
have_not_sent old3
'
test_expect_success 'do not send "have" with ancestors of commits that server ACKed' '
rm -rf server client trace &&
git init server &&
test_commit -C server to_fetch &&
git init client &&
for i in $(seq 8)
do
git -C client checkout --orphan b$i &&
test_commit -C client b$i.c0
done &&
for j in $(seq 19)
do
for i in $(seq 8)
do
git -C client checkout b$i &&
test_commit -C client b$i.c$j
done
done &&
# Copy this branch over to the server and add a commit on it so that it
# is reachable but not advertised.
git -C server fetch --no-tags "$(pwd)/client" b1:refs/heads/b1 &&
git -C server checkout b1 &&
test_commit -C server commit-on-b1 &&
test_config -C client fetch.negotiationalgorithm skipping &&
GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch "$(pwd)/server" to_fetch &&
grep " fetch" trace &&
# fetch-pack sends 2 requests each containing 16 "have" lines before
# processing the first response. In these 2 requests, 4 commits from
# each branch are sent. Just check the first branch.
have_sent b1.c19 b1.c17 b1.c14 b1.c9 &&
have_not_sent b1.c18 b1.c16 b1.c15 b1.c13 b1.c12 b1.c11 b1.c10 &&
# While fetch-pack is processing the first response, it should read that
# the server ACKs b1.c19 and b1.c17.
grep "fetch< ACK $(git -C client rev-parse b1.c19) common" trace &&
grep "fetch< ACK $(git -C client rev-parse b1.c17) common" trace &&
# fetch-pack should thus not send any more commits in the b1 branch, but
# should still send the others (in this test, just check b2).
for i in $(seq 0 8)
do
have_not_sent b1.c$i
done &&
have_sent b2.c1 b2.c0
'
test_done