git/upload-pack.c

1578 lines
40 KiB
C
Raw Normal View History

#include "cache.h"
#include "config.h"
#include "refs.h"
#include "pkt-line.h"
#include "sideband.h"
#include "repository.h"
#include "object-store.h"
#include "tag.h"
#include "object.h"
#include "commit.h"
#include "diff.h"
#include "revision.h"
#include "list-objects.h"
#include "list-objects-filter.h"
#include "list-objects-filter-options.h"
#include "run-command.h"
#include "connect.h"
#include "sigchain.h"
#include "version.h"
upload/receive-pack: allow hiding ref hierarchies A repository may have refs that are only used for its internal bookkeeping purposes that should not be exposed to the others that come over the network. Teach upload-pack to omit some refs from its initial advertisement by paying attention to the uploadpack.hiderefs multi-valued configuration variable. Do the same to receive-pack via the receive.hiderefs variable. As a convenient short-hand, allow using transfer.hiderefs to set the value to both of these variables. Any ref that is under the hierarchies listed on the value of these variable is excluded from responses to requests made by "ls-remote", "fetch", etc. (for upload-pack) and "push" (for receive-pack). Because these hidden refs do not count as OUR_REF, an attempt to fetch objects at the tip of them will be rejected, and because these refs do not get advertised, "git push :" will not see local branches that have the same name as them as "matching" ones to be sent. An attempt to update/delete these hidden refs with an explicit refspec, e.g. "git push origin :refs/hidden/22", is rejected. This is not a new restriction. To the pusher, it would appear that there is no such ref, so its push request will conclude with "Now that I sent you all the data, it is time for you to update the refs. I saw that the ref did not exist when I started pushing, and I want the result to point at this commit". The receiving end will apply the compare-and-swap rule to this request and rejects the push with "Well, your update request conflicts with somebody else; I see there is such a ref.", which is the right thing to do. Otherwise a push to a hidden ref will always be "the last one wins", which is not a good default. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 08:08:30 +08:00
#include "string-list.h"
#include "argv-array.h"
#include "prio-queue.h"
#include "protocol.h"
#include "quote.h"
#include "upload-pack.h"
#include "serve.h"
#include "commit-graph.h"
#include "commit-reach.h"
#include "shallow.h"
/* Remember to update object flag allocation in object.h */
#define THEY_HAVE (1u << 11)
#define OUR_REF (1u << 12)
#define WANTED (1u << 13)
#define COMMON_KNOWN (1u << 14)
#define SHALLOW (1u << 16)
#define NOT_SHALLOW (1u << 17)
#define CLIENT_SHALLOW (1u << 18)
#define HIDDEN_REF (1u << 19)
upload-pack: clear flags before each v2 request Suppose a server has the following commit graph: A B \ / O We create a client by cloning A from the server with depth 1, and add many commits to it (so that future fetches span multiple requests due to lengthy negotiation). If it then fetches B using protocol v2, the fetch spanning multiple requests, the resulting packfile does not contain O even though the client did report that A is shallow. This is because upload_pack_v2() can be called multiple times while processing the same session. During the 2nd and all subsequent invocations, some object flags remain from the previous invocations. In particular, CLIENT_SHALLOW remains, preventing process_shallow() from adding client-reported shallows to the "shallows" array, and hence pack-objects not knowing about these client-reported shallows. Therefore, teach upload_pack_v2() to clear object flags at the start of each invocation. This has some other results: - THEY_HAVE gates addition of objects to have_obj in process_haves(). Previously in upload_pack_v2(), have_obj needed to be static because once an object is added to have_obj, it is never readded and thus we needed to retain the contents of have_obj between invocations. Now that flags are cleared, this is no longer necessary. This patch does not change the behavior of ok_to_give_up() (THEY_HAVE is still set on each "have") and got_oid() (used only in non-v2)); THEY_HAVE is not used in any other function. - WANTED gates addition of objects to want_obj in parse_want() and parse_want_ref(). It is also used in receive_needs(), but that is only used in non-v2. For the same reasons as THEY_HAVE, want_obj no longer needs to be static in upload_pack_v2(). - CLIENT_SHALLOW is changed as discussed above. Clearing of the other 5 flags does not affect functionality in v2. (Note that in non-v2, upload_pack() is only called once per process, so each invocation starts with blank flags anyway.) - OUR_REF is only used in non-v2. - COMMON_KNOWN is only used as a scratch flag in ok_to_give_up(). - SHALLOW is passed to invocations in deepen() and deepen_by_rev_list(), but upload-pack doesn't use it. - NOT_SHALLOW is used by send_shallow() and send_unshallow(), but invocations of those functions are always preceded by code that sets NOT_SHALLOW on the appropriate objects. - HIDDEN_REF is only used in non-v2. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-19 04:43:29 +08:00
#define ALL_FLAGS (THEY_HAVE | OUR_REF | WANTED | COMMON_KNOWN | SHALLOW | \
NOT_SHALLOW | CLIENT_SHALLOW | HIDDEN_REF)
static timestamp_t oldest_have;
/* Allow specifying sha1 if it is a ref tip. */
#define ALLOW_TIP_SHA1 01
/* Allow request of a sha1 if it is reachable from a ref (possibly hidden ref). */
#define ALLOW_REACHABLE_SHA1 02
/* Allow request of any sha1. Implies ALLOW_TIP_SHA1 and ALLOW_REACHABLE_SHA1. */
#define ALLOW_ANY_SHA1 07
static unsigned int allow_unadvertised_object_request;
static int shallow_nr;
static struct object_array extra_edge_obj;
static int keepalive = 5;
upload-pack: provide a hook for running pack-objects When upload-pack serves a client request, it turns to pack-objects to do the heavy lifting of creating a packfile. There's no easy way to intercept the call to pack-objects, but there are a few good reasons to want to do so: 1. If you're debugging a client or server issue with fetching, you may want to store a copy of the generated packfile. 2. If you're gathering data from real-world fetches for performance analysis or debugging, storing a copy of the arguments and stdin lets you replay the pack generation at your leisure. 3. You may want to insert a caching layer around pack-objects; it is the most CPU- and memory-intensive part of serving a fetch, and its output is a pure function[1] of its input, making it an ideal place to consolidate identical requests. This patch adds a simple "hook" interface to intercept calls to pack-objects. The new test demonstrates how it can be used for debugging (using it for caching is a straightforward extension; the tricky part is writing the actual caching layer). This hook is unlike the normal hook scripts found in the "hooks/" directory of a repository. Because we promise that upload-pack is safe to run in an untrusted repository, we cannot execute arbitrary code or commands found in the repository (neither in hooks/, nor in the config). So instead, this hook is triggered from a config variable that is explicitly ignored in the per-repo config. The config variable holds the actual shell command to run as the hook. Another approach would be to simply treat it as a boolean: "should I respect the upload-pack hooks in this repo?", and then run the script from "hooks/" as we usually do. However, that isn't as flexible; there's no way to run a hook approved by the site administrator (e.g., in "/etc/gitconfig") on a repository whose contents are not trusted. The approach taken by this patch is more fine-grained, if a little less conventional for git hooks (it does behave similar to other configured commands like diff.external, etc). [1] Pack-objects isn't _actually_ a pure function. Its output depends on the exact packing of the object database, and if multi-threading is used for delta compression, can even differ racily. But for the purposes of caching, that's OK; of the many possible outputs for a given input, it is sufficient only that we output one of them. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-05-19 06:45:37 +08:00
static const char *pack_objects_hook;
static int allow_filter;
static int allow_ref_in_want;
static int allow_sideband_all;
/*
* Please annotate, and if possible group together, fields used only
* for protocol v0 or only for protocol v2.
*/
struct upload_pack_data {
struct string_list symref; /* v0 only */
struct object_array want_obj;
struct object_array have_obj;
struct oid_array haves; /* v2 only */
struct string_list wanted_refs; /* v2 only */
struct object_array shallows;
struct string_list deepen_not;
int depth;
timestamp_t deepen_since;
int deepen_rev_list;
int deepen_relative;
unsigned int timeout; /* v0 only */
enum {
NO_MULTI_ACK = 0,
MULTI_ACK = 1,
MULTI_ACK_DETAILED = 2
} multi_ack; /* v0 only */
/* 0 for no sideband, otherwise DEFAULT_PACKET_MAX or LARGE_PACKET_MAX */
int use_sideband;
struct list_objects_filter_options filter_options;
struct packet_writer writer;
unsigned stateless_rpc : 1; /* v0 only */
unsigned no_done : 1; /* v0 only */
unsigned daemon_mode : 1; /* v0 only */
unsigned filter_capability_requested : 1; /* v0 only */
unsigned use_thin_pack : 1;
unsigned use_ofs_delta : 1;
unsigned no_progress : 1;
unsigned use_include_tag : 1;
unsigned done : 1; /* v2 only */
};
static void upload_pack_data_init(struct upload_pack_data *data)
{
struct string_list symref = STRING_LIST_INIT_DUP;
struct string_list wanted_refs = STRING_LIST_INIT_DUP;
struct object_array want_obj = OBJECT_ARRAY_INIT;
struct object_array have_obj = OBJECT_ARRAY_INIT;
struct oid_array haves = OID_ARRAY_INIT;
struct object_array shallows = OBJECT_ARRAY_INIT;
struct string_list deepen_not = STRING_LIST_INIT_DUP;
memset(data, 0, sizeof(*data));
data->symref = symref;
data->wanted_refs = wanted_refs;
data->want_obj = want_obj;
data->have_obj = have_obj;
data->haves = haves;
data->shallows = shallows;
data->deepen_not = deepen_not;
packet_writer_init(&data->writer, 1);
}
static void upload_pack_data_clear(struct upload_pack_data *data)
{
string_list_clear(&data->symref, 1);
string_list_clear(&data->wanted_refs, 1);
object_array_clear(&data->want_obj);
object_array_clear(&data->have_obj);
oid_array_clear(&data->haves);
object_array_clear(&data->shallows);
string_list_clear(&data->deepen_not, 0);
list_objects_filter_release(&data->filter_options);
}
static void reset_timeout(unsigned int timeout)
{
alarm(timeout);
}
static void send_client_data(int fd, const char *data, ssize_t sz,
int use_sideband)
{
if (use_sideband) {
send_sideband(1, fd, data, sz, use_sideband);
return;
}
if (fd == 3)
/* emergency quit */
fd = 2;
if (fd == 2) {
/* XXX: are we happy to lose stuff here? */
xwrite(fd, data, sz);
return;
}
write_or_die(fd, data, sz);
}
static int write_one_shallow(const struct commit_graft *graft, void *cb_data)
{
FILE *fp = cb_data;
if (graft->nr_parent == -1)
fprintf(fp, "--shallow %s\n", oid_to_hex(&graft->oid));
return 0;
}
static void create_pack_file(struct upload_pack_data *pack_data)
{
struct child_process pack_objects = CHILD_PROCESS_INIT;
char data[8193], progress[128];
char abort_msg[] = "aborting due to possible repository "
"corruption on the remote side.";
int buffered = -1;
ssize_t sz;
int i;
FILE *pipe_fd;
upload-pack: provide a hook for running pack-objects When upload-pack serves a client request, it turns to pack-objects to do the heavy lifting of creating a packfile. There's no easy way to intercept the call to pack-objects, but there are a few good reasons to want to do so: 1. If you're debugging a client or server issue with fetching, you may want to store a copy of the generated packfile. 2. If you're gathering data from real-world fetches for performance analysis or debugging, storing a copy of the arguments and stdin lets you replay the pack generation at your leisure. 3. You may want to insert a caching layer around pack-objects; it is the most CPU- and memory-intensive part of serving a fetch, and its output is a pure function[1] of its input, making it an ideal place to consolidate identical requests. This patch adds a simple "hook" interface to intercept calls to pack-objects. The new test demonstrates how it can be used for debugging (using it for caching is a straightforward extension; the tricky part is writing the actual caching layer). This hook is unlike the normal hook scripts found in the "hooks/" directory of a repository. Because we promise that upload-pack is safe to run in an untrusted repository, we cannot execute arbitrary code or commands found in the repository (neither in hooks/, nor in the config). So instead, this hook is triggered from a config variable that is explicitly ignored in the per-repo config. The config variable holds the actual shell command to run as the hook. Another approach would be to simply treat it as a boolean: "should I respect the upload-pack hooks in this repo?", and then run the script from "hooks/" as we usually do. However, that isn't as flexible; there's no way to run a hook approved by the site administrator (e.g., in "/etc/gitconfig") on a repository whose contents are not trusted. The approach taken by this patch is more fine-grained, if a little less conventional for git hooks (it does behave similar to other configured commands like diff.external, etc). [1] Pack-objects isn't _actually_ a pure function. Its output depends on the exact packing of the object database, and if multi-threading is used for delta compression, can even differ racily. But for the purposes of caching, that's OK; of the many possible outputs for a given input, it is sufficient only that we output one of them. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-05-19 06:45:37 +08:00
if (!pack_objects_hook)
pack_objects.git_cmd = 1;
else {
argv_array_push(&pack_objects.args, pack_objects_hook);
argv_array_push(&pack_objects.args, "git");
pack_objects.use_shell = 1;
}
if (shallow_nr) {
argv_array_push(&pack_objects.args, "--shallow-file");
argv_array_push(&pack_objects.args, "");
}
argv_array_push(&pack_objects.args, "pack-objects");
argv_array_push(&pack_objects.args, "--revs");
if (pack_data->use_thin_pack)
argv_array_push(&pack_objects.args, "--thin");
argv_array_push(&pack_objects.args, "--stdout");
if (shallow_nr)
argv_array_push(&pack_objects.args, "--shallow");
if (!pack_data->no_progress)
argv_array_push(&pack_objects.args, "--progress");
if (pack_data->use_ofs_delta)
argv_array_push(&pack_objects.args, "--delta-base-offset");
if (pack_data->use_include_tag)
argv_array_push(&pack_objects.args, "--include-tag");
if (pack_data->filter_options.choice) {
const char *spec =
expand_list_objects_filter_spec(&pack_data->filter_options);
if (pack_objects.use_shell) {
struct strbuf buf = STRBUF_INIT;
sq_quote_buf(&buf, spec);
argv_array_pushf(&pack_objects.args, "--filter=%s", buf.buf);
strbuf_release(&buf);
} else {
argv_array_pushf(&pack_objects.args, "--filter=%s",
spec);
}
}
upload-pack: start pack-objects before async rev-list In a pthread-enabled version of upload-pack, there's a race condition that can cause a deadlock on the fflush(NULL) we call from run-command. What happens is this: 1. Upload-pack is informed we are doing a shallow clone. 2. We call start_async() to spawn a thread that will generate rev-list results to feed to pack-objects. It gets a file descriptor to a pipe which will eventually hook to pack-objects. 3. The rev-list thread uses fdopen to create a new output stream around the fd we gave it, called pack_pipe. 4. The thread writes results to pack_pipe. Outside of our control, libc is doing locking on the stream. We keep writing until the OS pipe buffer is full, and then we block in write(), still holding the lock. 5. The main thread now uses start_command to spawn pack-objects. Before forking, it calls fflush(NULL) to flush every stdio output buffer. It blocks trying to get the lock on pack_pipe. And we have a deadlock. The thread will block until somebody starts reading from the pipe. But nobody will read from the pipe until we finish flushing to the pipe. To fix this, we swap the start order: we start the pack-objects reader first, and then the rev-list writer after. Thus the problematic fflush(NULL) happens before we even open the new file descriptor (and even if it didn't, flushing should no longer block, as the reader at the end of the pipe is now active). Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-04-07 05:33:33 +08:00
pack_objects.in = -1;
pack_objects.out = -1;
pack_objects.err = -1;
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
if (start_command(&pack_objects))
die("git upload-pack: unable to fork git-pack-objects");
pipe_fd = xfdopen(pack_objects.in, "w");
if (shallow_nr)
for_each_commit_graft(write_one_shallow, pipe_fd);
for (i = 0; i < pack_data->want_obj.nr; i++)
fprintf(pipe_fd, "%s\n",
oid_to_hex(&pack_data->want_obj.objects[i].item->oid));
fprintf(pipe_fd, "--not\n");
for (i = 0; i < pack_data->have_obj.nr; i++)
fprintf(pipe_fd, "%s\n",
oid_to_hex(&pack_data->have_obj.objects[i].item->oid));
for (i = 0; i < extra_edge_obj.nr; i++)
fprintf(pipe_fd, "%s\n",
oid_to_hex(&extra_edge_obj.objects[i].item->oid));
fprintf(pipe_fd, "\n");
fflush(pipe_fd);
fclose(pipe_fd);
/* We read from pack_objects.err to capture stderr output for
* progress bar, and pack_objects.out to capture the pack data.
*/
while (1) {
struct pollfd pfd[2];
int pe, pu, pollsize;
int ret;
reset_timeout(pack_data->timeout);
pollsize = 0;
pe = pu = -1;
if (0 <= pack_objects.out) {
pfd[pollsize].fd = pack_objects.out;
pfd[pollsize].events = POLLIN;
pu = pollsize;
pollsize++;
}
if (0 <= pack_objects.err) {
pfd[pollsize].fd = pack_objects.err;
pfd[pollsize].events = POLLIN;
pe = pollsize;
pollsize++;
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
if (!pollsize)
break;
ret = poll(pfd, pollsize,
keepalive < 0 ? -1 : 1000 * keepalive);
if (ret < 0) {
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
if (errno != EINTR) {
error_errno("poll failed, resuming");
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
sleep(1);
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
continue;
}
if (0 <= pe && (pfd[pe].revents & (POLLIN|POLLHUP))) {
/* Status ready; we ship that in the side-band
* or dump to the standard error.
*/
sz = xread(pack_objects.err, progress,
sizeof(progress));
if (0 < sz)
send_client_data(2, progress, sz,
pack_data->use_sideband);
else if (sz == 0) {
close(pack_objects.err);
pack_objects.err = -1;
}
else
goto fail;
/* give priority to status messages */
continue;
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
if (0 <= pu && (pfd[pu].revents & (POLLIN|POLLHUP))) {
/* Data ready; we keep the last byte to ourselves
* in case we detect broken rev-list, so that we
* can leave the stream corrupted. This is
* unfortunate -- unpack-objects would happily
* accept a valid packdata with trailing garbage,
* so appending garbage after we pass all the
* pack data is not good enough to signal
* breakage to downstream.
*/
char *cp = data;
ssize_t outsz = 0;
if (0 <= buffered) {
*cp++ = buffered;
outsz++;
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
sz = xread(pack_objects.out, cp,
sizeof(data) - outsz);
if (0 < sz)
;
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
else if (sz == 0) {
close(pack_objects.out);
pack_objects.out = -1;
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
else
goto fail;
sz += outsz;
if (1 < sz) {
buffered = data[sz-1] & 0xFF;
sz--;
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
else
buffered = -1;
send_client_data(1, data, sz,
pack_data->use_sideband);
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
}
/*
* We hit the keepalive timeout without saying anything; send
* an empty message on the data sideband just to let the other
* side know we're still working on it, but don't have any data
* yet.
*
* If we don't have a sideband channel, there's no room in the
* protocol to say anything, so those clients are just out of
* luck.
*/
if (!ret && pack_data->use_sideband) {
static const char buf[] = "0005\1";
write_or_die(1, buf, 5);
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
if (finish_command(&pack_objects)) {
error("git upload-pack: git-pack-objects died with error.");
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
goto fail;
}
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
/* flush the data */
if (0 <= buffered) {
data[0] = buffered;
send_client_data(1, data, 1,
pack_data->use_sideband);
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
fprintf(stderr, "flushed.\n");
}
if (pack_data->use_sideband)
upload-pack: Use finish_{command,async}() instead of waitpid(). upload-pack spawns two processes, rev-list and pack-objects, and carefully monitors their status so that it can report failure to the remote end. This change removes the complicated procedures on the grounds of the following observations: - If everything is OK, rev-list closes its output pipe end, upon which pack-objects (which reads from the pipe) sees EOF and terminates itself, closing its output (and error) pipes. upload-pack reads from both until it sees EOF in both. It collects the exit codes of the child processes (which indicate success) and terminates successfully. - If rev-list sees an error, it closes its output and terminates with failure. pack-objects sees EOF in its input and terminates successfully. Again upload-pack reads its inputs until EOF. When it now collects the exit codes of its child processes, it notices the failure of rev-list and signals failure to the remote end. - If pack-objects sees an error, it terminates with failure. Since this breaks the pipe to rev-list, rev-list is killed with SIGPIPE. upload-pack reads its input until EOF, then collects the exit codes of the child processes, notices their failures, and signals failure to the remote end. - If upload-pack itself dies unexpectedly, pack-objects is killed with SIGPIPE, and subsequently also rev-list. The upshot of this is that precise monitoring of child processes is not required because both terminate if either one of them dies unexpectedly. This allows us to use finish_command() and finish_async() instead of an explicit waitpid(2) call. The change is smaller than it looks because most of it only reduces the indentation of a large part of the inner loop. Signed-off-by: Johannes Sixt <johannes.sixt@telecom.at> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-11-05 03:46:48 +08:00
packet_flush(1);
return;
fail:
send_client_data(3, abort_msg, sizeof(abort_msg),
pack_data->use_sideband);
die("git upload-pack: %s", abort_msg);
}
static int got_oid(const char *hex, struct object_id *oid,
struct object_array *have_obj)
{
struct object *o;
int we_knew_they_have = 0;
if (get_oid_hex(hex, oid))
die("git upload-pack: expected SHA1 object, got '%s'", hex);
if (!has_object_file(oid))
return -1;
o = parse_object(the_repository, oid);
if (!o)
die("oops (%s)", oid_to_hex(oid));
2006-08-13 13:16:51 +08:00
if (o->type == OBJ_COMMIT) {
struct commit_list *parents;
struct commit *commit = (struct commit *)o;
if (o->flags & THEY_HAVE)
we_knew_they_have = 1;
else
o->flags |= THEY_HAVE;
if (!oldest_have || (commit->date < oldest_have))
oldest_have = commit->date;
for (parents = commit->parents;
parents;
parents = parents->next)
parents->item->object.flags |= THEY_HAVE;
}
if (!we_knew_they_have) {
add_object_array(o, NULL, have_obj);
return 1;
}
return 0;
}
static int ok_to_give_up(const struct object_array *have_obj,
struct object_array *want_obj)
{
commit-reach: make can_all_from_reach... linear The can_all_from_reach_with_flags() algorithm is currently quadratic in the worst case, because it calls the reachable() method for every 'from' without tracking which commits have already been walked or which can already reach a commit in 'to'. Rewrite the algorithm to walk each commit a constant number of times. We also add some optimizations that should work for the main consumer of this method: fetch negotitation (haves/wants). The first step includes using a depth-first-search (DFS) from each 'from' commit, sorted by ascending generation number. We do not walk beyond the minimum generation number or the minimum commit date. This DFS is likely to be faster than the existing reachable() method because we expect previous ref values to be along the first-parent history. If we find a target commit, then we mark everything in the DFS stack as a RESULT. This expands the set of targets for the other 'from' commits. We also mark the visited commits using 'assign_flag' to prevent re- walking the same commits. We still need to clear our flags at the end, which is why we will have a total of three visits to each commit. Performance was measured on the Linux repository using 'test-tool reach can_all_from_reach'. The input included rows seeded by tag values. The "small" case included X-rows as v4.[0-9]* and Y-rows as v3.[0-9]*. This mimics a (very large) fetch that says "I have all major v3 releases and want all major v4 releases." The "large" case included X-rows as "v4.*" and Y-rows as "v3.*". This adds all release-candidate tags to the set, which does not greatly increase the number of objects that are considered, but does increase the number of 'from' commits, demonstrating the quadratic nature of the previous code. Small Case: Before: 1.52 s After: 0.26 s Large Case: Before: 3.50 s After: 0.27 s Note how the time increases between the two cases in the two versions. The new code increases relative to the number of commits that need to be walked, but not directly relative to the number of 'from' commits. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-21 00:33:28 +08:00
uint32_t min_generation = GENERATION_NUMBER_ZERO;
if (!have_obj->nr)
return 0;
return can_all_from_reach_with_flag(want_obj, THEY_HAVE,
commit-reach: make can_all_from_reach... linear The can_all_from_reach_with_flags() algorithm is currently quadratic in the worst case, because it calls the reachable() method for every 'from' without tracking which commits have already been walked or which can already reach a commit in 'to'. Rewrite the algorithm to walk each commit a constant number of times. We also add some optimizations that should work for the main consumer of this method: fetch negotitation (haves/wants). The first step includes using a depth-first-search (DFS) from each 'from' commit, sorted by ascending generation number. We do not walk beyond the minimum generation number or the minimum commit date. This DFS is likely to be faster than the existing reachable() method because we expect previous ref values to be along the first-parent history. If we find a target commit, then we mark everything in the DFS stack as a RESULT. This expands the set of targets for the other 'from' commits. We also mark the visited commits using 'assign_flag' to prevent re- walking the same commits. We still need to clear our flags at the end, which is why we will have a total of three visits to each commit. Performance was measured on the Linux repository using 'test-tool reach can_all_from_reach'. The input included rows seeded by tag values. The "small" case included X-rows as v4.[0-9]* and Y-rows as v3.[0-9]*. This mimics a (very large) fetch that says "I have all major v3 releases and want all major v4 releases." The "large" case included X-rows as "v4.*" and Y-rows as "v3.*". This adds all release-candidate tags to the set, which does not greatly increase the number of objects that are considered, but does increase the number of 'from' commits, demonstrating the quadratic nature of the previous code. Small Case: Before: 1.52 s After: 0.26 s Large Case: Before: 3.50 s After: 0.27 s Note how the time increases between the two cases in the two versions. The new code increases relative to the number of commits that need to be walked, but not directly relative to the number of 'from' commits. Signed-off-by: Derrick Stolee <dstolee@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-21 00:33:28 +08:00
COMMON_KNOWN, oldest_have,
min_generation);
}
static int get_common_commits(struct upload_pack_data *data,
struct packet_reader *reader)
{
struct object_id oid;
char last_hex[GIT_MAX_HEXSZ + 1];
int got_common = 0;
int got_other = 0;
int sent_ready = 0;
save_commit_buffer = 0;
for (;;) {
const char *arg;
reset_timeout(data->timeout);
if (packet_reader_read(reader) != PACKET_READ_NORMAL) {
if (data->multi_ack == MULTI_ACK_DETAILED
&& got_common
&& !got_other
&& ok_to_give_up(&data->have_obj, &data->want_obj)) {
sent_ready = 1;
packet_write_fmt(1, "ACK %s ready\n", last_hex);
}
if (data->have_obj.nr == 0 || data->multi_ack)
packet_write_fmt(1, "NAK\n");
if (data->no_done && sent_ready) {
packet_write_fmt(1, "ACK %s\n", last_hex);
return 0;
}
if (data->stateless_rpc)
exit(0);
got_common = 0;
got_other = 0;
continue;
}
if (skip_prefix(reader->line, "have ", &arg)) {
switch (got_oid(arg, &oid, &data->have_obj)) {
case -1: /* they have what we do not */
got_other = 1;
if (data->multi_ack
&& ok_to_give_up(&data->have_obj, &data->want_obj)) {
const char *hex = oid_to_hex(&oid);
if (data->multi_ack == MULTI_ACK_DETAILED) {
sent_ready = 1;
packet_write_fmt(1, "ACK %s ready\n", hex);
} else
packet_write_fmt(1, "ACK %s continue\n", hex);
Add multi_ack_detailed capability to fetch-pack/upload-pack When multi_ack_detailed is enabled the ACK continue messages returned by the remote upload-pack are broken out to describe the different states within the peer. This permits the client to better understand the server's in-memory state. The fetch-pack/upload-pack protocol now looks like: NAK --------------------------------- Always sent in response to "done" if there was no common base selected from the "have" lines (or no have lines were sent). * no multi_ack or multi_ack_detailed: Sent when the client has sent a pkt-line flush ("0000") and the server has not yet found a common base object. * either multi_ack or multi_ack_detailed: Always sent in response to a pkt-line flush. ACK %s ----------------------------------- * no multi_ack or multi_ack_detailed: Sent in response to "have" when the object exists on the remote side and is therefore an object in common between the peers. The argument is the SHA-1 of the common object. * either multi_ack or multi_ack_detailed: Sent in response to "done" if there are common objects. The argument is the last SHA-1 determined to be common. ACK %s continue ----------------------------------- * multi_ack only: Sent in response to "have". The remote side wants the client to consider this object as common, and immediately stop transmitting additional "have" lines for objects that are reachable from it. The reason the client should stop is not given, but is one of the two cases below available under multi_ack_detailed. ACK %s common ----------------------------------- * multi_ack_detailed only: Sent in response to "have". Both sides have this object. Like with "ACK %s continue" above the client should stop sending have lines reachable for objects from the argument. ACK %s ready ----------------------------------- * multi_ack_detailed only: Sent in response to "have". The client should stop transmitting objects which are reachable from the argument, and send "done" soon to get the objects. If the remote side has the specified object, it should first send an "ACK %s common" message prior to sending "ACK %s ready". Clients may still submit additional "have" lines if there are more side branches for the client to explore that might be added to the common set and reduce the number of objects to transfer. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-10-31 08:47:25 +08:00
}
break;
default:
got_common = 1;
oid_to_hex_r(last_hex, &oid);
if (data->multi_ack == MULTI_ACK_DETAILED)
packet_write_fmt(1, "ACK %s common\n", last_hex);
else if (data->multi_ack)
packet_write_fmt(1, "ACK %s continue\n", last_hex);
else if (data->have_obj.nr == 1)
packet_write_fmt(1, "ACK %s\n", last_hex);
break;
}
continue;
}
if (!strcmp(reader->line, "done")) {
if (data->have_obj.nr > 0) {
if (data->multi_ack)
packet_write_fmt(1, "ACK %s\n", last_hex);
return 0;
}
packet_write_fmt(1, "NAK\n");
return -1;
}
die("git upload-pack: expected SHA1 list, got '%s'", reader->line);
}
}
static int is_our_ref(struct object *o)
{
int allow_hidden_ref = (allow_unadvertised_object_request &
(ALLOW_TIP_SHA1 | ALLOW_REACHABLE_SHA1));
return o->flags & ((allow_hidden_ref ? HIDDEN_REF : 0) | OUR_REF);
}
/*
* on successful case, it's up to the caller to close cmd->out
*/
static int do_reachable_revlist(struct child_process *cmd,
struct object_array *src,
struct object_array *reachable)
{
static const char *argv[] = {
"rev-list", "--stdin", NULL,
};
struct object *o;
char namebuf[GIT_MAX_HEXSZ + 2]; /* ^ + hash + LF */
int i;
const unsigned hexsz = the_hash_algo->hexsz;
cmd->argv = argv;
cmd->git_cmd = 1;
cmd->no_stderr = 1;
cmd->in = -1;
cmd->out = -1;
/*
* If the next rev-list --stdin encounters an unknown commit,
* it terminates, which will cause SIGPIPE in the write loop
* below.
*/
sigchain_push(SIGPIPE, SIG_IGN);
if (start_command(cmd))
goto error;
namebuf[0] = '^';
namebuf[hexsz + 1] = '\n';
for (i = get_max_object_index(); 0 < i; ) {
o = get_indexed_object(--i);
if (!o)
continue;
if (reachable && o->type == OBJ_COMMIT)
o->flags &= ~TMP_MARK;
if (!is_our_ref(o))
continue;
memcpy(namebuf + 1, oid_to_hex(&o->oid), hexsz);
if (write_in_full(cmd->in, namebuf, hexsz + 2) < 0)
goto error;
}
namebuf[hexsz] = '\n';
for (i = 0; i < src->nr; i++) {
o = src->objects[i].item;
if (is_our_ref(o)) {
if (reachable)
add_object_array(o, NULL, reachable);
continue;
}
if (reachable && o->type == OBJ_COMMIT)
o->flags |= TMP_MARK;
memcpy(namebuf, oid_to_hex(&o->oid), hexsz);
if (write_in_full(cmd->in, namebuf, hexsz + 1) < 0)
goto error;
}
close(cmd->in);
cmd->in = -1;
sigchain_pop(SIGPIPE);
return 0;
error:
sigchain_pop(SIGPIPE);
if (cmd->in >= 0)
close(cmd->in);
if (cmd->out >= 0)
close(cmd->out);
return -1;
}
static int get_reachable_list(struct object_array *src,
struct object_array *reachable)
{
struct child_process cmd = CHILD_PROCESS_INIT;
int i;
struct object *o;
char namebuf[GIT_MAX_HEXSZ + 2]; /* ^ + hash + LF */
const unsigned hexsz = the_hash_algo->hexsz;
if (do_reachable_revlist(&cmd, src, reachable) < 0)
return -1;
while ((i = read_in_full(cmd.out, namebuf, hexsz + 1)) == hexsz + 1) {
struct object_id oid;
const char *p;
if (parse_oid_hex(namebuf, &oid, &p) || *p != '\n')
break;
o = lookup_object(the_repository, &oid);
if (o && o->type == OBJ_COMMIT) {
o->flags &= ~TMP_MARK;
}
}
for (i = get_max_object_index(); 0 < i; i--) {
o = get_indexed_object(i - 1);
if (o && o->type == OBJ_COMMIT &&
(o->flags & TMP_MARK)) {
add_object_array(o, NULL, reachable);
o->flags &= ~TMP_MARK;
}
}
close(cmd.out);
if (finish_command(&cmd))
return -1;
return 0;
}
static int has_unreachable(struct object_array *src)
{
struct child_process cmd = CHILD_PROCESS_INIT;
char buf[1];
int i;
if (do_reachable_revlist(&cmd, src, NULL) < 0)
return 1;
/*
* The commits out of the rev-list are not ancestors of
* our ref.
*/
i = read_in_full(cmd.out, buf, 1);
if (i)
goto error;
close(cmd.out);
cmd.out = -1;
/*
* rev-list may have died by encountering a bad commit
* in the history, in which case we do want to bail out
* even when it showed no commit.
*/
if (finish_command(&cmd))
goto error;
/* All the non-tip ones are ancestors of what we advertised */
return 0;
error:
sigchain_pop(SIGPIPE);
if (cmd.out >= 0)
close(cmd.out);
return 1;
}
static void check_non_tip(struct upload_pack_data *data)
{
int i;
/*
* In the normal in-process case without
* uploadpack.allowReachableSHA1InWant,
* non-tip requests can never happen.
*/
if (!data->stateless_rpc
&& !(allow_unadvertised_object_request & ALLOW_REACHABLE_SHA1))
goto error;
if (!has_unreachable(&data->want_obj))
/* All the non-tip ones are ancestors of what we advertised */
return;
error:
/* Pick one of them (we know there at least is one) */
for (i = 0; i < data->want_obj.nr; i++) {
struct object *o = data->want_obj.objects[i].item;
upload-pack: send ERR packet for non-tip objects Commit bdb31eada7 (upload-pack: report "not our ref" to client, 2017-02-23) catches the case where a client asks for an object we don't have, and issues a message that the client can show to the user (in addition to dying and writing to stderr). There's a similar case (with the same message) when the client asks for an object which we _do_ have, but which isn't a ref tip (or isn't reachable, when uploadpack.allowReachableSHA1InWant is true). Let's give that one the same treatment, for the same reason (namely that it's more informative to the client than just hanging up, since they won't see our stderr over some protocols). There are two tests here. We cover it most directly in t5530 by invoking upload-pack, which matches the existing "not our ref" test. But a more end-to-end check is that "git fetch" actually shows the message to the client. We're already checking in t5516 that this case fails, so we can just check stderr there, too. Note that even after we started ignoring SIGPIPE in 8bf4becf0c, this could in theory still be racy as described in that commit (because we die() on write failures before pumping the connection for any ERR packets). In practice this should be OK for this case. The server will not actually check reachability until it has received our whole group of "want" lines. And since we have no objects in the repository, we won't send any "have" lines, meaning we're always waiting to read the server response. Note also that this case cannot happen in the v2 protocol, since it allows any available object to be requested. However, we don't have to take any steps to protect against the upcoming GIT_TEST_PROTOCOL_VERSION in our tests: - the tests in t5516 would already need to be skipped under v2, and that is covered by ab0c5f5096 (tests: always test fetch of unreachable with v0, 2019-02-25) - the tests in t5530 invoke upload-pack directly, which will continue to default to v0. Eventually we may have a test setting which uses v2 even for bare upload-pack calls, but we can't override it here until we know what the setting looks like. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-13 13:53:34 +08:00
if (!is_our_ref(o)) {
packet_writer_error(&data->writer,
upload-pack: send ERR packet for non-tip objects Commit bdb31eada7 (upload-pack: report "not our ref" to client, 2017-02-23) catches the case where a client asks for an object we don't have, and issues a message that the client can show to the user (in addition to dying and writing to stderr). There's a similar case (with the same message) when the client asks for an object which we _do_ have, but which isn't a ref tip (or isn't reachable, when uploadpack.allowReachableSHA1InWant is true). Let's give that one the same treatment, for the same reason (namely that it's more informative to the client than just hanging up, since they won't see our stderr over some protocols). There are two tests here. We cover it most directly in t5530 by invoking upload-pack, which matches the existing "not our ref" test. But a more end-to-end check is that "git fetch" actually shows the message to the client. We're already checking in t5516 that this case fails, so we can just check stderr there, too. Note that even after we started ignoring SIGPIPE in 8bf4becf0c, this could in theory still be racy as described in that commit (because we die() on write failures before pumping the connection for any ERR packets). In practice this should be OK for this case. The server will not actually check reachability until it has received our whole group of "want" lines. And since we have no objects in the repository, we won't send any "have" lines, meaning we're always waiting to read the server response. Note also that this case cannot happen in the v2 protocol, since it allows any available object to be requested. However, we don't have to take any steps to protect against the upcoming GIT_TEST_PROTOCOL_VERSION in our tests: - the tests in t5516 would already need to be skipped under v2, and that is covered by ab0c5f5096 (tests: always test fetch of unreachable with v0, 2019-02-25) - the tests in t5530 invoke upload-pack directly, which will continue to default to v0. Eventually we may have a test setting which uses v2 even for bare upload-pack calls, but we can't override it here until we know what the setting looks like. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-13 13:53:34 +08:00
"upload-pack: not our ref %s",
oid_to_hex(&o->oid));
die("git upload-pack: not our ref %s",
oid_to_hex(&o->oid));
upload-pack: send ERR packet for non-tip objects Commit bdb31eada7 (upload-pack: report "not our ref" to client, 2017-02-23) catches the case where a client asks for an object we don't have, and issues a message that the client can show to the user (in addition to dying and writing to stderr). There's a similar case (with the same message) when the client asks for an object which we _do_ have, but which isn't a ref tip (or isn't reachable, when uploadpack.allowReachableSHA1InWant is true). Let's give that one the same treatment, for the same reason (namely that it's more informative to the client than just hanging up, since they won't see our stderr over some protocols). There are two tests here. We cover it most directly in t5530 by invoking upload-pack, which matches the existing "not our ref" test. But a more end-to-end check is that "git fetch" actually shows the message to the client. We're already checking in t5516 that this case fails, so we can just check stderr there, too. Note that even after we started ignoring SIGPIPE in 8bf4becf0c, this could in theory still be racy as described in that commit (because we die() on write failures before pumping the connection for any ERR packets). In practice this should be OK for this case. The server will not actually check reachability until it has received our whole group of "want" lines. And since we have no objects in the repository, we won't send any "have" lines, meaning we're always waiting to read the server response. Note also that this case cannot happen in the v2 protocol, since it allows any available object to be requested. However, we don't have to take any steps to protect against the upcoming GIT_TEST_PROTOCOL_VERSION in our tests: - the tests in t5516 would already need to be skipped under v2, and that is covered by ab0c5f5096 (tests: always test fetch of unreachable with v0, 2019-02-25) - the tests in t5530 invoke upload-pack directly, which will continue to default to v0. Eventually we may have a test setting which uses v2 even for bare upload-pack calls, but we can't override it here until we know what the setting looks like. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-13 13:53:34 +08:00
}
}
}
static void send_shallow(struct packet_writer *writer,
struct commit_list *result)
{
while (result) {
struct object *object = &result->item->object;
if (!(object->flags & (CLIENT_SHALLOW|NOT_SHALLOW))) {
packet_writer_write(writer, "shallow %s",
oid_to_hex(&object->oid));
register_shallow(the_repository, &object->oid);
shallow_nr++;
}
result = result->next;
}
}
static void send_unshallow(struct packet_writer *writer,
const struct object_array *shallows,
struct object_array *want_obj)
{
int i;
for (i = 0; i < shallows->nr; i++) {
struct object *object = shallows->objects[i].item;
if (object->flags & NOT_SHALLOW) {
struct commit_list *parents;
packet_writer_write(writer, "unshallow %s",
oid_to_hex(&object->oid));
object->flags &= ~CLIENT_SHALLOW;
/*
* We want to _register_ "object" as shallow, but we
* also need to traverse object's parents to deepen a
* shallow clone. Unregister it for now so we can
* parse and add the parents to the want list, then
* re-register it.
*/
unregister_shallow(&object->oid);
object->parsed = 0;
parse_commit_or_die((struct commit *)object);
parents = ((struct commit *)object)->parents;
while (parents) {
add_object_array(&parents->item->object,
NULL, want_obj);
parents = parents->next;
}
add_object_array(object, NULL, &extra_edge_obj);
}
/* make sure commit traversal conforms to client */
register_shallow(the_repository, &object->oid);
}
}
static int check_ref(const char *refname_full, const struct object_id *oid,
int flag, void *cb_data);
static void deepen(struct packet_writer *writer, int depth, int deepen_relative,
struct object_array *shallows, struct object_array *want_obj)
{
if (depth == INFINITE_DEPTH && !is_repository_shallow(the_repository)) {
int i;
for (i = 0; i < shallows->nr; i++) {
struct object *object = shallows->objects[i].item;
object->flags |= NOT_SHALLOW;
}
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
} else if (deepen_relative) {
struct object_array reachable_shallows = OBJECT_ARRAY_INIT;
struct commit_list *result;
/*
* Checking for reachable shallows requires that our refs be
* marked with OUR_REF.
*/
head_ref_namespaced(check_ref, NULL);
for_each_namespaced_ref(check_ref, NULL);
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
get_reachable_list(shallows, &reachable_shallows);
result = get_shallow_commits(&reachable_shallows,
depth + 1,
SHALLOW, NOT_SHALLOW);
send_shallow(writer, result);
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
free_commit_list(result);
object_array_clear(&reachable_shallows);
} else {
struct commit_list *result;
result = get_shallow_commits(want_obj, depth,
SHALLOW, NOT_SHALLOW);
send_shallow(writer, result);
free_commit_list(result);
}
send_unshallow(writer, shallows, want_obj);
}
static void deepen_by_rev_list(struct packet_writer *writer, int ac,
const char **av,
struct object_array *shallows,
struct object_array *want_obj)
{
struct commit_list *result;
upload-pack: disable commit graph more gently for shallow traversal When the client has asked for certain shallow options like "deepen-since", we do a custom rev-list walk that pretends to be shallow. Before doing so, we have to disable the commit-graph, since it is not compatible with the shallow view of the repository. That's handled by 829a321569 (commit-graph: close_commit_graph before shallow walk, 2018-08-20). That commit literally closes and frees our repo->objects->commit_graph struct. That creates an interesting problem for commits that have _already_ been parsed using the commit graph. Their commit->object.parsed flag is set, their commit->graph_pos is set, but their commit->maybe_tree may still be NULL. When somebody later calls repo_get_commit_tree(), we see that we haven't loaded the tree oid yet and try to get it from the commit graph. But since it has been freed, we segfault! So the root of the issue is a data dependency between the commit's lazy-load of the tree oid and the fact that the commit graph can go away mid-process. How can we resolve it? There are a couple of general approaches: 1. The obvious answer is to avoid loading the tree from the graph when we see that it's NULL. But then what do we return for the tree oid? If we return NULL, our caller in do_traverse() will rightly complain that we have no tree. We'd have to fallback to loading the actual commit object and re-parsing it. That requires teaching parse_commit_buffer() to understand re-parsing (i.e., not starting from a clean slate and not leaking any allocated bits like parent list pointers). 2. When we close the commit graph, walk through the set of in-memory objects and clear any graph_pos pointers. But this means we also have to "unparse" any such commits so that we know they still need to open the commit object to fill in their trees. So it's no less complicated than (1), and is more expensive (since we clear objects we might not later need). 3. Stop freeing the commit-graph struct. Continue to let it be used for lazy-loads of tree oids, but let upload-pack specify that it shouldn't be used for further commit parsing. 4. Push the whole shallow rev-list out to its own sub-process, with the commit-graph disabled from the start, giving it a clean memory space to work from. I've chosen (3) here. Options (1) and (2) would work, but are non-trivial to implement. Option (4) is more expensive, and I'm not sure how complicated it is (shelling out for the actual rev-list part is easy, but we do then parse the resulting commits internally, and I'm not clear which parts need to be handling shallow-ness). The new test in t5500 triggers this segfault, but see the comments there for how horribly intimate it has to be with how both upload-pack and commit graphs work. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-09-12 22:44:45 +08:00
disable_commit_graph(the_repository);
result = get_shallow_commits_by_rev_list(ac, av, SHALLOW, NOT_SHALLOW);
send_shallow(writer, result);
free_commit_list(result);
send_unshallow(writer, shallows, want_obj);
}
/* Returns 1 if a shallow list is sent or 0 otherwise */
static int send_shallow_list(struct packet_writer *writer,
int depth, int deepen_rev_list,
timestamp_t deepen_since,
struct string_list *deepen_not,
int deepen_relative,
struct object_array *shallows,
struct object_array *want_obj)
{
int ret = 0;
if (depth > 0 && deepen_rev_list)
die("git upload-pack: deepen and deepen-since (or deepen-not) cannot be used together");
if (depth > 0) {
deepen(writer, depth, deepen_relative, shallows, want_obj);
ret = 1;
} else if (deepen_rev_list) {
struct argv_array av = ARGV_ARRAY_INIT;
int i;
argv_array_push(&av, "rev-list");
if (deepen_since)
argv_array_pushf(&av, "--max-age=%"PRItime, deepen_since);
if (deepen_not->nr) {
argv_array_push(&av, "--not");
for (i = 0; i < deepen_not->nr; i++) {
struct string_list_item *s = deepen_not->items + i;
argv_array_push(&av, s->string);
}
argv_array_push(&av, "--not");
}
for (i = 0; i < want_obj->nr; i++) {
struct object *o = want_obj->objects[i].item;
argv_array_push(&av, oid_to_hex(&o->oid));
}
deepen_by_rev_list(writer, av.argc, av.argv, shallows, want_obj);
argv_array_clear(&av);
ret = 1;
} else {
if (shallows->nr > 0) {
int i;
for (i = 0; i < shallows->nr; i++)
register_shallow(the_repository,
&shallows->objects[i].item->oid);
}
}
shallow_nr += shallows->nr;
return ret;
}
static int process_shallow(const char *line, struct object_array *shallows)
{
const char *arg;
if (skip_prefix(line, "shallow ", &arg)) {
struct object_id oid;
struct object *object;
if (get_oid_hex(arg, &oid))
die("invalid shallow line: %s", line);
object = parse_object(the_repository, &oid);
if (!object)
return 1;
if (object->type != OBJ_COMMIT)
die("invalid shallow object %s", oid_to_hex(&oid));
if (!(object->flags & CLIENT_SHALLOW)) {
object->flags |= CLIENT_SHALLOW;
add_object_array(object, NULL, shallows);
}
return 1;
}
return 0;
}
static int process_deepen(const char *line, int *depth)
{
const char *arg;
if (skip_prefix(line, "deepen ", &arg)) {
char *end = NULL;
*depth = (int)strtol(arg, &end, 0);
if (!end || *end || *depth <= 0)
die("Invalid deepen: %s", line);
return 1;
}
return 0;
}
static int process_deepen_since(const char *line, timestamp_t *deepen_since, int *deepen_rev_list)
{
const char *arg;
if (skip_prefix(line, "deepen-since ", &arg)) {
char *end = NULL;
*deepen_since = parse_timestamp(arg, &end, 0);
if (!end || *end || !deepen_since ||
/* revisions.c's max_age -1 is special */
*deepen_since == -1)
die("Invalid deepen-since: %s", line);
*deepen_rev_list = 1;
return 1;
}
return 0;
}
static int process_deepen_not(const char *line, struct string_list *deepen_not, int *deepen_rev_list)
{
const char *arg;
if (skip_prefix(line, "deepen-not ", &arg)) {
char *ref = NULL;
struct object_id oid;
if (expand_ref(the_repository, arg, strlen(arg), &oid, &ref) != 1)
die("git upload-pack: ambiguous deepen-not: %s", line);
string_list_append(deepen_not, ref);
free(ref);
*deepen_rev_list = 1;
return 1;
}
return 0;
}
static void receive_needs(struct upload_pack_data *data,
struct packet_reader *reader)
{
int has_non_tip = 0;
shallow_nr = 0;
for (;;) {
struct object *o;
const char *features;
struct object_id oid_buf;
const char *arg;
reset_timeout(data->timeout);
if (packet_reader_read(reader) != PACKET_READ_NORMAL)
break;
if (process_shallow(reader->line, &data->shallows))
continue;
if (process_deepen(reader->line, &data->depth))
continue;
if (process_deepen_since(reader->line, &data->deepen_since, &data->deepen_rev_list))
continue;
if (process_deepen_not(reader->line, &data->deepen_not, &data->deepen_rev_list))
continue;
if (skip_prefix(reader->line, "filter ", &arg)) {
if (!data->filter_capability_requested)
die("git upload-pack: filtering capability not negotiated");
list_objects_filter_die_if_populated(&data->filter_options);
parse_list_objects_filter(&data->filter_options, arg);
continue;
}
if (!skip_prefix(reader->line, "want ", &arg) ||
parse_oid_hex(arg, &oid_buf, &features))
die("git upload-pack: protocol error, "
"expected to get object ID, not '%s'", reader->line);
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
if (parse_feature_request(features, "deepen-relative"))
data->deepen_relative = 1;
if (parse_feature_request(features, "multi_ack_detailed"))
data->multi_ack = MULTI_ACK_DETAILED;
else if (parse_feature_request(features, "multi_ack"))
data->multi_ack = MULTI_ACK;
if (parse_feature_request(features, "no-done"))
data->no_done = 1;
if (parse_feature_request(features, "thin-pack"))
data->use_thin_pack = 1;
if (parse_feature_request(features, "ofs-delta"))
data->use_ofs_delta = 1;
if (parse_feature_request(features, "side-band-64k"))
data->use_sideband = LARGE_PACKET_MAX;
else if (parse_feature_request(features, "side-band"))
data->use_sideband = DEFAULT_PACKET_MAX;
if (parse_feature_request(features, "no-progress"))
data->no_progress = 1;
if (parse_feature_request(features, "include-tag"))
data->use_include_tag = 1;
if (allow_filter && parse_feature_request(features, "filter"))
data->filter_capability_requested = 1;
o = parse_object(the_repository, &oid_buf);
if (!o) {
packet_writer_error(&data->writer,
"upload-pack: not our ref %s",
oid_to_hex(&oid_buf));
die("git upload-pack: not our ref %s",
oid_to_hex(&oid_buf));
}
if (!(o->flags & WANTED)) {
o->flags |= WANTED;
if (!((allow_unadvertised_object_request & ALLOW_ANY_SHA1) == ALLOW_ANY_SHA1
|| is_our_ref(o)))
has_non_tip = 1;
add_object_array(o, NULL, &data->want_obj);
}
}
/*
* We have sent all our refs already, and the other end
* should have chosen out of them. When we are operating
* in the stateless RPC mode, however, their choice may
* have been based on the set of older refs advertised
* by another process that handled the initial request.
*/
if (has_non_tip)
check_non_tip(data);
if (!data->use_sideband && data->daemon_mode)
data->no_progress = 1;
if (data->depth == 0 && !data->deepen_rev_list && data->shallows.nr == 0)
return;
if (send_shallow_list(&data->writer,
data->depth,
data->deepen_rev_list,
data->deepen_since,
&data->deepen_not,
data->deepen_relative,
&data->shallows,
&data->want_obj))
packet_flush(1);
}
upload/receive-pack: allow hiding ref hierarchies A repository may have refs that are only used for its internal bookkeeping purposes that should not be exposed to the others that come over the network. Teach upload-pack to omit some refs from its initial advertisement by paying attention to the uploadpack.hiderefs multi-valued configuration variable. Do the same to receive-pack via the receive.hiderefs variable. As a convenient short-hand, allow using transfer.hiderefs to set the value to both of these variables. Any ref that is under the hierarchies listed on the value of these variable is excluded from responses to requests made by "ls-remote", "fetch", etc. (for upload-pack) and "push" (for receive-pack). Because these hidden refs do not count as OUR_REF, an attempt to fetch objects at the tip of them will be rejected, and because these refs do not get advertised, "git push :" will not see local branches that have the same name as them as "matching" ones to be sent. An attempt to update/delete these hidden refs with an explicit refspec, e.g. "git push origin :refs/hidden/22", is rejected. This is not a new restriction. To the pusher, it would appear that there is no such ref, so its push request will conclude with "Now that I sent you all the data, it is time for you to update the refs. I saw that the ref did not exist when I started pushing, and I want the result to point at this commit". The receiving end will apply the compare-and-swap rule to this request and rejects the push with "Well, your update request conflicts with somebody else; I see there is such a ref.", which is the right thing to do. Otherwise a push to a hidden ref will always be "the last one wins", which is not a good default. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 08:08:30 +08:00
/* return non-zero if the ref is hidden, otherwise 0 */
static int mark_our_ref(const char *refname, const char *refname_full,
const struct object_id *oid)
{
struct object *o = lookup_unknown_object(oid);
upload/receive-pack: allow hiding ref hierarchies A repository may have refs that are only used for its internal bookkeeping purposes that should not be exposed to the others that come over the network. Teach upload-pack to omit some refs from its initial advertisement by paying attention to the uploadpack.hiderefs multi-valued configuration variable. Do the same to receive-pack via the receive.hiderefs variable. As a convenient short-hand, allow using transfer.hiderefs to set the value to both of these variables. Any ref that is under the hierarchies listed on the value of these variable is excluded from responses to requests made by "ls-remote", "fetch", etc. (for upload-pack) and "push" (for receive-pack). Because these hidden refs do not count as OUR_REF, an attempt to fetch objects at the tip of them will be rejected, and because these refs do not get advertised, "git push :" will not see local branches that have the same name as them as "matching" ones to be sent. An attempt to update/delete these hidden refs with an explicit refspec, e.g. "git push origin :refs/hidden/22", is rejected. This is not a new restriction. To the pusher, it would appear that there is no such ref, so its push request will conclude with "Now that I sent you all the data, it is time for you to update the refs. I saw that the ref did not exist when I started pushing, and I want the result to point at this commit". The receiving end will apply the compare-and-swap rule to this request and rejects the push with "Well, your update request conflicts with somebody else; I see there is such a ref.", which is the right thing to do. Otherwise a push to a hidden ref will always be "the last one wins", which is not a good default. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 08:08:30 +08:00
if (ref_is_hidden(refname, refname_full)) {
o->flags |= HIDDEN_REF;
upload/receive-pack: allow hiding ref hierarchies A repository may have refs that are only used for its internal bookkeeping purposes that should not be exposed to the others that come over the network. Teach upload-pack to omit some refs from its initial advertisement by paying attention to the uploadpack.hiderefs multi-valued configuration variable. Do the same to receive-pack via the receive.hiderefs variable. As a convenient short-hand, allow using transfer.hiderefs to set the value to both of these variables. Any ref that is under the hierarchies listed on the value of these variable is excluded from responses to requests made by "ls-remote", "fetch", etc. (for upload-pack) and "push" (for receive-pack). Because these hidden refs do not count as OUR_REF, an attempt to fetch objects at the tip of them will be rejected, and because these refs do not get advertised, "git push :" will not see local branches that have the same name as them as "matching" ones to be sent. An attempt to update/delete these hidden refs with an explicit refspec, e.g. "git push origin :refs/hidden/22", is rejected. This is not a new restriction. To the pusher, it would appear that there is no such ref, so its push request will conclude with "Now that I sent you all the data, it is time for you to update the refs. I saw that the ref did not exist when I started pushing, and I want the result to point at this commit". The receiving end will apply the compare-and-swap rule to this request and rejects the push with "Well, your update request conflicts with somebody else; I see there is such a ref.", which is the right thing to do. Otherwise a push to a hidden ref will always be "the last one wins", which is not a good default. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 08:08:30 +08:00
return 1;
}
o->flags |= OUR_REF;
return 0;
}
static int check_ref(const char *refname_full, const struct object_id *oid,
int flag, void *cb_data)
{
const char *refname = strip_namespace(refname_full);
mark_our_ref(refname, refname_full, oid);
return 0;
}
static void format_symref_info(struct strbuf *buf, struct string_list *symref)
{
struct string_list_item *item;
if (!symref->nr)
return;
for_each_string_list_item(item, symref)
strbuf_addf(buf, " symref=%s:%s", item->string, (char *)item->util);
}
static int send_ref(const char *refname, const struct object_id *oid,
int flag, void *cb_data)
{
static const char *capabilities = "multi_ack thin-pack side-band"
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
" side-band-64k ofs-delta shallow deepen-since deepen-not"
" deepen-relative no-progress include-tag multi_ack_detailed";
const char *refname_nons = strip_namespace(refname);
struct object_id peeled;
struct upload_pack_data *data = cb_data;
if (mark_our_ref(refname_nons, refname, oid))
upload/receive-pack: allow hiding ref hierarchies A repository may have refs that are only used for its internal bookkeeping purposes that should not be exposed to the others that come over the network. Teach upload-pack to omit some refs from its initial advertisement by paying attention to the uploadpack.hiderefs multi-valued configuration variable. Do the same to receive-pack via the receive.hiderefs variable. As a convenient short-hand, allow using transfer.hiderefs to set the value to both of these variables. Any ref that is under the hierarchies listed on the value of these variable is excluded from responses to requests made by "ls-remote", "fetch", etc. (for upload-pack) and "push" (for receive-pack). Because these hidden refs do not count as OUR_REF, an attempt to fetch objects at the tip of them will be rejected, and because these refs do not get advertised, "git push :" will not see local branches that have the same name as them as "matching" ones to be sent. An attempt to update/delete these hidden refs with an explicit refspec, e.g. "git push origin :refs/hidden/22", is rejected. This is not a new restriction. To the pusher, it would appear that there is no such ref, so its push request will conclude with "Now that I sent you all the data, it is time for you to update the refs. I saw that the ref did not exist when I started pushing, and I want the result to point at this commit". The receiving end will apply the compare-and-swap rule to this request and rejects the push with "Well, your update request conflicts with somebody else; I see there is such a ref.", which is the right thing to do. Otherwise a push to a hidden ref will always be "the last one wins", which is not a good default. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 08:08:30 +08:00
return 0;
if (capabilities) {
struct strbuf symref_info = STRBUF_INIT;
format_symref_info(&symref_info, &data->symref);
packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n",
oid_to_hex(oid), refname_nons,
0, capabilities,
(allow_unadvertised_object_request & ALLOW_TIP_SHA1) ?
" allow-tip-sha1-in-want" : "",
(allow_unadvertised_object_request & ALLOW_REACHABLE_SHA1) ?
" allow-reachable-sha1-in-want" : "",
data->stateless_rpc ? " no-done" : "",
symref_info.buf,
allow_filter ? " filter" : "",
git_user_agent_sanitized());
strbuf_release(&symref_info);
} else {
packet_write_fmt(1, "%s %s\n", oid_to_hex(oid), refname_nons);
}
capabilities = NULL;
if (!peel_ref(refname, &peeled))
packet_write_fmt(1, "%s %s^{}\n", oid_to_hex(&peeled), refname_nons);
return 0;
}
static int find_symref(const char *refname, const struct object_id *oid,
int flag, void *cb_data)
{
const char *symref_target;
struct string_list_item *item;
if ((flag & REF_ISSYMREF) == 0)
return 0;
symref_target = resolve_ref_unsafe(refname, 0, NULL, &flag);
if (!symref_target || (flag & REF_ISSYMREF) == 0)
die("'%s' is a symref but it is not?", refname);
upload-pack: strip namespace from symref data Since 7171d8c15f (upload-pack: send symbolic ref information as capability, 2013-09-17), we've sent cloning and fetching clients special information about which branch HEAD is pointing to, so that they don't have to guess based on matching up commit ids. However, this feature has never worked properly with the GIT_NAMESPACE feature. Because upload-pack uses head_ref_namespaced(find_symref), we do find and report on refs/namespaces/foo/HEAD instead of the actual HEAD of the repo. This makes sense, since the branch pointed to by the top-level HEAD may not be advertised at all. But we do two things wrong: 1. We report the full name refs/namespaces/foo/HEAD, instead of just HEAD. Meaning no client is going to bother doing anything with that symref, since we're not otherwise advertising it. 2. We report the symref destination using its full name (e.g., refs/namespaces/foo/refs/heads/master). That's similarly useless to the client, who only saw "refs/heads/master" in the advertisement. We should be stripping the namespace prefix off of both places (which this patch fixes). Likely nobody noticed because we tend to do the right thing anyway. Bug (1) means that we said nothing about HEAD (just refs/namespace/foo/HEAD). And so the client half of the code, from a45b5f0552 (connect: annotate refs with their symref information in get_remote_head(), 2013-09-17), does not annotate HEAD, and we use the fallback in guess_remote_head(), matching refs by object id. Which is usually right. It only falls down in ambiguous cases, like the one laid out in the included test. This also means that we don't have to worry about breaking anybody who was putting pre-stripped names into their namespace symrefs when we fix bug (2). Because of bug (1), nobody would have been using the symref we advertised in the first place (not to mention that those symrefs would have appeared broken for any non-namespaced access). Note that we have separate fixes here for the v0 and v2 protocols. The symref advertisement moved in v2 to be a part of the ls-refs command. This actually gets part (1) right, since the symref annotation piggy-backs on the existing ref advertisement, which is properly stripped. But it still needs a fix for part (2). The included tests cover both protocols. Reported-by: Bryan Turner <bturner@atlassian.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-05-23 14:11:21 +08:00
item = string_list_append(cb_data, strip_namespace(refname));
item->util = xstrdup(strip_namespace(symref_target));
return 0;
}
static int upload_pack_config(const char *var, const char *value, void *cb_data)
upload/receive-pack: allow hiding ref hierarchies A repository may have refs that are only used for its internal bookkeeping purposes that should not be exposed to the others that come over the network. Teach upload-pack to omit some refs from its initial advertisement by paying attention to the uploadpack.hiderefs multi-valued configuration variable. Do the same to receive-pack via the receive.hiderefs variable. As a convenient short-hand, allow using transfer.hiderefs to set the value to both of these variables. Any ref that is under the hierarchies listed on the value of these variable is excluded from responses to requests made by "ls-remote", "fetch", etc. (for upload-pack) and "push" (for receive-pack). Because these hidden refs do not count as OUR_REF, an attempt to fetch objects at the tip of them will be rejected, and because these refs do not get advertised, "git push :" will not see local branches that have the same name as them as "matching" ones to be sent. An attempt to update/delete these hidden refs with an explicit refspec, e.g. "git push origin :refs/hidden/22", is rejected. This is not a new restriction. To the pusher, it would appear that there is no such ref, so its push request will conclude with "Now that I sent you all the data, it is time for you to update the refs. I saw that the ref did not exist when I started pushing, and I want the result to point at this commit". The receiving end will apply the compare-and-swap rule to this request and rejects the push with "Well, your update request conflicts with somebody else; I see there is such a ref.", which is the right thing to do. Otherwise a push to a hidden ref will always be "the last one wins", which is not a good default. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 08:08:30 +08:00
{
if (!strcmp("uploadpack.allowtipsha1inwant", var)) {
if (git_config_bool(var, value))
allow_unadvertised_object_request |= ALLOW_TIP_SHA1;
else
allow_unadvertised_object_request &= ~ALLOW_TIP_SHA1;
} else if (!strcmp("uploadpack.allowreachablesha1inwant", var)) {
if (git_config_bool(var, value))
allow_unadvertised_object_request |= ALLOW_REACHABLE_SHA1;
else
allow_unadvertised_object_request &= ~ALLOW_REACHABLE_SHA1;
} else if (!strcmp("uploadpack.allowanysha1inwant", var)) {
if (git_config_bool(var, value))
allow_unadvertised_object_request |= ALLOW_ANY_SHA1;
else
allow_unadvertised_object_request &= ~ALLOW_ANY_SHA1;
} else if (!strcmp("uploadpack.keepalive", var)) {
keepalive = git_config_int(var, value);
if (!keepalive)
keepalive = -1;
} else if (!strcmp("uploadpack.allowfilter", var)) {
allow_filter = git_config_bool(var, value);
} else if (!strcmp("uploadpack.allowrefinwant", var)) {
allow_ref_in_want = git_config_bool(var, value);
} else if (!strcmp("uploadpack.allowsidebandall", var)) {
allow_sideband_all = git_config_bool(var, value);
Honor core.precomposeUnicode in more places On Mac's HFS where git sets core.precomposeUnicode to true automatically by git init/clone, when a user creates a simple unicode refname (in NFC format) such as españa: $ git branch españa different commands would display the branch name differently. For example, git branch, git log --decorate, and git fast-export all used 65 73 70 61 c3 b1 61 (or "espa\xc3\xb1a") (NFC form) while show-ref would use 65 73 70 61 6e cc 83 61 (or "espan\xcc\x83a") (NFD form). A stress test for git filter-repo was tripped up by this inconsistency, though digging in I found that the problems could compound; for example, if the user ran $ git pack-refs --all and then tried to check out the branch, they would be met with: $ git checkout españa error: pathspec 'españa' did not match any file(s) known to git $ git checkout españa -- fatal: invalid reference: españa $ git branch españa * master Note that the user could run the `git branch` command first and copy and paste the `españa` portion of the output and still see the same two errors. Also, if the user added --no-prune to the pack-refs command, then they would see three branches: master, españa, and españa (those last two are NFC vs. NFD forms, even if they render the same). Further, if the user had the `españa` branch checked out before running `git pack-refs --all`, the user would be greeted with (note that I'm trimming trailing output with an ellipsis): $ git rev-parse HEAD fatal: ambiguous argument 'HEAD': unknown revision or path... $ git status On branch españa No commits yet... Or worse, if the user didn't check this stuff first, running `git commit` will create a new commit with all changes of all of history being squashed into it. In addition to pack-refs, one could also get into this state with upload-pack or anything that calls either pack-refs or upload-pack (e.g. gc or clone). Add code in a few places (pack-refs, show-ref, upload-pack) to check and honor the setting of core.precomposeUnicode to avoid these bugs. Signed-off-by: Elijah Newren <newren@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-04-25 22:58:54 +08:00
} else if (!strcmp("core.precomposeunicode", var)) {
precomposed_unicode = git_config_bool(var, value);
}
upload-pack: fix broken if/else chain in config callback The upload_pack_config() callback uses an if/else chain like: if (!strcmp(var, "a")) ... else if (!strcmp(var, "b")) ... etc This works as long as the conditions are mutually exclusive, but one of them is not. 20b20a22f8 (upload-pack: provide a hook for running pack-objects, 2016-05-18) added: else if (current_config_scope() != CONFIG_SCOPE_REPO) { ... check some more options ... } That was fine in that commit, because it came at the end of the chain. But later, 10ac85c785 (upload-pack: add object filtering for partial clone, 2017-12-08) did this: else if (current_config_scope() != CONFIG_SCOPE_REPO) { ... check some more options ... } else if (!strcmp("uploadpack.allowfilter", var)) ... We'd always check the scope condition first, meaning we'd _only_ respect allowfilter when it's in the repo config. You can see this with: git -c uploadpack.allowfilter=true upload-pack . | head -1 which will not advertise the filter capability (but will after this patch). We never noticed because: - our tests always set it in the repo config - in protocol v2, we use a different code path that actually calls repo_config_get_bool() separately, so that _does_ work. Real-world people experimenting with this may be using v2. The more recent uploadpack.allowrefinwant option is in the same boat. There are a few possible fixes: 1. Bump the scope conditional back to the bottom of the chain. But that just means somebody else is likely to make the same mistake later. 2. Make the conditional more like the others. I.e.: else if (!current_config_scope() != CONFIG_SCOPE_REPO && !strcmp(var, "uploadpack.notallowedinrepo")) This works, but the idea of the original structure was that we may grow multiple sensitive options like this. 3. Pull it out of the chain entirely. The chain mostly serves to avoid extra strcmp() calls after we've found a match. But it's not worth caring about those. In the worst case, when there isn't a match, we're already hitting every strcmp (and this happens regularly for stuff like "core.bare", etc). This patch does (3). Signed-off-by: Jeff King <peff@peff.net> Reviewed-by: Josh Steadmon <steadmon@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-24 15:27:52 +08:00
if (current_config_scope() != CONFIG_SCOPE_LOCAL &&
current_config_scope() != CONFIG_SCOPE_WORKTREE) {
upload-pack: fix broken if/else chain in config callback The upload_pack_config() callback uses an if/else chain like: if (!strcmp(var, "a")) ... else if (!strcmp(var, "b")) ... etc This works as long as the conditions are mutually exclusive, but one of them is not. 20b20a22f8 (upload-pack: provide a hook for running pack-objects, 2016-05-18) added: else if (current_config_scope() != CONFIG_SCOPE_REPO) { ... check some more options ... } That was fine in that commit, because it came at the end of the chain. But later, 10ac85c785 (upload-pack: add object filtering for partial clone, 2017-12-08) did this: else if (current_config_scope() != CONFIG_SCOPE_REPO) { ... check some more options ... } else if (!strcmp("uploadpack.allowfilter", var)) ... We'd always check the scope condition first, meaning we'd _only_ respect allowfilter when it's in the repo config. You can see this with: git -c uploadpack.allowfilter=true upload-pack . | head -1 which will not advertise the filter capability (but will after this patch). We never noticed because: - our tests always set it in the repo config - in protocol v2, we use a different code path that actually calls repo_config_get_bool() separately, so that _does_ work. Real-world people experimenting with this may be using v2. The more recent uploadpack.allowrefinwant option is in the same boat. There are a few possible fixes: 1. Bump the scope conditional back to the bottom of the chain. But that just means somebody else is likely to make the same mistake later. 2. Make the conditional more like the others. I.e.: else if (!current_config_scope() != CONFIG_SCOPE_REPO && !strcmp(var, "uploadpack.notallowedinrepo")) This works, but the idea of the original structure was that we may grow multiple sensitive options like this. 3. Pull it out of the chain entirely. The chain mostly serves to avoid extra strcmp() calls after we've found a match. But it's not worth caring about those. In the worst case, when there isn't a match, we're already hitting every strcmp (and this happens regularly for stuff like "core.bare", etc). This patch does (3). Signed-off-by: Jeff King <peff@peff.net> Reviewed-by: Josh Steadmon <steadmon@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-24 15:27:52 +08:00
if (!strcmp("uploadpack.packobjectshook", var))
return git_config_string(&pack_objects_hook, var, value);
}
upload/receive-pack: allow hiding ref hierarchies A repository may have refs that are only used for its internal bookkeeping purposes that should not be exposed to the others that come over the network. Teach upload-pack to omit some refs from its initial advertisement by paying attention to the uploadpack.hiderefs multi-valued configuration variable. Do the same to receive-pack via the receive.hiderefs variable. As a convenient short-hand, allow using transfer.hiderefs to set the value to both of these variables. Any ref that is under the hierarchies listed on the value of these variable is excluded from responses to requests made by "ls-remote", "fetch", etc. (for upload-pack) and "push" (for receive-pack). Because these hidden refs do not count as OUR_REF, an attempt to fetch objects at the tip of them will be rejected, and because these refs do not get advertised, "git push :" will not see local branches that have the same name as them as "matching" ones to be sent. An attempt to update/delete these hidden refs with an explicit refspec, e.g. "git push origin :refs/hidden/22", is rejected. This is not a new restriction. To the pusher, it would appear that there is no such ref, so its push request will conclude with "Now that I sent you all the data, it is time for you to update the refs. I saw that the ref did not exist when I started pushing, and I want the result to point at this commit". The receiving end will apply the compare-and-swap rule to this request and rejects the push with "Well, your update request conflicts with somebody else; I see there is such a ref.", which is the right thing to do. Otherwise a push to a hidden ref will always be "the last one wins", which is not a good default. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-19 08:08:30 +08:00
return parse_hide_refs_config(var, value, "uploadpack");
}
void upload_pack(struct upload_pack_options *options)
{
struct packet_reader reader;
struct upload_pack_data data;
upload_pack_data_init(&data);
git_config(upload_pack_config, &data);
data.stateless_rpc = options->stateless_rpc;
data.daemon_mode = options->daemon_mode;
data.timeout = options->timeout;
head_ref_namespaced(find_symref, &data.symref);
if (options->advertise_refs || !data.stateless_rpc) {
reset_timeout(data.timeout);
head_ref_namespaced(send_ref, &data);
for_each_namespaced_ref(send_ref, &data);
advertise_shallow_grafts(1);
packet_flush(1);
} else {
head_ref_namespaced(check_ref, NULL);
for_each_namespaced_ref(check_ref, NULL);
}
if (!options->advertise_refs) {
packet_reader_init(&reader, 0, NULL, 0,
PACKET_READ_CHOMP_NEWLINE |
PACKET_READ_DIE_ON_ERR_PACKET);
receive_needs(&data, &reader);
if (data.want_obj.nr) {
get_common_commits(&data, &reader);
create_pack_file(&data);
}
}
upload_pack_data_clear(&data);
}
static int parse_want(struct packet_writer *writer, const char *line,
struct object_array *want_obj)
{
const char *arg;
if (skip_prefix(line, "want ", &arg)) {
struct object_id oid;
struct object *o;
if (get_oid_hex(arg, &oid))
die("git upload-pack: protocol error, "
"expected to get oid, not '%s'", line);
o = parse_object(the_repository, &oid);
if (!o) {
packet_writer_error(writer,
"upload-pack: not our ref %s",
oid_to_hex(&oid));
die("git upload-pack: not our ref %s",
oid_to_hex(&oid));
}
if (!(o->flags & WANTED)) {
o->flags |= WANTED;
add_object_array(o, NULL, want_obj);
}
return 1;
}
return 0;
}
static int parse_want_ref(struct packet_writer *writer, const char *line,
struct string_list *wanted_refs,
struct object_array *want_obj)
{
const char *arg;
if (skip_prefix(line, "want-ref ", &arg)) {
struct object_id oid;
struct string_list_item *item;
struct object *o;
if (read_ref(arg, &oid)) {
packet_writer_error(writer, "unknown ref %s", arg);
die("unknown ref %s", arg);
}
item = string_list_append(wanted_refs, arg);
item->util = oiddup(&oid);
o = parse_object_or_die(&oid, arg);
if (!(o->flags & WANTED)) {
o->flags |= WANTED;
add_object_array(o, NULL, want_obj);
}
return 1;
}
return 0;
}
static int parse_have(const char *line, struct oid_array *haves)
{
const char *arg;
if (skip_prefix(line, "have ", &arg)) {
struct object_id oid;
if (get_oid_hex(arg, &oid))
die("git upload-pack: expected SHA1 object, got '%s'", arg);
oid_array_append(haves, &oid);
return 1;
}
return 0;
}
static void process_args(struct packet_reader *request,
struct upload_pack_data *data)
{
while (packet_reader_read(request) == PACKET_READ_NORMAL) {
const char *arg = request->line;
const char *p;
/* process want */
if (parse_want(&data->writer, arg, &data->want_obj))
continue;
if (allow_ref_in_want &&
parse_want_ref(&data->writer, arg, &data->wanted_refs,
&data->want_obj))
continue;
/* process have line */
if (parse_have(arg, &data->haves))
continue;
/* process args like thin-pack */
if (!strcmp(arg, "thin-pack")) {
data->use_thin_pack = 1;
continue;
}
if (!strcmp(arg, "ofs-delta")) {
data->use_ofs_delta = 1;
continue;
}
if (!strcmp(arg, "no-progress")) {
data->no_progress = 1;
continue;
}
if (!strcmp(arg, "include-tag")) {
data->use_include_tag = 1;
continue;
}
if (!strcmp(arg, "done")) {
data->done = 1;
continue;
}
/* Shallow related arguments */
if (process_shallow(arg, &data->shallows))
continue;
if (process_deepen(arg, &data->depth))
continue;
if (process_deepen_since(arg, &data->deepen_since,
&data->deepen_rev_list))
continue;
if (process_deepen_not(arg, &data->deepen_not,
&data->deepen_rev_list))
continue;
if (!strcmp(arg, "deepen-relative")) {
data->deepen_relative = 1;
continue;
}
if (allow_filter && skip_prefix(arg, "filter ", &p)) {
list_objects_filter_die_if_populated(&data->filter_options);
parse_list_objects_filter(&data->filter_options, p);
continue;
}
if ((git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
allow_sideband_all) &&
!strcmp(arg, "sideband-all")) {
data->writer.use_sideband = 1;
continue;
}
/* ignore unknown lines maybe? */
die("unexpected line: '%s'", arg);
}
if (request->status != PACKET_READ_FLUSH)
die(_("expected flush after fetch arguments"));
}
static int process_haves(struct oid_array *haves, struct oid_array *common,
struct object_array *have_obj)
{
int i;
/* Process haves */
for (i = 0; i < haves->nr; i++) {
const struct object_id *oid = &haves->oid[i];
struct object *o;
int we_knew_they_have = 0;
if (!has_object_file(oid))
continue;
oid_array_append(common, oid);
o = parse_object(the_repository, oid);
if (!o)
die("oops (%s)", oid_to_hex(oid));
if (o->type == OBJ_COMMIT) {
struct commit_list *parents;
struct commit *commit = (struct commit *)o;
if (o->flags & THEY_HAVE)
we_knew_they_have = 1;
else
o->flags |= THEY_HAVE;
if (!oldest_have || (commit->date < oldest_have))
oldest_have = commit->date;
for (parents = commit->parents;
parents;
parents = parents->next)
parents->item->object.flags |= THEY_HAVE;
}
if (!we_knew_they_have)
add_object_array(o, NULL, have_obj);
}
return 0;
}
static int send_acks(struct packet_writer *writer, struct oid_array *acks,
const struct object_array *have_obj,
struct object_array *want_obj)
{
int i;
packet_writer_write(writer, "acknowledgments\n");
/* Send Acks */
if (!acks->nr)
packet_writer_write(writer, "NAK\n");
for (i = 0; i < acks->nr; i++) {
packet_writer_write(writer, "ACK %s\n",
oid_to_hex(&acks->oid[i]));
}
if (ok_to_give_up(have_obj, want_obj)) {
/* Send Ready */
packet_writer_write(writer, "ready\n");
return 1;
}
return 0;
}
static int process_haves_and_send_acks(struct upload_pack_data *data)
{
struct oid_array common = OID_ARRAY_INIT;
int ret = 0;
process_haves(&data->haves, &common, &data->have_obj);
if (data->done) {
ret = 1;
} else if (send_acks(&data->writer, &common,
&data->have_obj, &data->want_obj)) {
packet_writer_delim(&data->writer);
ret = 1;
} else {
/* Add Flush */
packet_writer_flush(&data->writer);
ret = 0;
}
oid_array_clear(&data->haves);
oid_array_clear(&common);
return ret;
}
static void send_wanted_ref_info(struct upload_pack_data *data)
{
const struct string_list_item *item;
if (!data->wanted_refs.nr)
return;
packet_writer_write(&data->writer, "wanted-refs\n");
for_each_string_list_item(item, &data->wanted_refs) {
packet_writer_write(&data->writer, "%s %s\n",
oid_to_hex(item->util),
item->string);
}
packet_writer_delim(&data->writer);
}
static void send_shallow_info(struct upload_pack_data *data)
{
/* No shallow info needs to be sent */
if (!data->depth && !data->deepen_rev_list && !data->shallows.nr &&
!is_repository_shallow(the_repository))
return;
packet_writer_write(&data->writer, "shallow-info\n");
if (!send_shallow_list(&data->writer, data->depth,
data->deepen_rev_list,
data->deepen_since, &data->deepen_not,
data->deepen_relative,
&data->shallows, &data->want_obj) &&
is_repository_shallow(the_repository))
deepen(&data->writer, INFINITE_DEPTH, data->deepen_relative,
&data->shallows, &data->want_obj);
packet_delim(1);
}
enum fetch_state {
FETCH_PROCESS_ARGS = 0,
FETCH_SEND_ACKS,
FETCH_SEND_PACK,
FETCH_DONE,
};
int upload_pack_v2(struct repository *r, struct argv_array *keys,
struct packet_reader *request)
{
enum fetch_state state = FETCH_PROCESS_ARGS;
struct upload_pack_data data;
upload-pack: clear flags before each v2 request Suppose a server has the following commit graph: A B \ / O We create a client by cloning A from the server with depth 1, and add many commits to it (so that future fetches span multiple requests due to lengthy negotiation). If it then fetches B using protocol v2, the fetch spanning multiple requests, the resulting packfile does not contain O even though the client did report that A is shallow. This is because upload_pack_v2() can be called multiple times while processing the same session. During the 2nd and all subsequent invocations, some object flags remain from the previous invocations. In particular, CLIENT_SHALLOW remains, preventing process_shallow() from adding client-reported shallows to the "shallows" array, and hence pack-objects not knowing about these client-reported shallows. Therefore, teach upload_pack_v2() to clear object flags at the start of each invocation. This has some other results: - THEY_HAVE gates addition of objects to have_obj in process_haves(). Previously in upload_pack_v2(), have_obj needed to be static because once an object is added to have_obj, it is never readded and thus we needed to retain the contents of have_obj between invocations. Now that flags are cleared, this is no longer necessary. This patch does not change the behavior of ok_to_give_up() (THEY_HAVE is still set on each "have") and got_oid() (used only in non-v2)); THEY_HAVE is not used in any other function. - WANTED gates addition of objects to want_obj in parse_want() and parse_want_ref(). It is also used in receive_needs(), but that is only used in non-v2. For the same reasons as THEY_HAVE, want_obj no longer needs to be static in upload_pack_v2(). - CLIENT_SHALLOW is changed as discussed above. Clearing of the other 5 flags does not affect functionality in v2. (Note that in non-v2, upload_pack() is only called once per process, so each invocation starts with blank flags anyway.) - OUR_REF is only used in non-v2. - COMMON_KNOWN is only used as a scratch flag in ok_to_give_up(). - SHALLOW is passed to invocations in deepen() and deepen_by_rev_list(), but upload-pack doesn't use it. - NOT_SHALLOW is used by send_shallow() and send_unshallow(), but invocations of those functions are always preceded by code that sets NOT_SHALLOW on the appropriate objects. - HIDDEN_REF is only used in non-v2. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-10-19 04:43:29 +08:00
clear_object_flags(ALL_FLAGS);
upload_pack_data_init(&data);
data.use_sideband = LARGE_PACKET_MAX;
git_config(upload_pack_config, &data);
while (state != FETCH_DONE) {
switch (state) {
case FETCH_PROCESS_ARGS:
process_args(request, &data);
if (!data.want_obj.nr) {
/*
* Request didn't contain any 'want' lines,
* guess they didn't want anything.
*/
state = FETCH_DONE;
} else if (data.haves.nr) {
/*
* Request had 'have' lines, so lets ACK them.
*/
state = FETCH_SEND_ACKS;
} else {
/*
* Request had 'want's but no 'have's so we can
* immedietly go to construct and send a pack.
*/
state = FETCH_SEND_PACK;
}
break;
case FETCH_SEND_ACKS:
if (process_haves_and_send_acks(&data))
state = FETCH_SEND_PACK;
else
state = FETCH_DONE;
break;
case FETCH_SEND_PACK:
send_wanted_ref_info(&data);
send_shallow_info(&data);
packet_writer_write(&data.writer, "packfile\n");
create_pack_file(&data);
state = FETCH_DONE;
break;
case FETCH_DONE:
continue;
}
}
upload_pack_data_clear(&data);
return 0;
}
int upload_pack_advertise(struct repository *r,
struct strbuf *value)
{
if (value) {
int allow_filter_value;
int allow_ref_in_want;
int allow_sideband_all_value;
strbuf_addstr(value, "shallow");
if (!repo_config_get_bool(the_repository,
"uploadpack.allowfilter",
&allow_filter_value) &&
allow_filter_value)
strbuf_addstr(value, " filter");
if (!repo_config_get_bool(the_repository,
"uploadpack.allowrefinwant",
&allow_ref_in_want) &&
allow_ref_in_want)
strbuf_addstr(value, " ref-in-want");
if (git_env_bool("GIT_TEST_SIDEBAND_ALL", 0) ||
(!repo_config_get_bool(the_repository,
"uploadpack.allowsidebandall",
&allow_sideband_all_value) &&
allow_sideband_all_value))
strbuf_addstr(value, " sideband-all");
}
return 1;
}