git/transport.c

1557 lines
41 KiB
C
Raw Normal View History

#include "cache.h"
#include "config.h"
#include "transport.h"
#include "hook.h"
#include "pkt-line.h"
#include "fetch-pack.h"
#include "remote.h"
#include "connect.h"
#include "send-pack.h"
#include "walker.h"
#include "bundle.h"
#include "dir.h"
#include "refs.h"
#include "refspec.h"
#include "branch.h"
#include "url.h"
#include "submodule.h"
#include "string-list.h"
#include "oid-array.h"
#include "sigchain.h"
#include "transport-internal.h"
#include "protocol.h"
#include "object-store.h"
#include "color.h"
static int transport_use_color = -1;
static char transport_colors[][COLOR_MAXLEN] = {
GIT_COLOR_RESET,
GIT_COLOR_RED /* REJECTED */
};
enum color_transport {
TRANSPORT_COLOR_RESET = 0,
TRANSPORT_COLOR_REJECTED = 1
};
static int transport_color_config(void)
{
const char *keys[] = {
"color.transport.reset",
"color.transport.rejected"
}, *key = "color.transport";
char *value;
int i;
static int initialized;
if (initialized)
return 0;
initialized = 1;
if (!git_config_get_string(key, &value))
transport_use_color = git_config_colorbool(key, value);
if (!want_color_stderr(transport_use_color))
return 0;
for (i = 0; i < ARRAY_SIZE(keys); i++)
if (!git_config_get_string(keys[i], &value)) {
if (!value)
return config_error_nonbool(keys[i]);
if (color_parse(value, transport_colors[i]) < 0)
return -1;
}
return 0;
}
static const char *transport_get_color(enum color_transport ix)
{
if (want_color_stderr(transport_use_color))
return transport_colors[ix];
return "";
}
static void set_upstreams(struct transport *transport, struct ref *refs,
int pretend)
{
struct ref *ref;
for (ref = refs; ref; ref = ref->next) {
const char *localname;
const char *tmp;
const char *remotename;
int flag = 0;
/*
* Check suitability for tracking. Must be successful /
* already up-to-date ref create/modify (not delete).
*/
if (ref->status != REF_STATUS_OK &&
ref->status != REF_STATUS_UPTODATE)
continue;
if (!ref->peer_ref)
continue;
if (is_null_oid(&ref->new_oid))
continue;
/* Follow symbolic refs (mainly for HEAD). */
localname = ref->peer_ref->name;
remotename = ref->name;
tmp = resolve_ref_unsafe(localname, RESOLVE_REF_READING,
NULL, &flag);
if (tmp && flag & REF_ISSYMREF &&
starts_with(tmp, "refs/heads/"))
localname = tmp;
/* Both source and destination must be local branches. */
if (!localname || !starts_with(localname, "refs/heads/"))
continue;
if (!remotename || !starts_with(remotename, "refs/heads/"))
continue;
if (!pretend) {
int flag = transport->verbose < 0 ? 0 : BRANCH_CONFIG_VERBOSE;
install_branch_config(flag, localname + 11,
transport->remote->name, remotename);
} else if (transport->verbose >= 0)
printf(_("Would set upstream of '%s' to '%s' of '%s'\n"),
localname + 11, remotename + 11,
transport->remote->name);
}
}
struct bundle_transport_data {
int fd;
struct bundle_header header;
unsigned get_refs_from_bundle_called : 1;
};
static void get_refs_from_bundle_inner(struct transport *transport)
{
struct bundle_transport_data *data = transport->data;
data->get_refs_from_bundle_called = 1;
if (data->fd > 0)
close(data->fd);
data->fd = read_bundle_header(transport->url, &data->header);
if (data->fd < 0)
die(_("could not read bundle '%s'"), transport->url);
transport->hash_algo = data->header.hash_algo;
}
static struct ref *get_refs_from_bundle(struct transport *transport,
int for_push,
struct transport_ls_refs_options *transport_options)
{
struct bundle_transport_data *data = transport->data;
struct ref *result = NULL;
int i;
if (for_push)
return NULL;
get_refs_from_bundle_inner(transport);
for (i = 0; i < data->header.references.nr; i++) {
struct string_list_item *e = data->header.references.items + i;
const char *name = e->string;
struct ref *ref = alloc_ref(name);
struct object_id *oid = e->util;
oidcpy(&ref->old_oid, oid);
ref->next = result;
result = ref;
}
return result;
}
static int fetch_refs_from_bundle(struct transport *transport,
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-02 04:13:20 +08:00
int nr_heads, struct ref **to_fetch)
{
struct bundle_transport_data *data = transport->data;
struct strvec extra_index_pack_args = STRVEC_INIT;
int ret;
if (transport->progress)
strvec_push(&extra_index_pack_args, "-v");
if (!data->get_refs_from_bundle_called)
get_refs_from_bundle_inner(transport);
ret = unbundle(the_repository, &data->header, data->fd,
&extra_index_pack_args);
transport->hash_algo = data->header.hash_algo;
return ret;
}
static int close_bundle(struct transport *transport)
{
struct bundle_transport_data *data = transport->data;
if (data->fd > 0)
close(data->fd);
bundle_header_release(&data->header);
free(data);
return 0;
}
struct git_transport_data {
struct git_transport_options options;
struct child_process *conn;
int fd[2];
unsigned got_remote_heads : 1;
enum protocol_version version;
struct oid_array extra_have;
struct oid_array shallow;
};
static int set_git_option(struct git_transport_options *opts,
const char *name, const char *value)
{
if (!strcmp(name, TRANS_OPT_UPLOADPACK)) {
opts->uploadpack = value;
return 0;
} else if (!strcmp(name, TRANS_OPT_RECEIVEPACK)) {
opts->receivepack = value;
return 0;
} else if (!strcmp(name, TRANS_OPT_THIN)) {
opts->thin = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_FOLLOWTAGS)) {
opts->followtags = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_KEEP)) {
opts->keep = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_UPDATE_SHALLOW)) {
opts->update_shallow = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_DEPTH)) {
if (!value)
opts->depth = 0;
else {
char *end;
opts->depth = strtol(value, &end, 0);
if (*end)
die(_("transport: invalid depth option '%s'"), value);
}
return 0;
} else if (!strcmp(name, TRANS_OPT_DEEPEN_SINCE)) {
opts->deepen_since = value;
return 0;
} else if (!strcmp(name, TRANS_OPT_DEEPEN_NOT)) {
opts->deepen_not = (const struct string_list *)value;
return 0;
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
} else if (!strcmp(name, TRANS_OPT_DEEPEN_RELATIVE)) {
opts->deepen_relative = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_FROM_PROMISOR)) {
opts->from_promisor = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) {
list_objects_filter_die_if_populated(&opts->filter_options);
parse_list_objects_filter(&opts->filter_options, value);
return 0;
} else if (!strcmp(name, TRANS_OPT_REFETCH)) {
opts->refetch = !!value;
return 0;
} else if (!strcmp(name, TRANS_OPT_REJECT_SHALLOW)) {
opts->reject_shallow = !!value;
return 0;
}
return 1;
}
static int connect_setup(struct transport *transport, int for_push)
{
struct git_transport_data *data = transport->data;
int flags = transport->verbose > 0 ? CONNECT_VERBOSE : 0;
if (data->conn)
return 0;
switch (transport->family) {
case TRANSPORT_FAMILY_ALL: break;
case TRANSPORT_FAMILY_IPV4: flags |= CONNECT_IPV4; break;
case TRANSPORT_FAMILY_IPV6: flags |= CONNECT_IPV6; break;
}
data->conn = git_connect(data->fd, transport->url,
for_push ? data->options.receivepack :
data->options.uploadpack,
flags);
return 0;
}
static void die_if_server_options(struct transport *transport)
{
if (!transport->server_options || !transport->server_options->nr)
return;
advise(_("see protocol.version in 'git help config' for more details"));
die(_("server options require protocol version 2 or later"));
}
/*
* Obtains the protocol version from the transport and writes it to
* transport->data->version, first connecting if not already connected.
*
* If the protocol version is one that allows skipping the listing of remote
* refs, and must_list_refs is 0, the listing of remote refs is skipped and
* this function returns NULL. Otherwise, this function returns the list of
* remote refs.
*/
static struct ref *handshake(struct transport *transport, int for_push,
struct transport_ls_refs_options *options,
int must_list_refs)
{
struct git_transport_data *data = transport->data;
struct ref *refs = NULL;
struct packet_reader reader;
int sid_len;
const char *server_sid;
connect_setup(transport, for_push);
packet_reader_init(&reader, data->fd[0], NULL, 0,
PACKET_READ_CHOMP_NEWLINE |
PACKET_READ_GENTLE_ON_EOF |
PACKET_READ_DIE_ON_ERR_PACKET);
data->version = discover_version(&reader);
switch (data->version) {
case protocol_v2:
if (server_feature_v2("session-id", &server_sid))
trace2_data_string("transfer", NULL, "server-sid", server_sid);
if (must_list_refs)
get_remote_refs(data->fd[1], &reader, &refs, for_push,
options,
stateless-connect: send response end packet Currently, remote-curl acts as a proxy and blindly forwards packets between an HTTP server and fetch-pack. In the case of a stateless RPC connection where the connection is terminated before the transaction is complete, remote-curl will blindly forward the packets before waiting on more input from fetch-pack. Meanwhile, fetch-pack will read the transaction and continue reading, expecting more input to continue the transaction. This results in a deadlock between the two processes. This can be seen in the following command which does not terminate: $ git -c protocol.version=2 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... whereas the v1 version does terminate as expected: $ git -c protocol.version=1 clone https://github.com/git/git.git --shallow-since=20151012 Cloning into 'git'... fatal: the remote end hung up unexpectedly Instead of blindly forwarding packets, make remote-curl insert a response end packet after proxying the responses from the remote server when using stateless_connect(). On the RPC client side, ensure that each response ends as described. A separate control packet is chosen because we need to be able to differentiate between what the remote server sends and remote-curl's control packets. By ensuring in the remote-curl code that a server cannot send response end packets, we prevent a malicious server from being able to perform a denial of service attack in which they spoof a response end packet and cause the described deadlock to happen. Reported-by: Force Charlie <charlieio@outlook.com> Helped-by: Jeff King <peff@peff.net> Signed-off-by: Denton Liu <liu.denton@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-05-19 18:54:00 +08:00
transport->server_options,
transport->stateless_rpc);
break;
case protocol_v1:
case protocol_v0:
die_if_server_options(transport);
get_remote_heads(&reader, &refs,
for_push ? REF_NORMAL : 0,
&data->extra_have,
&data->shallow);
server_sid = server_feature_value("session-id", &sid_len);
if (server_sid) {
char *sid = xstrndup(server_sid, sid_len);
trace2_data_string("transfer", NULL, "server-sid", sid);
free(sid);
}
break;
case protocol_unknown_version:
BUG("unknown protocol version");
}
data->got_remote_heads = 1;
transport->hash_algo = reader.hash_algo;
if (reader.line_peeked)
BUG("buffer must be empty at the end of handshake()");
return refs;
}
static struct ref *get_refs_via_connect(struct transport *transport, int for_push,
struct transport_ls_refs_options *options)
{
return handshake(transport, for_push, options, 1);
}
static int fetch_refs_via_pack(struct transport *transport,
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-02 04:13:20 +08:00
int nr_heads, struct ref **to_fetch)
{
int ret = 0;
struct git_transport_data *data = transport->data;
struct ref *refs = NULL;
struct fetch_pack_args args;
struct ref *refs_tmp = NULL;
memset(&args, 0, sizeof(args));
args.uploadpack = data->options.uploadpack;
args.keep_pack = data->options.keep;
args.lock_pack = 1;
args.use_thin_pack = data->options.thin;
args.include_tag = data->options.followtags;
make "git push -v" actually verbose Providing a single "-v" to "git push" currently does nothing. Giving two flags ("git push -v -v") turns on the first level of verbosity. This is caused by a regression introduced in 8afd8dc (push: support multiple levels of verbosity, 2010-02-24). Before the series containing 8afd8dc, the verbosity handling for fetching and pushing was completely separate. Commit bde873c refactored the verbosity handling out of the fetch side, and then 8afd8dc converted push to use the refactored code. However, the fetch and push sides numbered and passed along their verbosity levels differently. For both, a verbosity level of "-1" meant "quiet", and "0" meant "default output". But from there they differed. For fetch, a verbosity level of "1" indicated to the "fetch" program that it should make the status table slightly more verbose, showing up-to-date entries. A verbosity level of "2" meant that we should pass a verbose flag to the transport; in the case of fetch-pack, this displays protocol debugging information. As a result, the refactored code in bde873c checks for "verbosity >= 2", and only then passes it on to the transport. From the transport code's perspective, a verbosity of 0 or 1 both meant "0". Push, on the other hand, does not show its own status table; that is always handled by the transport layer or below (originally send-pack itself, but these days it is done by the transport code). So a verbosity level of 1 meant that we should pass the verbose flag to send-pack, so that it knows we want a verbose status table. However, once 8afd8dc switched it to the refactored fetch code, a verbosity level of 1 was now being ignored. Thus, you needed to artificially bump the verbosity to 2 (via "-v -v") to have any effect. We can fix this by letting the transport code know about the true verbosity level (i.e., let it distinguish level 0 or 1). We then have to also make an adjustment to any transport methods that assumed "verbose > 0" meant they could spew lots of debugging information. Before, they could only get "0" or "2", but now they will also receive "1". They need to adjust their condition for turning on such spew from "verbose > 0" to "verbose > 1". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-12-17 17:37:15 +08:00
args.verbose = (transport->verbose > 1);
args.quiet = (transport->verbose < 0);
args.no_progress = !transport->progress;
args.depth = data->options.depth;
args.deepen_since = data->options.deepen_since;
args.deepen_not = data->options.deepen_not;
fetch, upload-pack: --deepen=N extends shallow boundary by N commits In git-fetch, --depth argument is always relative with the latest remote refs. This makes it a bit difficult to cover this use case, where the user wants to make the shallow history, say 3 levels deeper. It would work if remote refs have not moved yet, but nobody can guarantee that, especially when that use case is performed a couple months after the last clone or "git fetch --depth". Also, modifying shallow boundary using --depth does not work well with clones created by --since or --not. This patch fixes that. A new argument --deepen=<N> will add <N> more (*) parent commits to the current history regardless of where remote refs are. Have/Want negotiation is still respected. So if remote refs move, the server will send two chunks: one between "have" and "want" and another to extend shallow history. In theory, the client could send no "want"s in order to get the second chunk only. But the protocol does not allow that. Either you send no want lines, which means ls-remote; or you have to send at least one want line that carries deep-relative to the server.. The main work was done by Dongcan Jiang. I fixed it up here and there. And of course all the bugs belong to me. (*) We could even support --deepen=<N> where <N> is negative. In that case we can cut some history from the shallow clone. This operation (and --depth=<shorter depth>) does not require interaction with remote side (and more complicated to implement as a result). Helped-by: Duy Nguyen <pclouds@gmail.com> Helped-by: Eric Sunshine <sunshine@sunshineco.com> Helped-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
args.deepen_relative = data->options.deepen_relative;
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-26 09:16:17 +08:00
args.check_self_contained_and_connected =
data->options.check_self_contained_and_connected;
args.cloning = transport->cloning;
args.update_shallow = data->options.update_shallow;
args.from_promisor = data->options.from_promisor;
args.filter_options = data->options.filter_options;
args.refetch = data->options.refetch;
args.stateless_rpc = transport->stateless_rpc;
args.server_options = transport->server_options;
args.negotiation_tips = data->options.negotiation_tips;
args.reject_shallow_remote = transport->smart_options->reject_shallow;
if (!data->got_remote_heads) {
int i;
int must_list_refs = 0;
for (i = 0; i < nr_heads; i++) {
if (!to_fetch[i]->exact_oid) {
must_list_refs = 1;
break;
}
}
refs_tmp = handshake(transport, 0, NULL, must_list_refs);
}
if (data->version == protocol_unknown_version)
BUG("unknown protocol version");
else if (data->version <= protocol_v1)
die_if_server_options(transport);
fetch: teach independent negotiation (no packfile) Currently, the packfile negotiation step within a Git fetch cannot be done independent of sending the packfile, even though there is at least one application wherein this is useful. Therefore, make it possible for this negotiation step to be done independently. A subsequent commit will use this for one such application - push negotiation. This feature is for protocol v2 only. (An implementation for protocol v0 would require a separate implementation in the fetch, transport, and transport helper code.) In the protocol, the main hindrance towards independent negotiation is that the server can unilaterally decide to send the packfile. This is solved by a "wait-for-done" argument: the server will then wait for the client to say "done". In practice, the client will never say it; instead it will cease requests once it is satisfied. In the client, the main change lies in the transport and transport helper code. fetch_refs_via_pack() performs everything needed - protocol version and capability checks, and the negotiation itself. There are 2 code paths that do not go through fetch_refs_via_pack() that needed to be individually excluded: the bundle transport (excluded through requiring smart_options, which the bundle transport doesn't support) and transport helpers that do not support takeover. If or when we support independent negotiation for protocol v0, we will need to modify these 2 code paths to support it. But for now, report failure if independent negotiation is requested in these cases. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-05 05:16:01 +08:00
if (data->options.acked_commits) {
if (data->version < protocol_v2) {
warning(_("--negotiate-only requires protocol v2"));
ret = -1;
} else if (!server_supports_feature("fetch", "wait-for-done", 0)) {
warning(_("server does not support wait-for-done"));
ret = -1;
} else {
negotiate_using_fetch(data->options.negotiation_tips,
transport->server_options,
transport->stateless_rpc,
data->fd,
data->options.acked_commits);
ret = 0;
}
goto cleanup;
}
refs = fetch_pack(&args, data->fd,
refs_tmp ? refs_tmp : transport->remote_refs,
to_fetch, nr_heads, &data->shallow,
&transport->pack_lockfiles, data->version);
data->got_remote_heads = 0;
clone: open a shortcut for connectivity check In order to make sure the cloned repository is good, we run "rev-list --objects --not --all $new_refs" on the repository. This is expensive on large repositories. This patch attempts to mitigate the impact in this special case. In the "good" clone case, we only have one pack. If all of the following are met, we can be sure that all objects reachable from the new refs exist, which is the intention of running "rev-list ...": - all refs point to an object in the pack - there are no dangling pointers in any object in the pack - no objects in the pack point to objects outside the pack The second and third checks can be done with the help of index-pack as a slight variation of --strict check (which introduces a new condition for the shortcut: pack transfer must be used and the number of objects large enough to call index-pack). The first is checked in check_everything_connected after we get an "ok" from index-pack. "index-pack + new checks" is still faster than the current "index-pack + rev-list", which is the whole point of this patch. If any of the conditions fail, we fall back to the good old but expensive "rev-list ..". In that case it's even more expensive because we have to pay for the new checks in index-pack. But that should only happen when the other side is either buggy or malicious. Cloning linux-2.6 over file:// before after real 3m25.693s 2m53.050s user 5m2.037s 4m42.396s sys 0m13.750s 0m16.574s A more realistic test with ssh:// over wireless before after real 11m26.629s 10m4.213s user 5m43.196s 5m19.444s sys 0m35.812s 0m37.630s This shortcut is not applied to shallow clones, partly because shallow clones should have no more objects than a usual fetch and the cost of rev-list is acceptable, partly to avoid dealing with corner cases when grafting is involved. This shortcut does not apply to unpack-objects code path either because the number of objects must be small in order to trigger that code path. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-05-26 09:16:17 +08:00
data->options.self_contained_and_connected =
args.self_contained_and_connected;
fetch-pack: write shallow, then check connectivity When fetching, connectivity is checked after the shallow file is updated. There are 2 issues with this: (1) the connectivity check is only performed up to ancestors of existing refs (which is not thorough enough if we were deepening an existing ref in the first place), and (2) there is no rollback of the shallow file if the connectivity check fails. To solve (1), update the connectivity check to check the ancestry chain completely in the case of a deepening fetch by refraining from passing "--not --all" when invoking rev-list in connected.c. To solve (2), have fetch_pack() perform its own connectivity check before updating the shallow file. To support existing use cases in which "git fetch-pack" is used to download objects without much regard as to the connectivity of the resulting objects with respect to the existing repository, the connectivity check is only done if necessary (that is, the fetch is not a clone, and the fetch involves shallow/deepen functionality). "git fetch" still performs its own connectivity check, preserving correctness but sometimes performing redundant work. This redundancy is mitigated by the fact that fetch_pack() reports if it has performed a connectivity check itself, and if the transport supports connect or stateless-connect, it will bubble up that report so that "git fetch" knows not to perform the connectivity check in such a case. This was noticed when a user tried to deepen an existing repository by fetching with --no-shallow from a server that did not send all necessary objects - the connectivity check as run by "git fetch" succeeded, but a subsequent "git fsck" failed. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-03 06:08:43 +08:00
data->options.connectivity_checked = args.connectivity_checked;
if (refs == NULL)
ret = -1;
if (report_unmatched_refs(to_fetch, nr_heads))
ret = -1;
fetch: teach independent negotiation (no packfile) Currently, the packfile negotiation step within a Git fetch cannot be done independent of sending the packfile, even though there is at least one application wherein this is useful. Therefore, make it possible for this negotiation step to be done independently. A subsequent commit will use this for one such application - push negotiation. This feature is for protocol v2 only. (An implementation for protocol v0 would require a separate implementation in the fetch, transport, and transport helper code.) In the protocol, the main hindrance towards independent negotiation is that the server can unilaterally decide to send the packfile. This is solved by a "wait-for-done" argument: the server will then wait for the client to say "done". In practice, the client will never say it; instead it will cease requests once it is satisfied. In the client, the main change lies in the transport and transport helper code. fetch_refs_via_pack() performs everything needed - protocol version and capability checks, and the negotiation itself. There are 2 code paths that do not go through fetch_refs_via_pack() that needed to be individually excluded: the bundle transport (excluded through requiring smart_options, which the bundle transport doesn't support) and transport helpers that do not support takeover. If or when we support independent negotiation for protocol v0, we will need to modify these 2 code paths to support it. But for now, report failure if independent negotiation is requested in these cases. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-05 05:16:01 +08:00
cleanup:
close(data->fd[0]);
fetch-pack: signal v2 server that we are done making requests When fetching with the v0 protocol over ssh (or a local upload-pack with pipes), the server closes the connection as soon as it is finished sending the pack. So even though the client may still be operating on the data via index-pack (e.g., resolving deltas, checking connectivity, etc), the server has released all resources. With the v2 protocol, however, the server considers the ssh session only as a transport, with individual requests coming over it. After sending the pack, it goes back to its main loop, waiting for another request to come from the client. As a result, the ssh session hangs around until the client process ends, which may be much later (because resolving deltas, etc, may consume a lot of CPU). This is bad for two reasons: - it's consuming resources on the server to leave open a connection that won't see any more use - if something bad happens to the ssh connection in the meantime (say, it gets killed by the network because it's idle, as happened in a real-world report), then ssh will exit non-zero, and we'll propagate the error up the stack. The server is correct here not to hang up after serving the pack. The v2 protocol's design is meant to allow multiple requests like this, and hanging up would be the wrong thing for a hypothetical client which was planning to make more requests (though in practice, the git.git client never would, and I doubt any other implementations would either). The right thing is instead for the client to signal to the server that it's not interested in making more requests. We can do that by closing the pipe descriptor we use to write to ssh. This will propagate to the server upload-pack as an EOF when it tries to read the next request (and then it will close its half, and the whole connection will go away). It's important to do this "half duplex" shutdown, because we have to do it _before_ we actually receive the pack. This is an artifact of the way fetch-pack and index-pack (or unpack-objects) interact. We hand the connection off to index-pack (really, a sideband demuxer which feeds it), and then wait until it returns. And it doesn't do that until it has resolved all of the deltas in the pack, even though it was done reading from the server long before. So just closing the connection fully after index-pack returns would be too late; we'd have held it open much longer than was necessary. And teaching index-pack to close the connection is awkward. It's not even seeing the whole conversation (the sideband demuxer is, but it doesn't actually know what's in the packets, or when the end comes). Note that this close() is happening deep within the transport code. It's possible that a caller would want to perform other operations over the same ssh transport after receiving the pack. But as of the current code, none of the callers do, and there haven't been discussions of any plans to change this. If we need to support that later, we can probably do so by passing down a flag for "you're the last request on the transport; it's OK to close" instead of the code just assuming that's true. The description above all discusses v2 ssh, so it's worth thinking about how this interacts with other protocols: - in v0 protocols, we could do the same half-duplex shutdown (it just goes into the v0 do_fetch_pack() instead). This does work, but since it doesn't have the same persistence problem in the first place, there's little reason to change it at this point. - local fetches against git-upload-pack on the same machine will behave the same as ssh (they are talking over two pipes, and see EOF on their input pipe) - fetches against git-daemon will run this same code, and close one of the descriptors. In practice, this won't do anything, since there our two descriptors are dups of each other, and not part of a half-duplex pair. The right thing would probably be to call shutdown(SHUT_WR) on it. I didn't bother with that here. It doesn't face the same error-code problem (since it's just a TCP connection), so it's really only an optimization problem. And git:// is not that widely used these days, and has less impact on server resources than an ssh termination. - v2 http doesn't suffer from this problem in the first place, as our pipes terminate at a local git-remote-https, which is passing data along as individual requests via curl. Probably curl is keeping the TCP/TLS connection open for more requests, and we might be able to tell it manually "hey, we are done making requests now". But I think that's much less important. It again doesn't suffer from the error-code problem, and HTTP keepalive is pretty well understood (importantly, the timeouts can be set low, because clients like curl know how to reconnect for subsequent requests if necessary). So it's probably not worth figuring out how to tell curl that we're done (though if we do, this patch is probably the first step anyway; fetch-pack closes the pipe back to remote-https, which would be the signal that it should tell curl we're done). The code is pretty straightforward. We close the pipe at the right moment, and set it to -1 to mark it as invalid. I modified the later cleanup code to avoid calling close(-1). That's not strictly necessary, since close(-1) is a noop, but hopefully makes things a bit more obvious to a reader. I suspect that trying to call more transport functions after the close() (e.g., calling transport_fetch_refs() again) would fail, as it's not smart enough to realize we need to re-open the ssh connection. But that's already true when v0 is in use. And no current callers want to do that (and again, the solution is probably a flag in the transport code to keep things open, which can be added later). There's no test here, as the situation it covers is inherently racy (the question is when upload-pack exits, compared to when index-pack finishes resolving deltas and exits). The rather gross shell snippet below does recreate the problematic situation; when run on a sufficiently-large repository (git.git works fine), it kills an "idle" upload-pack while the client is resolving deltas, leading to a failed clone. ( git clone --no-local --progress . foo.git 2>&1 echo >&2 "clone exit code=$?" ) | tr '\r' '\n' | while read line do case "$done,$line" in ,Resolving*) echo "hit resolving deltas; killing upload-pack" killall -9 git-upload-pack done=t ;; esac done Reported-by: Greg Pflaum <greg.pflaum@pnp-hcl.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-20 00:11:05 +08:00
if (data->fd[1] >= 0)
close(data->fd[1]);
fetch: teach independent negotiation (no packfile) Currently, the packfile negotiation step within a Git fetch cannot be done independent of sending the packfile, even though there is at least one application wherein this is useful. Therefore, make it possible for this negotiation step to be done independently. A subsequent commit will use this for one such application - push negotiation. This feature is for protocol v2 only. (An implementation for protocol v0 would require a separate implementation in the fetch, transport, and transport helper code.) In the protocol, the main hindrance towards independent negotiation is that the server can unilaterally decide to send the packfile. This is solved by a "wait-for-done" argument: the server will then wait for the client to say "done". In practice, the client will never say it; instead it will cease requests once it is satisfied. In the client, the main change lies in the transport and transport helper code. fetch_refs_via_pack() performs everything needed - protocol version and capability checks, and the negotiation itself. There are 2 code paths that do not go through fetch_refs_via_pack() that needed to be individually excluded: the bundle transport (excluded through requiring smart_options, which the bundle transport doesn't support) and transport helpers that do not support takeover. If or when we support independent negotiation for protocol v0, we will need to modify these 2 code paths to support it. But for now, report failure if independent negotiation is requested in these cases. Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-05 05:16:01 +08:00
if (finish_connect(data->conn))
ret = -1;
data->conn = NULL;
free_refs(refs_tmp);
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-02 04:13:20 +08:00
free_refs(refs);
return ret;
}
static int push_had_errors(struct ref *ref)
{
for (; ref; ref = ref->next) {
switch (ref->status) {
case REF_STATUS_NONE:
case REF_STATUS_UPTODATE:
case REF_STATUS_OK:
break;
default:
return 1;
}
}
return 0;
}
int transport_refs_pushed(struct ref *ref)
{
for (; ref; ref = ref->next) {
switch(ref->status) {
case REF_STATUS_NONE:
case REF_STATUS_UPTODATE:
break;
default:
return 1;
}
}
return 0;
}
static void update_one_tracking_ref(struct remote *remote, char *refname,
struct object_id *new_oid, int deletion,
int verbose)
{
struct refspec_item rs;
memset(&rs, 0, sizeof(rs));
rs.src = refname;
rs.dst = NULL;
if (!remote_find_tracking(remote, &rs)) {
if (verbose)
fprintf(stderr, "updating local tracking ref '%s'\n", rs.dst);
if (deletion)
delete_ref(NULL, rs.dst, NULL, 0);
else
update_ref("update by push", rs.dst, new_oid,
NULL, 0, 0);
free(rs.dst);
}
}
void transport_update_tracking_ref(struct remote *remote, struct ref *ref, int verbose)
{
char *refname;
struct object_id *new_oid;
struct ref_push_report *report;
if (ref->status != REF_STATUS_OK && ref->status != REF_STATUS_UPTODATE)
return;
report = ref->report;
if (!report)
update_one_tracking_ref(remote, ref->name, &ref->new_oid,
ref->deletion, verbose);
else
for (; report; report = report->next) {
refname = report->ref_name ? (char *)report->ref_name : ref->name;
new_oid = report->new_oid ? report->new_oid : &ref->new_oid;
update_one_tracking_ref(remote, refname, new_oid,
is_null_oid(new_oid), verbose);
}
}
static void print_ref_status(char flag, const char *summary,
struct ref *to, struct ref *from, const char *msg,
2020-08-27 23:45:46 +08:00
struct ref_push_report *report,
int porcelain, int summary_width)
{
2020-08-27 23:45:46 +08:00
const char *to_name;
if (report && report->ref_name)
to_name = report->ref_name;
else
to_name = to->name;
if (porcelain) {
if (from)
2020-08-27 23:45:46 +08:00
fprintf(stdout, "%c\t%s:%s\t", flag, from->name, to_name);
else
2020-08-27 23:45:46 +08:00
fprintf(stdout, "%c\t:%s\t", flag, to_name);
if (msg)
fprintf(stdout, "%s (%s)\n", summary, msg);
else
fprintf(stdout, "%s\n", summary);
} else {
const char *red = "", *reset = "";
if (push_had_errors(to)) {
red = transport_get_color(TRANSPORT_COLOR_REJECTED);
reset = transport_get_color(TRANSPORT_COLOR_RESET);
}
fprintf(stderr, " %s%c %-*s%s ", red, flag, summary_width,
summary, reset);
if (from)
2020-08-27 23:45:46 +08:00
fprintf(stderr, "%s -> %s",
prettify_refname(from->name),
prettify_refname(to_name));
else
2020-08-27 23:45:46 +08:00
fputs(prettify_refname(to_name), stderr);
if (msg) {
fputs(" (", stderr);
fputs(msg, stderr);
fputc(')', stderr);
}
fputc('\n', stderr);
}
}
2020-08-27 23:45:46 +08:00
static void print_ok_ref_status(struct ref *ref,
struct ref_push_report *report,
int porcelain, int summary_width)
{
2020-08-27 23:45:46 +08:00
struct object_id *old_oid;
struct object_id *new_oid;
const char *ref_name;
int forced_update;
if (report && report->old_oid)
old_oid = report->old_oid;
else
old_oid = &ref->old_oid;
if (report && report->new_oid)
new_oid = report->new_oid;
else
new_oid = &ref->new_oid;
if (report && report->forced_update)
forced_update = report->forced_update;
else
forced_update = ref->forced_update;
if (report && report->ref_name)
ref_name = report->ref_name;
else
ref_name = ref->name;
if (ref->deletion)
print_ref_status('-', "[deleted]", ref, NULL, NULL,
2020-08-27 23:45:46 +08:00
report, porcelain, summary_width);
else if (is_null_oid(old_oid))
print_ref_status('*',
2020-08-27 23:45:46 +08:00
(starts_with(ref_name, "refs/tags/")
? "[new tag]"
2020-08-27 23:45:46 +08:00
: (starts_with(ref_name, "refs/heads/")
? "[new branch]"
: "[new reference]")),
2020-08-27 23:45:46 +08:00
ref, ref->peer_ref, NULL,
report, porcelain, summary_width);
else {
struct strbuf quickref = STRBUF_INIT;
char type;
const char *msg;
2020-08-27 23:45:46 +08:00
strbuf_add_unique_abbrev(&quickref, old_oid,
DEFAULT_ABBREV);
2020-08-27 23:45:46 +08:00
if (forced_update) {
strbuf_addstr(&quickref, "...");
type = '+';
msg = "forced update";
} else {
strbuf_addstr(&quickref, "..");
type = ' ';
msg = NULL;
}
2020-08-27 23:45:46 +08:00
strbuf_add_unique_abbrev(&quickref, new_oid,
DEFAULT_ABBREV);
print_ref_status(type, quickref.buf, ref, ref->peer_ref, msg,
2020-08-27 23:45:46 +08:00
report, porcelain, summary_width);
strbuf_release(&quickref);
}
}
2020-08-27 23:45:46 +08:00
static int print_one_push_report(struct ref *ref, const char *dest, int count,
struct ref_push_report *report,
int porcelain, int summary_width)
{
if (!count) {
char *url = transport_anonymize_url(dest);
fprintf(porcelain ? stdout : stderr, "To %s\n", url);
free(url);
}
switch(ref->status) {
case REF_STATUS_NONE:
print_ref_status('X', "[no match]", ref, NULL, NULL,
2020-08-27 23:45:46 +08:00
report, porcelain, summary_width);
break;
case REF_STATUS_REJECT_NODELETE:
print_ref_status('!', "[rejected]", ref, NULL,
"remote does not support deleting refs",
2020-08-27 23:45:46 +08:00
report, porcelain, summary_width);
break;
case REF_STATUS_UPTODATE:
print_ref_status('=', "[up to date]", ref,
2020-08-27 23:45:46 +08:00
ref->peer_ref, NULL,
report, porcelain, summary_width);
break;
case REF_STATUS_REJECT_NONFASTFORWARD:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
2020-08-27 23:45:46 +08:00
"non-fast-forward",
report, porcelain, summary_width);
break;
case REF_STATUS_REJECT_ALREADY_EXISTS:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
2020-08-27 23:45:46 +08:00
"already exists",
report, porcelain, summary_width);
break;
push: introduce REJECT_FETCH_FIRST and REJECT_NEEDS_FORCE When we push to update an existing ref, if: * the object at the tip of the remote is not a commit; or * the object we are pushing is not a commit, it won't be correct to suggest to fetch, integrate and push again, as the old and new objects will not "merge". We should explain that the push must be forced when there is a non-committish object is involved in such a case. If we do not have the current object at the tip of the remote, we do not even know that object, when fetched, is something that can be merged. In such a case, suggesting to pull first just like non-fast-forward case may not be technically correct, but in practice, most such failures are seen when you try to push your work to a branch without knowing that somebody else already pushed to update the same branch since you forked, so "pull first" would work as a suggestion most of the time. And if the object at the tip is not a commit, "pull first" will fail, without making any permanent damage. As a side effect, it also makes the error message the user will get during the next "push" attempt easier to understand, now the user is aware that a non-commit object is involved. In these cases, the current code already rejects such a push on the client end, but we used the same error and advice messages as the ones used when rejecting a non-fast-forward push, i.e. pull from there and integrate before pushing again. Introduce new rejection reasons and reword the messages appropriately. [jc: with help by Peff on message details] Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-24 05:55:30 +08:00
case REF_STATUS_REJECT_FETCH_FIRST:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
2020-08-27 23:45:46 +08:00
"fetch first",
report, porcelain, summary_width);
push: introduce REJECT_FETCH_FIRST and REJECT_NEEDS_FORCE When we push to update an existing ref, if: * the object at the tip of the remote is not a commit; or * the object we are pushing is not a commit, it won't be correct to suggest to fetch, integrate and push again, as the old and new objects will not "merge". We should explain that the push must be forced when there is a non-committish object is involved in such a case. If we do not have the current object at the tip of the remote, we do not even know that object, when fetched, is something that can be merged. In such a case, suggesting to pull first just like non-fast-forward case may not be technically correct, but in practice, most such failures are seen when you try to push your work to a branch without knowing that somebody else already pushed to update the same branch since you forked, so "pull first" would work as a suggestion most of the time. And if the object at the tip is not a commit, "pull first" will fail, without making any permanent damage. As a side effect, it also makes the error message the user will get during the next "push" attempt easier to understand, now the user is aware that a non-commit object is involved. In these cases, the current code already rejects such a push on the client end, but we used the same error and advice messages as the ones used when rejecting a non-fast-forward push, i.e. pull from there and integrate before pushing again. Introduce new rejection reasons and reword the messages appropriately. [jc: with help by Peff on message details] Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-24 05:55:30 +08:00
break;
case REF_STATUS_REJECT_NEEDS_FORCE:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
2020-08-27 23:45:46 +08:00
"needs force",
report, porcelain, summary_width);
push: introduce REJECT_FETCH_FIRST and REJECT_NEEDS_FORCE When we push to update an existing ref, if: * the object at the tip of the remote is not a commit; or * the object we are pushing is not a commit, it won't be correct to suggest to fetch, integrate and push again, as the old and new objects will not "merge". We should explain that the push must be forced when there is a non-committish object is involved in such a case. If we do not have the current object at the tip of the remote, we do not even know that object, when fetched, is something that can be merged. In such a case, suggesting to pull first just like non-fast-forward case may not be technically correct, but in practice, most such failures are seen when you try to push your work to a branch without knowing that somebody else already pushed to update the same branch since you forked, so "pull first" would work as a suggestion most of the time. And if the object at the tip is not a commit, "pull first" will fail, without making any permanent damage. As a side effect, it also makes the error message the user will get during the next "push" attempt easier to understand, now the user is aware that a non-commit object is involved. In these cases, the current code already rejects such a push on the client end, but we used the same error and advice messages as the ones used when rejecting a non-fast-forward push, i.e. pull from there and integrate before pushing again. Introduce new rejection reasons and reword the messages appropriately. [jc: with help by Peff on message details] Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-24 05:55:30 +08:00
break;
case REF_STATUS_REJECT_STALE:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
2020-08-27 23:45:46 +08:00
"stale info",
report, porcelain, summary_width);
break;
case REF_STATUS_REJECT_REMOTE_UPDATED:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
"remote ref updated since checkout",
report, porcelain, summary_width);
break;
case REF_STATUS_REJECT_SHALLOW:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
"new shallow roots not allowed",
2020-08-27 23:45:46 +08:00
report, porcelain, summary_width);
break;
case REF_STATUS_REMOTE_REJECT:
print_ref_status('!', "[remote rejected]", ref,
ref->deletion ? NULL : ref->peer_ref,
2020-08-27 23:45:46 +08:00
ref->remote_status,
report, porcelain, summary_width);
break;
case REF_STATUS_EXPECTING_REPORT:
print_ref_status('!', "[remote failure]", ref,
ref->deletion ? NULL : ref->peer_ref,
"remote failed to report status",
2020-08-27 23:45:46 +08:00
report, porcelain, summary_width);
break;
case REF_STATUS_ATOMIC_PUSH_FAILED:
print_ref_status('!', "[rejected]", ref, ref->peer_ref,
2020-08-27 23:45:46 +08:00
"atomic push failed",
report, porcelain, summary_width);
break;
case REF_STATUS_OK:
2020-08-27 23:45:46 +08:00
print_ok_ref_status(ref, report, porcelain, summary_width);
break;
}
return 1;
}
2020-08-27 23:45:46 +08:00
static int print_one_push_status(struct ref *ref, const char *dest, int count,
int porcelain, int summary_width)
{
struct ref_push_report *report;
int n = 0;
if (!ref->report)
return print_one_push_report(ref, dest, count,
NULL, porcelain, summary_width);
for (report = ref->report; report; report = report->next)
print_one_push_report(ref, dest, count + n++,
report, porcelain, summary_width);
return n;
}
static int measure_abbrev(const struct object_id *oid, int sofar)
{
char hex[GIT_MAX_HEXSZ + 1];
int w = find_unique_abbrev_r(hex, oid, DEFAULT_ABBREV);
return (w < sofar) ? sofar : w;
}
int transport_summary_width(const struct ref *refs)
{
int maxw = -1;
for (; refs; refs = refs->next) {
maxw = measure_abbrev(&refs->old_oid, maxw);
maxw = measure_abbrev(&refs->new_oid, maxw);
}
if (maxw < 0)
maxw = FALLBACK_DEFAULT_ABBREV;
return (2 * maxw + 3);
}
void transport_print_push_status(const char *dest, struct ref *refs,
int verbose, int porcelain, unsigned int *reject_reasons)
{
struct ref *ref;
int n = 0;
push: Provide situational hints for non-fast-forward errors Pushing a non-fast-forward update to a remote repository will result in an error, but the hint text doesn't provide the correct resolution in every case. Give better resolution advice in three push scenarios: 1) If you push your current branch and it triggers a non-fast-forward error, you should merge remote changes with 'git pull' before pushing again. 2) If you push to a shared repository others push to, and your local tracking branches are not kept up to date, the 'matching refs' default will generate non-fast-forward errors on outdated branches. If this is your workflow, the 'matching refs' default is not for you. Consider setting the 'push.default' configuration variable to 'current' or 'upstream' to ensure only your current branch is pushed. 3) If you explicitly specify a ref that is not your current branch or push matching branches with ':', you will generate a non-fast-forward error if any pushed branch tip is out of date. You should checkout the offending branch and merge remote changes before pushing again. Teach transport.c to recognize these scenarios and configure push.c to hint for them. If 'git push's default behavior changes or we discover more scenarios, extension is easy. Standardize on the advice API and add three new advice variables, 'pushNonFFCurrent', 'pushNonFFDefault', and 'pushNonFFMatching'. Setting any of these to 'false' will disable their affiliated advice. Setting 'pushNonFastForward' to false will disable all three, thus preserving the config option for users who already set it, but guaranteeing new users won't disable push advice accidentally. Based-on-patch-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Christopher Tiwald <christiwald@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-20 12:31:33 +08:00
char *head;
int summary_width = transport_summary_width(refs);
push: Provide situational hints for non-fast-forward errors Pushing a non-fast-forward update to a remote repository will result in an error, but the hint text doesn't provide the correct resolution in every case. Give better resolution advice in three push scenarios: 1) If you push your current branch and it triggers a non-fast-forward error, you should merge remote changes with 'git pull' before pushing again. 2) If you push to a shared repository others push to, and your local tracking branches are not kept up to date, the 'matching refs' default will generate non-fast-forward errors on outdated branches. If this is your workflow, the 'matching refs' default is not for you. Consider setting the 'push.default' configuration variable to 'current' or 'upstream' to ensure only your current branch is pushed. 3) If you explicitly specify a ref that is not your current branch or push matching branches with ':', you will generate a non-fast-forward error if any pushed branch tip is out of date. You should checkout the offending branch and merge remote changes before pushing again. Teach transport.c to recognize these scenarios and configure push.c to hint for them. If 'git push's default behavior changes or we discover more scenarios, extension is easy. Standardize on the advice API and add three new advice variables, 'pushNonFFCurrent', 'pushNonFFDefault', and 'pushNonFFMatching'. Setting any of these to 'false' will disable their affiliated advice. Setting 'pushNonFastForward' to false will disable all three, thus preserving the config option for users who already set it, but guaranteeing new users won't disable push advice accidentally. Based-on-patch-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Christopher Tiwald <christiwald@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-20 12:31:33 +08:00
if (transport_color_config() < 0)
warning(_("could not parse transport.color.* config"));
head = resolve_refdup("HEAD", RESOLVE_REF_READING, NULL, NULL);
if (verbose) {
for (ref = refs; ref; ref = ref->next)
if (ref->status == REF_STATUS_UPTODATE)
n += print_one_push_status(ref, dest, n,
porcelain, summary_width);
}
for (ref = refs; ref; ref = ref->next)
if (ref->status == REF_STATUS_OK)
n += print_one_push_status(ref, dest, n,
porcelain, summary_width);
*reject_reasons = 0;
for (ref = refs; ref; ref = ref->next) {
if (ref->status != REF_STATUS_NONE &&
ref->status != REF_STATUS_UPTODATE &&
ref->status != REF_STATUS_OK)
n += print_one_push_status(ref, dest, n,
porcelain, summary_width);
if (ref->status == REF_STATUS_REJECT_NONFASTFORWARD) {
if (head != NULL && !strcmp(head, ref->name))
*reject_reasons |= REJECT_NON_FF_HEAD;
push: Provide situational hints for non-fast-forward errors Pushing a non-fast-forward update to a remote repository will result in an error, but the hint text doesn't provide the correct resolution in every case. Give better resolution advice in three push scenarios: 1) If you push your current branch and it triggers a non-fast-forward error, you should merge remote changes with 'git pull' before pushing again. 2) If you push to a shared repository others push to, and your local tracking branches are not kept up to date, the 'matching refs' default will generate non-fast-forward errors on outdated branches. If this is your workflow, the 'matching refs' default is not for you. Consider setting the 'push.default' configuration variable to 'current' or 'upstream' to ensure only your current branch is pushed. 3) If you explicitly specify a ref that is not your current branch or push matching branches with ':', you will generate a non-fast-forward error if any pushed branch tip is out of date. You should checkout the offending branch and merge remote changes before pushing again. Teach transport.c to recognize these scenarios and configure push.c to hint for them. If 'git push's default behavior changes or we discover more scenarios, extension is easy. Standardize on the advice API and add three new advice variables, 'pushNonFFCurrent', 'pushNonFFDefault', and 'pushNonFFMatching'. Setting any of these to 'false' will disable their affiliated advice. Setting 'pushNonFastForward' to false will disable all three, thus preserving the config option for users who already set it, but guaranteeing new users won't disable push advice accidentally. Based-on-patch-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Christopher Tiwald <christiwald@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-20 12:31:33 +08:00
else
*reject_reasons |= REJECT_NON_FF_OTHER;
} else if (ref->status == REF_STATUS_REJECT_ALREADY_EXISTS) {
*reject_reasons |= REJECT_ALREADY_EXISTS;
push: introduce REJECT_FETCH_FIRST and REJECT_NEEDS_FORCE When we push to update an existing ref, if: * the object at the tip of the remote is not a commit; or * the object we are pushing is not a commit, it won't be correct to suggest to fetch, integrate and push again, as the old and new objects will not "merge". We should explain that the push must be forced when there is a non-committish object is involved in such a case. If we do not have the current object at the tip of the remote, we do not even know that object, when fetched, is something that can be merged. In such a case, suggesting to pull first just like non-fast-forward case may not be technically correct, but in practice, most such failures are seen when you try to push your work to a branch without knowing that somebody else already pushed to update the same branch since you forked, so "pull first" would work as a suggestion most of the time. And if the object at the tip is not a commit, "pull first" will fail, without making any permanent damage. As a side effect, it also makes the error message the user will get during the next "push" attempt easier to understand, now the user is aware that a non-commit object is involved. In these cases, the current code already rejects such a push on the client end, but we used the same error and advice messages as the ones used when rejecting a non-fast-forward push, i.e. pull from there and integrate before pushing again. Introduce new rejection reasons and reword the messages appropriately. [jc: with help by Peff on message details] Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-01-24 05:55:30 +08:00
} else if (ref->status == REF_STATUS_REJECT_FETCH_FIRST) {
*reject_reasons |= REJECT_FETCH_FIRST;
} else if (ref->status == REF_STATUS_REJECT_NEEDS_FORCE) {
*reject_reasons |= REJECT_NEEDS_FORCE;
} else if (ref->status == REF_STATUS_REJECT_REMOTE_UPDATED) {
*reject_reasons |= REJECT_REF_NEEDS_UPDATE;
push: Provide situational hints for non-fast-forward errors Pushing a non-fast-forward update to a remote repository will result in an error, but the hint text doesn't provide the correct resolution in every case. Give better resolution advice in three push scenarios: 1) If you push your current branch and it triggers a non-fast-forward error, you should merge remote changes with 'git pull' before pushing again. 2) If you push to a shared repository others push to, and your local tracking branches are not kept up to date, the 'matching refs' default will generate non-fast-forward errors on outdated branches. If this is your workflow, the 'matching refs' default is not for you. Consider setting the 'push.default' configuration variable to 'current' or 'upstream' to ensure only your current branch is pushed. 3) If you explicitly specify a ref that is not your current branch or push matching branches with ':', you will generate a non-fast-forward error if any pushed branch tip is out of date. You should checkout the offending branch and merge remote changes before pushing again. Teach transport.c to recognize these scenarios and configure push.c to hint for them. If 'git push's default behavior changes or we discover more scenarios, extension is easy. Standardize on the advice API and add three new advice variables, 'pushNonFFCurrent', 'pushNonFFDefault', and 'pushNonFFMatching'. Setting any of these to 'false' will disable their affiliated advice. Setting 'pushNonFastForward' to false will disable all three, thus preserving the config option for users who already set it, but guaranteeing new users won't disable push advice accidentally. Based-on-patch-by: Junio C Hamano <gitster@pobox.com> Signed-off-by: Christopher Tiwald <christiwald@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-03-20 12:31:33 +08:00
}
}
free(head);
}
static int git_transport_push(struct transport *transport, struct ref *remote_refs, int flags)
{
struct git_transport_data *data = transport->data;
struct send_pack_args args;
int ret = 0;
if (transport_color_config() < 0)
return -1;
if (!data->got_remote_heads)
get_refs_via_connect(transport, 1, NULL);
memset(&args, 0, sizeof(args));
args.send_mirror = !!(flags & TRANSPORT_PUSH_MIRROR);
args.force_update = !!(flags & TRANSPORT_PUSH_FORCE);
args.use_thin_pack = data->options.thin;
args.verbose = (transport->verbose > 0);
args.quiet = (transport->verbose < 0);
args.progress = transport->progress;
args.dry_run = !!(flags & TRANSPORT_PUSH_DRY_RUN);
args.porcelain = !!(flags & TRANSPORT_PUSH_PORCELAIN);
args.atomic = !!(flags & TRANSPORT_PUSH_ATOMIC);
args.push_options = transport->push_options;
args.url = transport->url;
if (flags & TRANSPORT_PUSH_CERT_ALWAYS)
args.push_cert = SEND_PACK_PUSH_CERT_ALWAYS;
else if (flags & TRANSPORT_PUSH_CERT_IF_ASKED)
args.push_cert = SEND_PACK_PUSH_CERT_IF_ASKED;
else
args.push_cert = SEND_PACK_PUSH_CERT_NEVER;
switch (data->version) {
case protocol_v2:
die(_("support for protocol v2 not implemented yet"));
break;
case protocol_v1:
case protocol_v0:
ret = send_pack(&args, data->fd, data->conn, remote_refs,
&data->extra_have);
break;
case protocol_unknown_version:
BUG("unknown protocol version");
}
close(data->fd[1]);
close(data->fd[0]);
/*
* Atomic push may abort the connection early and close the pipe,
* which may cause an error for `finish_connect()`. Ignore this error
* for atomic git-push.
*/
if (ret || args.atomic)
finish_connect(data->conn);
else
ret = finish_connect(data->conn);
data->conn = NULL;
data->got_remote_heads = 0;
return ret;
}
static int connect_git(struct transport *transport, const char *name,
const char *executable, int fd[2])
{
struct git_transport_data *data = transport->data;
data->conn = git_connect(data->fd, transport->url,
executable, 0);
fd[0] = data->fd[0];
fd[1] = data->fd[1];
return 0;
}
static int disconnect_git(struct transport *transport)
{
struct git_transport_data *data = transport->data;
if (data->conn) {
if (data->got_remote_heads && !transport->stateless_rpc)
packet_flush(data->fd[1]);
close(data->fd[0]);
fetch-pack: signal v2 server that we are done making requests When fetching with the v0 protocol over ssh (or a local upload-pack with pipes), the server closes the connection as soon as it is finished sending the pack. So even though the client may still be operating on the data via index-pack (e.g., resolving deltas, checking connectivity, etc), the server has released all resources. With the v2 protocol, however, the server considers the ssh session only as a transport, with individual requests coming over it. After sending the pack, it goes back to its main loop, waiting for another request to come from the client. As a result, the ssh session hangs around until the client process ends, which may be much later (because resolving deltas, etc, may consume a lot of CPU). This is bad for two reasons: - it's consuming resources on the server to leave open a connection that won't see any more use - if something bad happens to the ssh connection in the meantime (say, it gets killed by the network because it's idle, as happened in a real-world report), then ssh will exit non-zero, and we'll propagate the error up the stack. The server is correct here not to hang up after serving the pack. The v2 protocol's design is meant to allow multiple requests like this, and hanging up would be the wrong thing for a hypothetical client which was planning to make more requests (though in practice, the git.git client never would, and I doubt any other implementations would either). The right thing is instead for the client to signal to the server that it's not interested in making more requests. We can do that by closing the pipe descriptor we use to write to ssh. This will propagate to the server upload-pack as an EOF when it tries to read the next request (and then it will close its half, and the whole connection will go away). It's important to do this "half duplex" shutdown, because we have to do it _before_ we actually receive the pack. This is an artifact of the way fetch-pack and index-pack (or unpack-objects) interact. We hand the connection off to index-pack (really, a sideband demuxer which feeds it), and then wait until it returns. And it doesn't do that until it has resolved all of the deltas in the pack, even though it was done reading from the server long before. So just closing the connection fully after index-pack returns would be too late; we'd have held it open much longer than was necessary. And teaching index-pack to close the connection is awkward. It's not even seeing the whole conversation (the sideband demuxer is, but it doesn't actually know what's in the packets, or when the end comes). Note that this close() is happening deep within the transport code. It's possible that a caller would want to perform other operations over the same ssh transport after receiving the pack. But as of the current code, none of the callers do, and there haven't been discussions of any plans to change this. If we need to support that later, we can probably do so by passing down a flag for "you're the last request on the transport; it's OK to close" instead of the code just assuming that's true. The description above all discusses v2 ssh, so it's worth thinking about how this interacts with other protocols: - in v0 protocols, we could do the same half-duplex shutdown (it just goes into the v0 do_fetch_pack() instead). This does work, but since it doesn't have the same persistence problem in the first place, there's little reason to change it at this point. - local fetches against git-upload-pack on the same machine will behave the same as ssh (they are talking over two pipes, and see EOF on their input pipe) - fetches against git-daemon will run this same code, and close one of the descriptors. In practice, this won't do anything, since there our two descriptors are dups of each other, and not part of a half-duplex pair. The right thing would probably be to call shutdown(SHUT_WR) on it. I didn't bother with that here. It doesn't face the same error-code problem (since it's just a TCP connection), so it's really only an optimization problem. And git:// is not that widely used these days, and has less impact on server resources than an ssh termination. - v2 http doesn't suffer from this problem in the first place, as our pipes terminate at a local git-remote-https, which is passing data along as individual requests via curl. Probably curl is keeping the TCP/TLS connection open for more requests, and we might be able to tell it manually "hey, we are done making requests now". But I think that's much less important. It again doesn't suffer from the error-code problem, and HTTP keepalive is pretty well understood (importantly, the timeouts can be set low, because clients like curl know how to reconnect for subsequent requests if necessary). So it's probably not worth figuring out how to tell curl that we're done (though if we do, this patch is probably the first step anyway; fetch-pack closes the pipe back to remote-https, which would be the signal that it should tell curl we're done). The code is pretty straightforward. We close the pipe at the right moment, and set it to -1 to mark it as invalid. I modified the later cleanup code to avoid calling close(-1). That's not strictly necessary, since close(-1) is a noop, but hopefully makes things a bit more obvious to a reader. I suspect that trying to call more transport functions after the close() (e.g., calling transport_fetch_refs() again) would fail, as it's not smart enough to realize we need to re-open the ssh connection. But that's already true when v0 is in use. And no current callers want to do that (and again, the solution is probably a flag in the transport code to keep things open, which can be added later). There's no test here, as the situation it covers is inherently racy (the question is when upload-pack exits, compared to when index-pack finishes resolving deltas and exits). The rather gross shell snippet below does recreate the problematic situation; when run on a sufficiently-large repository (git.git works fine), it kills an "idle" upload-pack while the client is resolving deltas, leading to a failed clone. ( git clone --no-local --progress . foo.git 2>&1 echo >&2 "clone exit code=$?" ) | tr '\r' '\n' | while read line do case "$done,$line" in ,Resolving*) echo "hit resolving deltas; killing upload-pack" killall -9 git-upload-pack done=t ;; esac done Reported-by: Greg Pflaum <greg.pflaum@pnp-hcl.com> Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-20 00:11:05 +08:00
if (data->fd[1] >= 0)
close(data->fd[1]);
finish_connect(data->conn);
}
free(data);
return 0;
}
static struct transport_vtable taken_over_vtable = {
.get_refs_list = get_refs_via_connect,
.fetch_refs = fetch_refs_via_pack,
.push_refs = git_transport_push,
.disconnect = disconnect_git
};
void transport_take_over(struct transport *transport,
struct child_process *child)
{
struct git_transport_data *data;
if (!transport->smart_options)
BUG("taking over transport requires non-NULL "
"smart_options field.");
CALLOC_ARRAY(data, 1);
data->options = *transport->smart_options;
data->conn = child;
data->fd[0] = data->conn->out;
data->fd[1] = data->conn->in;
data->got_remote_heads = 0;
transport->data = data;
transport->vtable = &taken_over_vtable;
transport->smart_options = &(data->options);
fetch: work around "transport-take-over" hack A Git-aware "connect" transport allows the "transport_take_over" to redirect generic transport requests like fetch(), push_refs() and get_refs_list() to the native Git transport handling methods. The take-over process replaces transport->data with a fake data that these method implementations understand. While this hack works OK for a single request, it breaks when the transport needs to make more than one requests. transport->data that used to hold necessary information for the specific helper to work correctly is destroyed during the take-over process. One codepath that this matters is "git fetch" in auto-follow mode; when it does not get all the tags that ought to point at the history it got (which can be determined by looking at the peeled tags in the initial advertisement) from the primary transfer, it internally makes a second request to complete the fetch. Because "take-over" hack has already destroyed the data necessary to talk to the transport helper by the time this happens, the second request cannot make a request to the helper to make another connection to fetch these additional tags. Mark such a transport as "cannot_reuse", and use a separate transport to perform the backfill fetch in order to work around this breakage. Note that this problem does not manifest itself when running t5802, because our upload-pack gives you all the necessary auto-followed tags during the primary transfer. You would need to step through "git fetch" in a debugger, stop immediately after the primary transfer finishes and writes these auto-followed tags, remove the tag references and repack/prune the repository to convince the "find-non-local-tags" procedure that the primary transfer failed to give us all the necessary tags, and then let it continue, in order to trigger the bug in the secondary transfer this patch fixes. Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-08 06:47:18 +08:00
transport->cannot_reuse = 1;
}
static int is_file(const char *url)
{
struct stat buf;
if (stat(url, &buf))
return 0;
return S_ISREG(buf.st_mode);
}
static int external_specification_len(const char *url)
{
return strchr(url, ':') - url;
}
static const struct string_list *protocol_whitelist(void)
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
{
static int enabled = -1;
static struct string_list allowed = STRING_LIST_INIT_DUP;
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
if (enabled < 0) {
const char *v = getenv("GIT_ALLOW_PROTOCOL");
if (v) {
string_list_split(&allowed, v, ':', -1);
string_list_sort(&allowed);
enabled = 1;
} else {
enabled = 0;
}
}
return enabled ? &allowed : NULL;
}
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
transport: add protocol policy config option Previously the `GIT_ALLOW_PROTOCOL` environment variable was used to specify a whitelist of protocols to be used in clone/fetch/push commands. This patch introduces new configuration options for more fine-grained control for allowing/disallowing protocols. This also has the added benefit of allowing easier construction of a protocol whitelist on systems where setting an environment variable is non-trivial. Now users can specify a policy to be used for each type of protocol via the 'protocol.<name>.allow' config option. A default policy for all unconfigured protocols can be set with the 'protocol.allow' config option. If no user configured default is made git will allow known-safe protocols (http, https, git, ssh, file), disallow known-dangerous protocols (ext), and have a default policy of `user` for all other protocols. The supported policies are `always`, `never`, and `user`. The `user` policy can be used to configure a protocol to be usable when explicitly used by a user, while disallowing it for commands which run clone/fetch/push commands without direct user intervention (e.g. recursive initialization of submodules). Commands which can potentially clone/fetch/push from untrusted repositories without user intervention can export `GIT_PROTOCOL_FROM_USER` with a value of '0' to prevent protocols configured to the `user` policy from being used. Fix remote-ext tests to use the new config to allow the ext protocol to be tested. Based on a patch by Jeff King <peff@peff.net> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-15 06:39:52 +08:00
enum protocol_allow_config {
PROTOCOL_ALLOW_NEVER = 0,
PROTOCOL_ALLOW_USER_ONLY,
PROTOCOL_ALLOW_ALWAYS
};
static enum protocol_allow_config parse_protocol_config(const char *key,
const char *value)
{
transport: add protocol policy config option Previously the `GIT_ALLOW_PROTOCOL` environment variable was used to specify a whitelist of protocols to be used in clone/fetch/push commands. This patch introduces new configuration options for more fine-grained control for allowing/disallowing protocols. This also has the added benefit of allowing easier construction of a protocol whitelist on systems where setting an environment variable is non-trivial. Now users can specify a policy to be used for each type of protocol via the 'protocol.<name>.allow' config option. A default policy for all unconfigured protocols can be set with the 'protocol.allow' config option. If no user configured default is made git will allow known-safe protocols (http, https, git, ssh, file), disallow known-dangerous protocols (ext), and have a default policy of `user` for all other protocols. The supported policies are `always`, `never`, and `user`. The `user` policy can be used to configure a protocol to be usable when explicitly used by a user, while disallowing it for commands which run clone/fetch/push commands without direct user intervention (e.g. recursive initialization of submodules). Commands which can potentially clone/fetch/push from untrusted repositories without user intervention can export `GIT_PROTOCOL_FROM_USER` with a value of '0' to prevent protocols configured to the `user` policy from being used. Fix remote-ext tests to use the new config to allow the ext protocol to be tested. Based on a patch by Jeff King <peff@peff.net> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-15 06:39:52 +08:00
if (!strcasecmp(value, "always"))
return PROTOCOL_ALLOW_ALWAYS;
else if (!strcasecmp(value, "never"))
return PROTOCOL_ALLOW_NEVER;
else if (!strcasecmp(value, "user"))
return PROTOCOL_ALLOW_USER_ONLY;
die(_("unknown value for config '%s': %s"), key, value);
}
transport: add protocol policy config option Previously the `GIT_ALLOW_PROTOCOL` environment variable was used to specify a whitelist of protocols to be used in clone/fetch/push commands. This patch introduces new configuration options for more fine-grained control for allowing/disallowing protocols. This also has the added benefit of allowing easier construction of a protocol whitelist on systems where setting an environment variable is non-trivial. Now users can specify a policy to be used for each type of protocol via the 'protocol.<name>.allow' config option. A default policy for all unconfigured protocols can be set with the 'protocol.allow' config option. If no user configured default is made git will allow known-safe protocols (http, https, git, ssh, file), disallow known-dangerous protocols (ext), and have a default policy of `user` for all other protocols. The supported policies are `always`, `never`, and `user`. The `user` policy can be used to configure a protocol to be usable when explicitly used by a user, while disallowing it for commands which run clone/fetch/push commands without direct user intervention (e.g. recursive initialization of submodules). Commands which can potentially clone/fetch/push from untrusted repositories without user intervention can export `GIT_PROTOCOL_FROM_USER` with a value of '0' to prevent protocols configured to the `user` policy from being used. Fix remote-ext tests to use the new config to allow the ext protocol to be tested. Based on a patch by Jeff King <peff@peff.net> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-15 06:39:52 +08:00
static enum protocol_allow_config get_protocol_config(const char *type)
{
transport: add protocol policy config option Previously the `GIT_ALLOW_PROTOCOL` environment variable was used to specify a whitelist of protocols to be used in clone/fetch/push commands. This patch introduces new configuration options for more fine-grained control for allowing/disallowing protocols. This also has the added benefit of allowing easier construction of a protocol whitelist on systems where setting an environment variable is non-trivial. Now users can specify a policy to be used for each type of protocol via the 'protocol.<name>.allow' config option. A default policy for all unconfigured protocols can be set with the 'protocol.allow' config option. If no user configured default is made git will allow known-safe protocols (http, https, git, ssh, file), disallow known-dangerous protocols (ext), and have a default policy of `user` for all other protocols. The supported policies are `always`, `never`, and `user`. The `user` policy can be used to configure a protocol to be usable when explicitly used by a user, while disallowing it for commands which run clone/fetch/push commands without direct user intervention (e.g. recursive initialization of submodules). Commands which can potentially clone/fetch/push from untrusted repositories without user intervention can export `GIT_PROTOCOL_FROM_USER` with a value of '0' to prevent protocols configured to the `user` policy from being used. Fix remote-ext tests to use the new config to allow the ext protocol to be tested. Based on a patch by Jeff King <peff@peff.net> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-15 06:39:52 +08:00
char *key = xstrfmt("protocol.%s.allow", type);
char *value;
/* first check the per-protocol config */
if (!git_config_get_string(key, &value)) {
enum protocol_allow_config ret =
parse_protocol_config(key, value);
free(key);
free(value);
return ret;
}
free(key);
/* if defined, fallback to user-defined default for unknown protocols */
if (!git_config_get_string("protocol.allow", &value)) {
enum protocol_allow_config ret =
parse_protocol_config("protocol.allow", value);
free(value);
return ret;
}
/* fallback to built-in defaults */
/* known safe */
if (!strcmp(type, "http") ||
!strcmp(type, "https") ||
!strcmp(type, "git") ||
!strcmp(type, "ssh") ||
!strcmp(type, "file"))
return PROTOCOL_ALLOW_ALWAYS;
/* known scary; err on the side of caution */
if (!strcmp(type, "ext"))
return PROTOCOL_ALLOW_NEVER;
/* unknown; by default let them be used only directly by the user */
return PROTOCOL_ALLOW_USER_ONLY;
}
int is_transport_allowed(const char *type, int from_user)
{
transport: add protocol policy config option Previously the `GIT_ALLOW_PROTOCOL` environment variable was used to specify a whitelist of protocols to be used in clone/fetch/push commands. This patch introduces new configuration options for more fine-grained control for allowing/disallowing protocols. This also has the added benefit of allowing easier construction of a protocol whitelist on systems where setting an environment variable is non-trivial. Now users can specify a policy to be used for each type of protocol via the 'protocol.<name>.allow' config option. A default policy for all unconfigured protocols can be set with the 'protocol.allow' config option. If no user configured default is made git will allow known-safe protocols (http, https, git, ssh, file), disallow known-dangerous protocols (ext), and have a default policy of `user` for all other protocols. The supported policies are `always`, `never`, and `user`. The `user` policy can be used to configure a protocol to be usable when explicitly used by a user, while disallowing it for commands which run clone/fetch/push commands without direct user intervention (e.g. recursive initialization of submodules). Commands which can potentially clone/fetch/push from untrusted repositories without user intervention can export `GIT_PROTOCOL_FROM_USER` with a value of '0' to prevent protocols configured to the `user` policy from being used. Fix remote-ext tests to use the new config to allow the ext protocol to be tested. Based on a patch by Jeff King <peff@peff.net> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-15 06:39:52 +08:00
const struct string_list *whitelist = protocol_whitelist();
if (whitelist)
return string_list_has_string(whitelist, type);
switch (get_protocol_config(type)) {
case PROTOCOL_ALLOW_ALWAYS:
return 1;
case PROTOCOL_ALLOW_NEVER:
return 0;
case PROTOCOL_ALLOW_USER_ONLY:
if (from_user < 0)
from_user = git_env_bool("GIT_PROTOCOL_FROM_USER", 1);
return from_user;
transport: add protocol policy config option Previously the `GIT_ALLOW_PROTOCOL` environment variable was used to specify a whitelist of protocols to be used in clone/fetch/push commands. This patch introduces new configuration options for more fine-grained control for allowing/disallowing protocols. This also has the added benefit of allowing easier construction of a protocol whitelist on systems where setting an environment variable is non-trivial. Now users can specify a policy to be used for each type of protocol via the 'protocol.<name>.allow' config option. A default policy for all unconfigured protocols can be set with the 'protocol.allow' config option. If no user configured default is made git will allow known-safe protocols (http, https, git, ssh, file), disallow known-dangerous protocols (ext), and have a default policy of `user` for all other protocols. The supported policies are `always`, `never`, and `user`. The `user` policy can be used to configure a protocol to be usable when explicitly used by a user, while disallowing it for commands which run clone/fetch/push commands without direct user intervention (e.g. recursive initialization of submodules). Commands which can potentially clone/fetch/push from untrusted repositories without user intervention can export `GIT_PROTOCOL_FROM_USER` with a value of '0' to prevent protocols configured to the `user` policy from being used. Fix remote-ext tests to use the new config to allow the ext protocol to be tested. Based on a patch by Jeff King <peff@peff.net> Signed-off-by: Brandon Williams <bmwill@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-12-15 06:39:52 +08:00
}
BUG("invalid protocol_allow_config type");
}
void transport_check_allowed(const char *type)
{
if (!is_transport_allowed(type, -1))
die(_("transport '%s' not allowed"), type);
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
}
static struct transport_vtable bundle_vtable = {
.get_refs_list = get_refs_from_bundle,
.fetch_refs = fetch_refs_from_bundle,
.disconnect = close_bundle
};
static struct transport_vtable builtin_smart_vtable = {
.get_refs_list = get_refs_via_connect,
.fetch_refs = fetch_refs_via_pack,
.push_refs = git_transport_push,
.connect = connect_git,
.disconnect = disconnect_git
};
struct transport *transport_get(struct remote *remote, const char *url)
{
const char *helper;
struct transport *ret = xcalloc(1, sizeof(*ret));
ret->progress = isatty(2);
string_list_init_dup(&ret->pack_lockfiles);
if (!remote)
BUG("No remote provided to transport_get()");
ret->got_remote_refs = 0;
ret->remote = remote;
helper = remote->foreign_vcs;
if (!url && remote->url)
url = remote->url[0];
ret->url = url;
/* maybe it is a foreign URL? */
if (url) {
const char *p = url;
while (is_urlschemechar(p == url, *p))
p++;
if (starts_with(p, "::"))
helper = xstrndup(url, p - url);
}
if (helper) {
transport_helper_init(ret, helper);
} else if (starts_with(url, "rsync:")) {
die(_("git-over-rsync is no longer supported"));
} else if (url_is_local_not_ssh(url) && is_file(url) && is_bundle(url, 1)) {
struct bundle_transport_data *data = xcalloc(1, sizeof(*data));
bundle_header_init(&data->header);
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
transport_check_allowed("file");
ret->data = data;
ret->vtable = &bundle_vtable;
ret->smart_options = NULL;
} else if (!is_url(url)
|| starts_with(url, "file://")
|| starts_with(url, "git://")
|| starts_with(url, "ssh://")
|| starts_with(url, "git+ssh://") /* deprecated - do not use */
|| starts_with(url, "ssh+git://") /* deprecated - do not use */
) {
transport: add a protocol-whitelist environment variable If we are cloning an untrusted remote repository into a sandbox, we may also want to fetch remote submodules in order to get the complete view as intended by the other side. However, that opens us up to attacks where a malicious user gets us to clone something they would not otherwise have access to (this is not necessarily a problem by itself, but we may then act on the cloned contents in a way that exposes them to the attacker). Ideally such a setup would sandbox git entirely away from high-value items, but this is not always practical or easy to set up (e.g., OS network controls may block multiple protocols, and we would want to enable some but not others). We can help this case by providing a way to restrict particular protocols. We use a whitelist in the environment. This is more annoying to set up than a blacklist, but defaults to safety if the set of protocols git supports grows). If no whitelist is specified, we continue to default to allowing all protocols (this is an "unsafe" default, but since the minority of users will want this sandboxing effect, it is the only sensible one). A note on the tests: ideally these would all be in a single test file, but the git-daemon and httpd test infrastructure is an all-or-nothing proposition rather than a test-by-test prerequisite. By putting them all together, we would be unable to test the file-local code on machines without apache. Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
/*
* These are builtin smart transports; "allowed" transports
* will be checked individually in git_connect.
*/
struct git_transport_data *data = xcalloc(1, sizeof(*data));
ret->data = data;
ret->vtable = &builtin_smart_vtable;
ret->smart_options = &(data->options);
data->conn = NULL;
data->got_remote_heads = 0;
} else {
/* Unknown protocol in URL. Pass to external handler. */
int len = external_specification_len(url);
char *handler = xmemdupz(url, len);
transport_helper_init(ret, handler);
}
if (ret->smart_options) {
ret->smart_options->thin = 1;
ret->smart_options->uploadpack = "git-upload-pack";
if (remote->uploadpack)
ret->smart_options->uploadpack = remote->uploadpack;
ret->smart_options->receivepack = "git-receive-pack";
if (remote->receivepack)
ret->smart_options->receivepack = remote->receivepack;
}
ret->hash_algo = &hash_algos[GIT_HASH_SHA1];
return ret;
}
const struct git_hash_algo *transport_get_hash_algo(struct transport *transport)
{
return transport->hash_algo;
}
int transport_set_option(struct transport *transport,
const char *name, const char *value)
{
int git_reports = 1, protocol_reports = 1;
if (transport->smart_options)
git_reports = set_git_option(transport->smart_options,
name, value);
if (transport->vtable->set_option)
protocol_reports = transport->vtable->set_option(transport,
name, value);
/* If either report is 0, report 0 (success). */
if (!git_reports || !protocol_reports)
return 0;
/* If either reports -1 (invalid value), report -1. */
if ((git_reports == -1) || (protocol_reports == -1))
return -1;
/* Otherwise if both report unknown, report unknown. */
return 1;
}
void transport_set_verbosity(struct transport *transport, int verbosity,
int force_progress)
{
make "git push -v" actually verbose Providing a single "-v" to "git push" currently does nothing. Giving two flags ("git push -v -v") turns on the first level of verbosity. This is caused by a regression introduced in 8afd8dc (push: support multiple levels of verbosity, 2010-02-24). Before the series containing 8afd8dc, the verbosity handling for fetching and pushing was completely separate. Commit bde873c refactored the verbosity handling out of the fetch side, and then 8afd8dc converted push to use the refactored code. However, the fetch and push sides numbered and passed along their verbosity levels differently. For both, a verbosity level of "-1" meant "quiet", and "0" meant "default output". But from there they differed. For fetch, a verbosity level of "1" indicated to the "fetch" program that it should make the status table slightly more verbose, showing up-to-date entries. A verbosity level of "2" meant that we should pass a verbose flag to the transport; in the case of fetch-pack, this displays protocol debugging information. As a result, the refactored code in bde873c checks for "verbosity >= 2", and only then passes it on to the transport. From the transport code's perspective, a verbosity of 0 or 1 both meant "0". Push, on the other hand, does not show its own status table; that is always handled by the transport layer or below (originally send-pack itself, but these days it is done by the transport code). So a verbosity level of 1 meant that we should pass the verbose flag to send-pack, so that it knows we want a verbose status table. However, once 8afd8dc switched it to the refactored fetch code, a verbosity level of 1 was now being ignored. Thus, you needed to artificially bump the verbosity to 2 (via "-v -v") to have any effect. We can fix this by letting the transport code know about the true verbosity level (i.e., let it distinguish level 0 or 1). We then have to also make an adjustment to any transport methods that assumed "verbose > 0" meant they could spew lots of debugging information. Before, they could only get "0" or "2", but now they will also receive "1". They need to adjust their condition for turning on such spew from "verbose > 0" to "verbose > 1". Signed-off-by: Jeff King <peff@peff.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-12-17 17:37:15 +08:00
if (verbosity >= 1)
transport->verbose = verbosity <= 3 ? verbosity : 3;
if (verbosity < 0)
transport->verbose = -1;
/**
* Rules used to determine whether to report progress (processing aborts
* when a rule is satisfied):
*
* . Report progress, if force_progress is 1 (ie. --progress).
* . Don't report progress, if force_progress is 0 (ie. --no-progress).
* . Don't report progress, if verbosity < 0 (ie. -q/--quiet ).
* . Report progress if isatty(2) is 1.
**/
if (force_progress >= 0)
transport->progress = !!force_progress;
else
transport->progress = verbosity >= 0 && isatty(2);
}
static void die_with_unpushed_submodules(struct string_list *needs_pushing)
{
int i;
fprintf(stderr, _("The following submodule paths contain changes that can\n"
"not be found on any remote:\n"));
for (i = 0; i < needs_pushing->nr; i++)
fprintf(stderr, " %s\n", needs_pushing->items[i].string);
fprintf(stderr, _("\nPlease try\n\n"
" git push --recurse-submodules=on-demand\n\n"
"or cd to the path and use\n\n"
" git push\n\n"
"to push them to a remote.\n\n"));
string_list_clear(needs_pushing, 0);
die(_("Aborting."));
}
static int run_pre_push_hook(struct transport *transport,
struct ref *remote_refs)
{
int ret = 0, x;
struct ref *r;
struct child_process proc = CHILD_PROCESS_INIT;
struct strbuf buf;
const char *hook_path = find_hook("pre-push");
if (!hook_path)
return 0;
strvec_push(&proc.args, hook_path);
strvec_push(&proc.args, transport->remote->name);
strvec_push(&proc.args, transport->url);
proc.in = -1;
proc.trace2_hook_name = "pre-push";
if (start_command(&proc)) {
finish_command(&proc);
return -1;
}
sigchain_push(SIGPIPE, SIG_IGN);
strbuf_init(&buf, 256);
for (r = remote_refs; r; r = r->next) {
if (!r->peer_ref) continue;
if (r->status == REF_STATUS_REJECT_NONFASTFORWARD) continue;
if (r->status == REF_STATUS_REJECT_STALE) continue;
if (r->status == REF_STATUS_REJECT_REMOTE_UPDATED) continue;
if (r->status == REF_STATUS_UPTODATE) continue;
strbuf_reset(&buf);
strbuf_addf( &buf, "%s %s %s %s\n",
r->peer_ref->name, oid_to_hex(&r->new_oid),
r->name, oid_to_hex(&r->old_oid));
if (write_in_full(proc.in, buf.buf, buf.len) < 0) {
/* We do not mind if a hook does not read all refs. */
if (errno != EPIPE)
ret = -1;
break;
}
}
strbuf_release(&buf);
x = close(proc.in);
if (!ret)
ret = x;
sigchain_pop(SIGPIPE);
x = finish_command(&proc);
if (!ret)
ret = x;
return ret;
}
int transport_push(struct repository *r,
struct transport *transport,
struct refspec *rs, int flags,
unsigned int *reject_reasons)
{
struct ref *remote_refs;
struct ref *local_refs;
int match_flags = MATCH_REFS_NONE;
int verbose = (transport->verbose > 0);
int quiet = (transport->verbose < 0);
int porcelain = flags & TRANSPORT_PUSH_PORCELAIN;
int pretend = flags & TRANSPORT_PUSH_DRY_RUN;
int push_ret, ret, err;
struct transport_ls_refs_options transport_options =
TRANSPORT_LS_REFS_OPTIONS_INIT;
*reject_reasons = 0;
if (transport_color_config() < 0)
return -1;
if (!transport->vtable->push_refs)
return 1;
local_refs = get_local_heads();
if (check_push_refs(local_refs, rs) < 0)
return -1;
refspec_ref_prefixes(rs, &transport_options.ref_prefixes);
trace2_region_enter("transport_push", "get_refs_list", r);
remote_refs = transport->vtable->get_refs_list(transport, 1,
&transport_options);
trace2_region_leave("transport_push", "get_refs_list", r);
transport_ls_refs_options_release(&transport_options);
if (flags & TRANSPORT_PUSH_ALL)
match_flags |= MATCH_REFS_ALL;
if (flags & TRANSPORT_PUSH_MIRROR)
match_flags |= MATCH_REFS_MIRROR;
if (flags & TRANSPORT_PUSH_PRUNE)
match_flags |= MATCH_REFS_PRUNE;
if (flags & TRANSPORT_PUSH_FOLLOW_TAGS)
match_flags |= MATCH_REFS_FOLLOW_TAGS;
if (match_push_refs(local_refs, &remote_refs, rs, match_flags))
return -1;
if (transport->smart_options &&
transport->smart_options->cas &&
!is_empty_cas(transport->smart_options->cas))
apply_push_cas(transport->smart_options->cas,
transport->remote, remote_refs);
set_ref_status_for_push(remote_refs,
flags & TRANSPORT_PUSH_MIRROR,
flags & TRANSPORT_PUSH_FORCE);
if (!(flags & TRANSPORT_PUSH_NO_HOOK))
if (run_pre_push_hook(transport, remote_refs))
return -1;
if ((flags & (TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND |
TRANSPORT_RECURSE_SUBMODULES_ONLY)) &&
!is_bare_repository()) {
struct ref *ref = remote_refs;
struct oid_array commits = OID_ARRAY_INIT;
trace2_region_enter("transport_push", "push_submodules", r);
for (; ref; ref = ref->next)
if (!is_null_oid(&ref->new_oid))
oid_array_append(&commits,
&ref->new_oid);
if (!push_unpushed_submodules(r,
&commits,
transport->remote,
rs,
transport->push_options,
pretend)) {
oid_array_clear(&commits);
trace2_region_leave("transport_push", "push_submodules", r);
die(_("failed to push all needed submodules"));
}
oid_array_clear(&commits);
trace2_region_leave("transport_push", "push_submodules", r);
}
if (((flags & TRANSPORT_RECURSE_SUBMODULES_CHECK) ||
((flags & (TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND |
TRANSPORT_RECURSE_SUBMODULES_ONLY)) &&
!pretend)) && !is_bare_repository()) {
struct ref *ref = remote_refs;
struct string_list needs_pushing = STRING_LIST_INIT_DUP;
struct oid_array commits = OID_ARRAY_INIT;
trace2_region_enter("transport_push", "check_submodules", r);
for (; ref; ref = ref->next)
if (!is_null_oid(&ref->new_oid))
oid_array_append(&commits,
&ref->new_oid);
if (find_unpushed_submodules(r,
&commits,
transport->remote->name,
&needs_pushing)) {
oid_array_clear(&commits);
trace2_region_leave("transport_push", "check_submodules", r);
die_with_unpushed_submodules(&needs_pushing);
}
string_list_clear(&needs_pushing, 0);
oid_array_clear(&commits);
trace2_region_leave("transport_push", "check_submodules", r);
}
if (!(flags & TRANSPORT_RECURSE_SUBMODULES_ONLY)) {
trace2_region_enter("transport_push", "push_refs", r);
push_ret = transport->vtable->push_refs(transport, remote_refs, flags);
trace2_region_leave("transport_push", "push_refs", r);
} else
push_ret = 0;
err = push_had_errors(remote_refs);
ret = push_ret | err;
if (!quiet || err)
transport_print_push_status(transport->url, remote_refs,
verbose | porcelain, porcelain,
reject_reasons);
if (flags & TRANSPORT_PUSH_SET_UPSTREAM)
set_upstreams(transport, remote_refs, pretend);
if (!(flags & (TRANSPORT_PUSH_DRY_RUN |
TRANSPORT_RECURSE_SUBMODULES_ONLY))) {
struct ref *ref;
for (ref = remote_refs; ref; ref = ref->next)
transport_update_tracking_ref(transport->remote, ref, verbose);
}
if (porcelain && !push_ret)
puts("Done");
else if (!quiet && !ret && !transport_refs_pushed(remote_refs))
fprintf(stderr, "Everything up-to-date\n");
return ret;
}
const struct ref *transport_get_remote_refs(struct transport *transport,
struct transport_ls_refs_options *transport_options)
{
if (!transport->got_remote_refs) {
transport->remote_refs =
transport->vtable->get_refs_list(transport, 0,
transport_options);
transport->got_remote_refs = 1;
}
return transport->remote_refs;
}
void transport_ls_refs_options_release(struct transport_ls_refs_options *opts)
{
strvec_clear(&opts->ref_prefixes);
free((char *)opts->unborn_head_target);
}
fetch-pack: unify ref in and out param When a user fetches: - at least one up-to-date ref and at least one non-up-to-date ref, - using HTTP with protocol v0 (or something else that uses the fetch command of a remote helper) some refs might not be updated after the fetch. This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow info in output parameter", 2018-06-28) which allowed transports to report the refs that they have fetched in a new out-parameter "fetched_refs". If they do so, transport_fetch_refs() makes this information available to its caller. Users of "fetched_refs" rely on the following 3 properties: (1) it is the complete list of refs that was passed to transport_fetch_refs(), (2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if relevant), and (3) it has updated OIDs if ref-in-want was used (introduced after 989b8c4452). In an effort to satisfy (1), whenever transport_fetch_refs() filters the refs sent to the transport, it re-adds the filtered refs to whatever the transport supplies before returning it to the user. However, the implementation in 989b8c4452 unconditionally re-adds the filtered refs without checking if the transport refrained from reporting anything in "fetched_refs" (which it is allowed to do), resulting in an incomplete list, no longer satisfying (1). An earlier effort to resolve this [1] solved the issue by readding the filtered refs only if the transport did not refrain from reporting in "fetched_refs", but after further discussion, it seems that the better solution is to revert the API change that introduced "fetched_refs". This API change was first suggested as part of a ref-in-want implementation that allowed for ref patterns and, thus, there could be drastic differences between the input refs and the refs actually fetched [2]; we eventually decided to only allow exact ref names, but this API change remained even though its necessity was decreased. Therefore, revert this API change by reverting commit 989b8c4452, and make receive_wanted_refs() update the OIDs in the sought array (like how update_shallow() updates shallow information in the sought array) instead. A test is also included to show that the user-visible bug discussed at the beginning of this commit message no longer exists. [1] https://public-inbox.org/git/20180801171806.GA122458@google.com/ [2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/ Signed-off-by: Jonathan Tan <jonathantanmy@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-02 04:13:20 +08:00
int transport_fetch_refs(struct transport *transport, struct ref *refs)
{
int rc;
int nr_heads = 0, nr_alloc = 0, nr_refs = 0;
struct ref **heads = NULL;
struct ref *rm;
for (rm = refs; rm; rm = rm->next) {
nr_refs++;
if (rm->peer_ref &&
!is_null_oid(&rm->old_oid) &&
oideq(&rm->peer_ref->old_oid, &rm->old_oid))
continue;
ALLOC_GROW(heads, nr_heads + 1, nr_alloc);
heads[nr_heads++] = rm;
}
if (!nr_heads) {
/*
* When deepening of a shallow repository is requested,
* then local and remote refs are likely to still be equal.
* Just feed them all to the fetch method in that case.
* This condition shouldn't be met in a non-deepening fetch
* (see builtin/fetch.c:quickfetch()).
*/
ALLOC_ARRAY(heads, nr_refs);
for (rm = refs; rm; rm = rm->next)
heads[nr_heads++] = rm;
}
rc = transport->vtable->fetch_refs(transport, nr_heads, heads);
free(heads);
return rc;
}
fetch: fix deadlock when cleaning up lockfiles in async signals When fetching packfiles, we write a bunch of lockfiles for the packfiles we're writing into the repository. In order to not leave behind any cruft in case we exit or receive a signal, we register both an exit handler as well as signal handlers for common signals like SIGINT. These handlers will then unlink the locks and free the data structure tracking them. We have observed a deadlock in this logic though: (gdb) bt #0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95 #1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969 #2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975 #3 0x0000000000662ab1 in string_list_clear () #4 0x000000000044f5bc in unlock_pack_on_signal () #5 <signal handler called> #6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024 #7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975 #8 0x000000000065afd5 in strbuf_release () #9 0x000000000066ddb9 in delete_tempfile () #10 0x0000000000610d0b in files_transaction_cleanup.isra () #11 0x0000000000611718 in files_transaction_abort () #12 0x000000000060d2ef in ref_transaction_abort () #13 0x000000000060d441 in ref_transaction_prepare () #14 0x000000000060e0b5 in ref_transaction_commit () #15 0x00000000004511c2 in fetch_and_consume_refs () #16 0x000000000045279a in cmd_fetch () #17 0x0000000000407c48 in handle_builtin () #18 0x0000000000408df2 in cmd_main () #19 0x00000000004078b5 in main () The process was killed with a signal, which caused the signal handler to kick in and try free the data structures after we have unlinked the locks. It then deadlocks while calling free(3P). The root cause of this is that it is not allowed to call certain functions in async-signal handlers, as specified by signal-safety(7). Next to most I/O functions, this list of disallowed functions also includes memory-handling functions like malloc(3P) and free(3P) because they may not be reentrant. As a result, if we execute such functions in the signal handler, then they may operate on inconistent state and fail in unexpected ways. Fix this bug by not calling non-async-signal-safe functions when running in the signal handler. We're about to re-raise the signal anyway and will thus exit, so it's not much of a problem to keep the string list of lockfiles untouched. Note that it's fine though to call unlink(2), so we'll still clean up the lockfiles correctly. Signed-off-by: Patrick Steinhardt <ps@pks.im> Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 18:55:47 +08:00
void transport_unlock_pack(struct transport *transport, unsigned int flags)
{
fetch: fix deadlock when cleaning up lockfiles in async signals When fetching packfiles, we write a bunch of lockfiles for the packfiles we're writing into the repository. In order to not leave behind any cruft in case we exit or receive a signal, we register both an exit handler as well as signal handlers for common signals like SIGINT. These handlers will then unlink the locks and free the data structure tracking them. We have observed a deadlock in this logic though: (gdb) bt #0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95 #1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969 #2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975 #3 0x0000000000662ab1 in string_list_clear () #4 0x000000000044f5bc in unlock_pack_on_signal () #5 <signal handler called> #6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024 #7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975 #8 0x000000000065afd5 in strbuf_release () #9 0x000000000066ddb9 in delete_tempfile () #10 0x0000000000610d0b in files_transaction_cleanup.isra () #11 0x0000000000611718 in files_transaction_abort () #12 0x000000000060d2ef in ref_transaction_abort () #13 0x000000000060d441 in ref_transaction_prepare () #14 0x000000000060e0b5 in ref_transaction_commit () #15 0x00000000004511c2 in fetch_and_consume_refs () #16 0x000000000045279a in cmd_fetch () #17 0x0000000000407c48 in handle_builtin () #18 0x0000000000408df2 in cmd_main () #19 0x00000000004078b5 in main () The process was killed with a signal, which caused the signal handler to kick in and try free the data structures after we have unlinked the locks. It then deadlocks while calling free(3P). The root cause of this is that it is not allowed to call certain functions in async-signal handlers, as specified by signal-safety(7). Next to most I/O functions, this list of disallowed functions also includes memory-handling functions like malloc(3P) and free(3P) because they may not be reentrant. As a result, if we execute such functions in the signal handler, then they may operate on inconistent state and fail in unexpected ways. Fix this bug by not calling non-async-signal-safe functions when running in the signal handler. We're about to re-raise the signal anyway and will thus exit, so it's not much of a problem to keep the string list of lockfiles untouched. Note that it's fine though to call unlink(2), so we'll still clean up the lockfiles correctly. Signed-off-by: Patrick Steinhardt <ps@pks.im> Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 18:55:47 +08:00
int in_signal_handler = !!(flags & TRANSPORT_UNLOCK_PACK_IN_SIGNAL_HANDLER);
int i;
for (i = 0; i < transport->pack_lockfiles.nr; i++)
fetch: fix deadlock when cleaning up lockfiles in async signals When fetching packfiles, we write a bunch of lockfiles for the packfiles we're writing into the repository. In order to not leave behind any cruft in case we exit or receive a signal, we register both an exit handler as well as signal handlers for common signals like SIGINT. These handlers will then unlink the locks and free the data structure tracking them. We have observed a deadlock in this logic though: (gdb) bt #0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95 #1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969 #2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975 #3 0x0000000000662ab1 in string_list_clear () #4 0x000000000044f5bc in unlock_pack_on_signal () #5 <signal handler called> #6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024 #7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975 #8 0x000000000065afd5 in strbuf_release () #9 0x000000000066ddb9 in delete_tempfile () #10 0x0000000000610d0b in files_transaction_cleanup.isra () #11 0x0000000000611718 in files_transaction_abort () #12 0x000000000060d2ef in ref_transaction_abort () #13 0x000000000060d441 in ref_transaction_prepare () #14 0x000000000060e0b5 in ref_transaction_commit () #15 0x00000000004511c2 in fetch_and_consume_refs () #16 0x000000000045279a in cmd_fetch () #17 0x0000000000407c48 in handle_builtin () #18 0x0000000000408df2 in cmd_main () #19 0x00000000004078b5 in main () The process was killed with a signal, which caused the signal handler to kick in and try free the data structures after we have unlinked the locks. It then deadlocks while calling free(3P). The root cause of this is that it is not allowed to call certain functions in async-signal handlers, as specified by signal-safety(7). Next to most I/O functions, this list of disallowed functions also includes memory-handling functions like malloc(3P) and free(3P) because they may not be reentrant. As a result, if we execute such functions in the signal handler, then they may operate on inconistent state and fail in unexpected ways. Fix this bug by not calling non-async-signal-safe functions when running in the signal handler. We're about to re-raise the signal anyway and will thus exit, so it's not much of a problem to keep the string list of lockfiles untouched. Note that it's fine though to call unlink(2), so we'll still clean up the lockfiles correctly. Signed-off-by: Patrick Steinhardt <ps@pks.im> Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 18:55:47 +08:00
if (in_signal_handler)
unlink(transport->pack_lockfiles.items[i].string);
else
unlink_or_warn(transport->pack_lockfiles.items[i].string);
if (!in_signal_handler)
string_list_clear(&transport->pack_lockfiles, 0);
}
int transport_connect(struct transport *transport, const char *name,
const char *exec, int fd[2])
{
if (transport->vtable->connect)
return transport->vtable->connect(transport, name, exec, fd);
else
die(_("operation not supported by protocol"));
}
int transport_disconnect(struct transport *transport)
{
int ret = 0;
if (transport->vtable->disconnect)
ret = transport->vtable->disconnect(transport);
transport: also free remote_refs in transport_disconnect() transport_get_remote_refs() can populate the transport struct's remote_refs. transport_disconnect() is already responsible for most of transport's cleanup - therefore we also take care of freeing remote_refs there. There are 2 locations where transport_disconnect() is called before we're done using the returned remote_refs. This patch changes those callsites to only call transport_disconnect() after the returned refs are no longer being used - which is necessary to safely be able to free remote_refs during transport_disconnect(). This commit fixes the following leak which was found while running t0000, but is expected to also fix the same pattern of leak in all locations that use transport_get_remote_refs(): Direct leak of 165 byte(s) in 1 object(s) allocated from: #0 0x49a6b2 in calloc /home/abuild/rpmbuild/BUILD/llvm-11.0.0.src/build/../projects/compiler-rt/lib/asan/asan_malloc_linux.cpp:154:3 #1 0x9a72f2 in xcalloc /home/ahunt/oss-fuzz/git/wrapper.c:140:8 #2 0x8ce203 in alloc_ref_with_prefix /home/ahunt/oss-fuzz/git/remote.c:867:20 #3 0x8ce1a2 in alloc_ref /home/ahunt/oss-fuzz/git/remote.c:875:9 #4 0x72f63e in process_ref_v2 /home/ahunt/oss-fuzz/git/connect.c:426:8 #5 0x72f21a in get_remote_refs /home/ahunt/oss-fuzz/git/connect.c:525:8 #6 0x979ab7 in handshake /home/ahunt/oss-fuzz/git/transport.c:305:4 #7 0x97872d in get_refs_via_connect /home/ahunt/oss-fuzz/git/transport.c:339:9 #8 0x9774b5 in transport_get_remote_refs /home/ahunt/oss-fuzz/git/transport.c:1388:4 #9 0x51cf80 in cmd_clone /home/ahunt/oss-fuzz/git/builtin/clone.c:1271:9 #10 0x4cd60d in run_builtin /home/ahunt/oss-fuzz/git/git.c:453:11 #11 0x4cb2da in handle_builtin /home/ahunt/oss-fuzz/git/git.c:704:3 #12 0x4ccc37 in run_argv /home/ahunt/oss-fuzz/git/git.c:771:4 #13 0x4cac29 in cmd_main /home/ahunt/oss-fuzz/git/git.c:902:19 #14 0x69c45e in main /home/ahunt/oss-fuzz/git/common-main.c:52:11 #15 0x7f6a459d5349 in __libc_start_main (/lib64/libc.so.6+0x24349) Signed-off-by: Andrzej Hunt <ajrhunt@google.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-03-22 00:58:37 +08:00
if (transport->got_remote_refs)
free_refs((void *)transport->remote_refs);
free(transport);
return ret;
}
/*
* Strip username (and password) from a URL and return
* it in a newly allocated string.
*/
char *transport_anonymize_url(const char *url)
{
char *scheme_prefix, *anon_part;
size_t anon_len, prefix_len = 0;
anon_part = strchr(url, '@');
if (url_is_local_not_ssh(url) || !anon_part)
goto literal_copy;
anon_len = strlen(++anon_part);
scheme_prefix = strstr(url, "://");
if (!scheme_prefix) {
if (!strchr(anon_part, ':'))
/* cannot be "me@there:/path/name" */
goto literal_copy;
} else {
const char *cp;
/* make sure scheme is reasonable */
for (cp = url; cp < scheme_prefix; cp++) {
switch (*cp) {
/* RFC 1738 2.1 */
case '+': case '.': case '-':
break; /* ok */
default:
if (isalnum(*cp))
break;
/* it isn't */
goto literal_copy;
}
}
/* @ past the first slash does not count */
cp = strchr(scheme_prefix + 3, '/');
if (cp && cp < anon_part)
goto literal_copy;
prefix_len = scheme_prefix - url + 3;
}
return xstrfmt("%.*s%.*s", (int)prefix_len, url,
(int)anon_len, anon_part);
literal_copy:
return xstrdup(url);
}