2006-01-26 04:38:36 +08:00
|
|
|
#include "git-compat-util.h"
|
2005-07-05 02:57:58 +08:00
|
|
|
#include "cache.h"
|
2017-06-15 02:07:36 +08:00
|
|
|
#include "config.h"
|
2023-03-21 14:26:03 +08:00
|
|
|
#include "environment.h"
|
2023-03-21 14:25:54 +08:00
|
|
|
#include "gettext.h"
|
2023-02-24 08:09:27 +08:00
|
|
|
#include "hex.h"
|
2005-07-06 06:44:09 +08:00
|
|
|
#include "pkt-line.h"
|
2005-07-08 15:02:52 +08:00
|
|
|
#include "quote.h"
|
2005-10-16 15:25:26 +08:00
|
|
|
#include "refs.h"
|
2007-03-13 07:00:19 +08:00
|
|
|
#include "run-command.h"
|
2007-05-12 23:45:59 +08:00
|
|
|
#include "remote.h"
|
2013-07-09 04:56:53 +08:00
|
|
|
#include "connect.h"
|
2010-05-23 17:19:44 +08:00
|
|
|
#include "url.h"
|
2013-09-18 10:10:31 +08:00
|
|
|
#include "string-list.h"
|
2020-03-30 22:03:46 +08:00
|
|
|
#include "oid-array.h"
|
transport: add a protocol-whitelist environment variable
If we are cloning an untrusted remote repository into a
sandbox, we may also want to fetch remote submodules in
order to get the complete view as intended by the other
side. However, that opens us up to attacks where a malicious
user gets us to clone something they would not otherwise
have access to (this is not necessarily a problem by itself,
but we may then act on the cloned contents in a way that
exposes them to the attacker).
Ideally such a setup would sandbox git entirely away from
high-value items, but this is not always practical or easy
to set up (e.g., OS network controls may block multiple
protocols, and we would want to enable some but not others).
We can help this case by providing a way to restrict
particular protocols. We use a whitelist in the environment.
This is more annoying to set up than a blacklist, but
defaults to safety if the set of protocols git supports
grows. If no whitelist is specified, we continue to default
to allowing all protocols (this is an "unsafe" default, but
since the minority of users will want this sandboxing
effect, it is the only sensible one).
A note on the tests: ideally these would all be in a single
test file, but the git-daemon and httpd test infrastructure
is an all-or-nothing proposition rather than a test-by-test
prerequisite. By putting them all together, we would be
unable to test the file-local code on machines without
apache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
|
|
|
#include "transport.h"
|
2017-09-27 07:56:19 +08:00
|
|
|
#include "strbuf.h"
|
2018-03-16 01:31:21 +08:00
|
|
|
#include "version.h"
|
2017-10-17 01:55:27 +08:00
|
|
|
#include "protocol.h"
|
2018-05-21 02:40:06 +08:00
|
|
|
#include "alias.h"
|
2022-12-22 23:14:09 +08:00
|
|
|
#include "bundle-uri.h"
|
2005-07-05 02:57:58 +08:00
|
|
|
|
2018-03-16 01:31:21 +08:00
|
|
|
/*
 * Capability string for the v0/v1 protocol.  process_capabilities()
 * stores here a heap copy of the text that follows the NUL on the first
 * advertised line.
 */
static char *server_capabilities_v1;
|
2020-07-29 04:24:53 +08:00
|
|
|
/*
 * Capabilities for protocol v2.  process_capabilities_v2() pushes one
 * entry per line it reads; the server_*_v2() helpers scan this vector.
 */
static struct strvec server_capabilities_v2 = STRVEC_INIT;
|
2020-05-26 03:58:52 +08:00
|
|
|
/*
 * Forward declaration; the definition is not part of this view.
 * annotate_refs_with_symref_info() calls it in a loop for the "symref"
 * feature, passing the same `offset` variable each time, and stops once
 * it returns NULL.
 */
static const char *next_server_feature_value(const char *feature, int *len, int *offset);
|
2005-10-28 10:48:54 +08:00
|
|
|
|
2014-08-30 17:46:54 +08:00
|
|
|
static int check_ref(const char *name, unsigned int flags)
|
Improve git-peek-remote
This makes git-peek-remote able to basically do everything that
git-ls-remote does (but obviously just for the native protocol, so no
http[s]: or rsync: support).
The default behaviour is the same, but you can now give a mixture of
"--refs", "--tags" and "--heads" flags, where "--refs" forces
git-peek-remote to only show real refs (ie none of the fakey tag lookups,
but also not the special pseudo-refs like HEAD and MERGE_HEAD).
The "--tags" and "--heads" flags respectively limit the output to just
regular tags and heads, of course.
You can still also ask to limit them by name too.
You can combine the flags, so
git peek-remote --refs --tags .
will show all local _true_ tags, without the generated tag lookups
(compare the output without the "--refs" flag).
And "--tags --heads" will show both tags and heads, but will avoid (for
example) any special refs outside of the standard locations.
I'm also planning on adding a "--ignore-local" flag that allows us to ask
it to ignore any refs that we already have in the local tree, but that's
an independent thing.
All this is obviously gearing up to making "git fetch" cheaper.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-07-05 03:29:10 +08:00
|
|
|
{
|
|
|
|
if (!flags)
|
|
|
|
return 1;
|
|
|
|
|
2014-08-30 17:46:54 +08:00
|
|
|
if (!skip_prefix(name, "refs/", &name))
|
Improve git-peek-remote
This makes git-peek-remote able to basically do everything that
git-ls-remote does (but obviously just for the native protocol, so no
http[s]: or rsync: support).
The default behaviour is the same, but you can now give a mixture of
"--refs", "--tags" and "--heads" flags, where "--refs" forces
git-peek-remote to only show real refs (ie none of the fakey tag lookups,
but also not the special pseudo-refs like HEAD and MERGE_HEAD).
The "--tags" and "--heads" flags respectively limit the output to just
regular tags and heads, of course.
You can still also ask to limit them by name too.
You can combine the flags, so
git peek-remote --refs --tags .
will show all local _true_ tags, without the generated tag lookups
(compare the output without the "--refs" flag).
And "--tags --heads" will show both tags and heads, but will avoid (for
example) any special refs outside of the standard locations.
I'm also planning on adding a "--ignore-local" flag that allows us to ask
it to ignore any refs that we already have in the local tree, but that's
an independent thing.
All this is obviously gearing up to making "git fetch" cheaper.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-07-05 03:29:10 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* REF_NORMAL means that we don't want the magic fake tag refs */
|
push: allow delete single-level ref
We discourage the creation/update of single-level refs
because some upper-layer applications only work in specified
reference namespaces, such as "refs/heads/*" or "refs/tags/*",
these single-level refnames may not be recognized. However,
we still hope users can delete them which have been created
by mistake.
Therefore, when updating branches on the server with
"git receive-pack", by checking whether it is a branch deletion
operation, it will determine whether to allow the update of
a single-level refs. This avoids creating/updating such
single-level refs, but allows them to be deleted.
On the client side, "git push" also does not properly fill in
the old-oid of single-level refs, which causes the server-side
"git receive-pack" to think that the ref's old-oid has changed
when deleting single-level refs, this causes the push to be
rejected. So the solution is to fix the client to be able to
delete single-level refs by properly filling old-oid.
Signed-off-by: ZheNing Hu <adlternative@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-01 18:20:29 +08:00
|
|
|
if ((flags & REF_NORMAL) && check_refname_format(name,
|
|
|
|
REFNAME_ALLOW_ONELEVEL))
|
Improve git-peek-remote
This makes git-peek-remote able to basically do everything that
git-ls-remote does (but obviously just for the native protocol, so no
http[s]: or rsync: support).
The default behaviour is the same, but you can now give a mixture of
"--refs", "--tags" and "--heads" flags, where "--refs" forces
git-peek-remote to only show real refs (ie none of the fakey tag lookups,
but also not the special pseudo-refs like HEAD and MERGE_HEAD).
The "--tags" and "--heads" flags respectively limit the output to just
regular tags and heads, of course.
You can still also ask to limit them by name too.
You can combine the flags, so
git peek-remote --refs --tags .
will show all local _true_ tags, without the generated tag lookups
(compare the output without the "--refs" flag).
And "--tags --heads" will show both tags and heads, but will avoid (for
example) any special refs outside of the standard locations.
I'm also planning on adding a "--ignore-local" flag that allows us to ask
it to ignore any refs that we already have in the local tree, but that's
an independent thing.
All this is obviously gearing up to making "git fetch" cheaper.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-07-05 03:29:10 +08:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* REF_HEADS means that we want regular branch heads */
|
2014-08-30 17:46:54 +08:00
|
|
|
if ((flags & REF_HEADS) && starts_with(name, "heads/"))
|
Improve git-peek-remote
This makes git-peek-remote able to basically do everything that
git-ls-remote does (but obviously just for the native protocol, so no
http[s]: or rsync: support).
The default behaviour is the same, but you can now give a mixture of
"--refs", "--tags" and "--heads" flags, where "--refs" forces
git-peek-remote to only show real refs (ie none of the fakey tag lookups,
but also not the special pseudo-refs like HEAD and MERGE_HEAD).
The "--tags" and "--heads" flags respectively limit the output to just
regular tags and heads, of course.
You can still also ask to limit them by name too.
You can combine the flags, so
git peek-remote --refs --tags .
will show all local _true_ tags, without the generated tag lookups
(compare the output without the "--refs" flag).
And "--tags --heads" will show both tags and heads, but will avoid (for
example) any special refs outside of the standard locations.
I'm also planning on adding a "--ignore-local" flag that allows us to ask
it to ignore any refs that we already have in the local tree, but that's
an independent thing.
All this is obviously gearing up to making "git fetch" cheaper.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-07-05 03:29:10 +08:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
/* REF_TAGS means that we want tags */
|
2014-08-30 17:46:54 +08:00
|
|
|
if ((flags & REF_TAGS) && starts_with(name, "tags/"))
|
Improve git-peek-remote
This makes git-peek-remote able to basically do everything that
git-ls-remote does (but obviously just for the native protocol, so no
http[s]: or rsync: support).
The default behaviour is the same, but you can now give a mixture of
"--refs", "--tags" and "--heads" flags, where "--refs" forces
git-peek-remote to only show real refs (ie none of the fakey tag lookups,
but also not the special pseudo-refs like HEAD and MERGE_HEAD).
The "--tags" and "--heads" flags respectively limit the output to just
regular tags and heads, of course.
You can still also ask to limit them by name too.
You can combine the flags, so
git peek-remote --refs --tags .
will show all local _true_ tags, without the generated tag lookups
(compare the output without the "--refs" flag).
And "--tags --heads" will show both tags and heads, but will avoid (for
example) any special refs outside of the standard locations.
I'm also planning on adding a "--ignore-local" flag that allows us to ask
it to ignore any refs that we already have in the local tree, but that's
an independent thing.
All this is obviously gearing up to making "git fetch" cheaper.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-07-05 03:29:10 +08:00
|
|
|
return 1;
|
|
|
|
|
|
|
|
/* All type bits clear means that we are ok with anything */
|
|
|
|
return !(flags & ~REF_NORMAL);
|
|
|
|
}
|
|
|
|
|
2007-10-30 09:05:40 +08:00
|
|
|
int check_ref_type(const struct ref *ref, int flags)
|
|
|
|
{
|
2014-08-30 17:46:54 +08:00
|
|
|
return check_ref(ref->name, flags);
|
2007-10-30 09:05:40 +08:00
|
|
|
}
|
|
|
|
|
2018-04-15 03:19:43 +08:00
|
|
|
/*
 * Abort because the other end went away while we were reading its
 * initial response.  Never returns.
 *
 * `unexpected` is non-zero when the hang-up came after we had already
 * seen some response: the other end was evidently willing to talk to
 * us, so the hang-up is a surprise.  A hang-up before any response does
 * not necessarily mean an ACL problem, hence the more general advice
 * printed in that case.
 */
static NORETURN void die_initial_contact(int unexpected)
{
	if (!unexpected)
		die(_("Could not read from remote repository.\n\n"
		      "Please make sure you have the correct access rights\n"
		      "and the repository exists."));

	die(_("the remote end hung up upon initial contact"));
}
|
|
|
|
|
2018-03-16 01:31:21 +08:00
|
|
|
/* Checks if the server supports the capability 'c' */
|
2022-12-13 18:52:58 +08:00
|
|
|
int server_supports_v2(const char *c)
|
2018-03-16 01:31:21 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2020-07-29 08:37:20 +08:00
|
|
|
for (i = 0; i < server_capabilities_v2.nr; i++) {
|
2018-03-16 01:31:21 +08:00
|
|
|
const char *out;
|
2020-07-29 08:37:20 +08:00
|
|
|
if (skip_prefix(server_capabilities_v2.v[i], c, &out) &&
|
2018-03-16 01:31:21 +08:00
|
|
|
(!*out || *out == '='))
|
|
|
|
return 1;
|
|
|
|
}
|
2022-12-13 18:52:58 +08:00
|
|
|
return 0;
|
|
|
|
}
|
2018-03-16 01:31:21 +08:00
|
|
|
|
2022-12-13 18:52:58 +08:00
|
|
|
/*
 * Die with a translated message unless server_supports_v2() reports
 * that the capability `c` was advertised.
 */
void ensure_server_supports_v2(const char *c)
{
	if (server_supports_v2(c))
		return;

	die(_("server doesn't support '%s'"), c);
}
|
|
|
|
|
2020-05-26 03:58:53 +08:00
|
|
|
int server_feature_v2(const char *c, const char **v)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2020-07-29 08:37:20 +08:00
|
|
|
for (i = 0; i < server_capabilities_v2.nr; i++) {
|
2020-05-26 03:58:53 +08:00
|
|
|
const char *out;
|
2020-07-29 08:37:20 +08:00
|
|
|
if (skip_prefix(server_capabilities_v2.v[i], c, &out) &&
|
2020-05-26 03:58:53 +08:00
|
|
|
(*out == '=')) {
|
|
|
|
*v = out + 1;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-16 01:31:29 +08:00
|
|
|
int server_supports_feature(const char *c, const char *feature,
|
|
|
|
int die_on_error)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2020-07-29 08:37:20 +08:00
|
|
|
for (i = 0; i < server_capabilities_v2.nr; i++) {
|
2018-03-16 01:31:29 +08:00
|
|
|
const char *out;
|
2020-07-29 08:37:20 +08:00
|
|
|
if (skip_prefix(server_capabilities_v2.v[i], c, &out) &&
|
2018-03-16 01:31:29 +08:00
|
|
|
(!*out || *(out++) == '=')) {
|
|
|
|
if (parse_feature_request(out, feature))
|
|
|
|
return 1;
|
|
|
|
else
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (die_on_error)
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("server doesn't support feature '%s'"), feature);
|
2018-03-16 01:31:29 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-16 01:31:21 +08:00
|
|
|
static void process_capabilities_v2(struct packet_reader *reader)
|
|
|
|
{
|
|
|
|
while (packet_reader_read(reader) == PACKET_READ_NORMAL)
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(&server_capabilities_v2, reader->line);
|
2018-03-16 01:31:21 +08:00
|
|
|
|
|
|
|
if (reader->status != PACKET_READ_FLUSH)
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("expected flush after capabilities"));
|
2018-03-16 01:31:21 +08:00
|
|
|
}
|
|
|
|
|
2018-03-15 02:31:45 +08:00
|
|
|
enum protocol_version discover_version(struct packet_reader *reader)
|
2018-03-15 02:31:44 +08:00
|
|
|
{
|
|
|
|
enum protocol_version version = protocol_unknown_version;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Peek the first line of the server's response to
|
|
|
|
* determine the protocol version the server is speaking.
|
|
|
|
*/
|
|
|
|
switch (packet_reader_peek(reader)) {
|
|
|
|
case PACKET_READ_EOF:
|
|
|
|
die_initial_contact(0);
|
|
|
|
case PACKET_READ_FLUSH:
|
|
|
|
case PACKET_READ_DELIM:
|
2020-05-19 18:53:59 +08:00
|
|
|
case PACKET_READ_RESPONSE_END:
|
2018-03-15 02:31:44 +08:00
|
|
|
version = protocol_v0;
|
|
|
|
break;
|
|
|
|
case PACKET_READ_NORMAL:
|
|
|
|
version = determine_protocol_version_client(reader->line);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (version) {
|
2018-03-15 02:31:47 +08:00
|
|
|
case protocol_v2:
|
2018-03-16 01:31:21 +08:00
|
|
|
process_capabilities_v2(reader);
|
2018-03-15 02:31:47 +08:00
|
|
|
break;
|
2018-03-15 02:31:44 +08:00
|
|
|
case protocol_v1:
|
|
|
|
/* Read the peeked version line */
|
|
|
|
packet_reader_read(reader);
|
|
|
|
break;
|
|
|
|
case protocol_v0:
|
|
|
|
break;
|
|
|
|
case protocol_unknown_version:
|
|
|
|
BUG("unknown protocol version");
|
|
|
|
}
|
|
|
|
|
2021-08-11 01:20:39 +08:00
|
|
|
trace2_data_intmax("transfer", NULL, "negotiated-version", version);
|
|
|
|
|
2018-03-15 02:31:44 +08:00
|
|
|
return version;
|
|
|
|
}
|
|
|
|
|
2013-09-18 10:10:31 +08:00
|
|
|
static void parse_one_symref_info(struct string_list *symref, const char *val, int len)
|
|
|
|
{
|
|
|
|
char *sym, *target;
|
|
|
|
struct string_list_item *item;
|
|
|
|
|
|
|
|
if (!len)
|
|
|
|
return; /* just "symref" */
|
|
|
|
/* e.g. "symref=HEAD:refs/heads/master" */
|
2014-07-19 23:35:34 +08:00
|
|
|
sym = xmemdupz(val, len);
|
2013-09-18 10:10:31 +08:00
|
|
|
target = strchr(sym, ':');
|
|
|
|
if (!target)
|
|
|
|
/* just "symref=something" */
|
|
|
|
goto reject;
|
|
|
|
*(target++) = '\0';
|
|
|
|
if (check_refname_format(sym, REFNAME_ALLOW_ONELEVEL) ||
|
|
|
|
check_refname_format(target, REFNAME_ALLOW_ONELEVEL))
|
|
|
|
/* "symref=bogus:pair */
|
|
|
|
goto reject;
|
connect.c: fix leak in parse_one_symref_info()
If we successfully parse a symref value like
"HEAD:refs/heads/master", we add the result to a string
list. But because the string list is marked
STRING_LIST_INIT_DUP, the string list code will make a copy
of the string and add the copy.
This patch fixes it by adding the entry with
string_list_append_nodup(), which lets the string list take
ownership of our newly allocated string. There are two
alternatives that seem like they would work, but aren't the
right solution.
The first is to initialize the list with the "NODUP"
initializer. That would avoid the copy, but then the string
list would not realize that it owns the strings. When we
eventually call string_list_clear(), it would not free the
strings, causing a leak.
The second option would be to use the normal
string_list_append(), but free the local copy in our
function. We can't do this because the local copy actually
contains _two_ strings; the symref name and its target. We
point to the target pointer via the "util" field, and its
memory must last as long as the string list does.
You may also wonder whether it's safe to ever free the local
copy, since the target points into it. The answer is yes,
because we duplicate it in annotaate_refs_with_symref_info
before clearing the string list.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-05-26 03:33:05 +08:00
|
|
|
item = string_list_append_nodup(symref, sym);
|
2013-09-18 10:10:31 +08:00
|
|
|
item->util = target;
|
|
|
|
return;
|
|
|
|
reject:
|
|
|
|
free(sym);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void annotate_refs_with_symref_info(struct ref *ref)
|
|
|
|
{
|
|
|
|
struct string_list symref = STRING_LIST_INIT_DUP;
|
2020-05-26 03:58:52 +08:00
|
|
|
int offset = 0;
|
2013-09-18 10:10:31 +08:00
|
|
|
|
2020-05-26 03:58:52 +08:00
|
|
|
while (1) {
|
2013-09-18 10:10:31 +08:00
|
|
|
int len;
|
|
|
|
const char *val;
|
|
|
|
|
2020-05-26 03:58:52 +08:00
|
|
|
val = next_server_feature_value("symref", &len, &offset);
|
2013-09-18 10:10:31 +08:00
|
|
|
if (!val)
|
|
|
|
break;
|
|
|
|
parse_one_symref_info(&symref, val, len);
|
|
|
|
}
|
2014-11-25 16:02:35 +08:00
|
|
|
string_list_sort(&symref);
|
2013-09-18 10:10:31 +08:00
|
|
|
|
|
|
|
for (; ref; ref = ref->next) {
|
|
|
|
struct string_list_item *item;
|
|
|
|
item = string_list_lookup(&symref, ref->name);
|
|
|
|
if (!item)
|
|
|
|
continue;
|
|
|
|
ref->symref = xstrdup((char *)item->util);
|
|
|
|
}
|
|
|
|
string_list_clear(&symref, 0);
|
|
|
|
}
|
|
|
|
|
2020-05-26 03:58:49 +08:00
|
|
|
static void process_capabilities(struct packet_reader *reader, int *linelen)
|
2017-10-17 01:55:27 +08:00
|
|
|
{
|
2020-05-26 03:59:00 +08:00
|
|
|
const char *feat_val;
|
|
|
|
int feat_len;
|
2020-05-26 03:58:49 +08:00
|
|
|
const char *line = reader->line;
|
2018-03-15 02:31:44 +08:00
|
|
|
int nul_location = strlen(line);
|
2020-05-26 03:58:49 +08:00
|
|
|
if (nul_location == *linelen)
|
2017-09-27 07:56:19 +08:00
|
|
|
return;
|
2018-03-16 01:31:21 +08:00
|
|
|
server_capabilities_v1 = xstrdup(line + nul_location + 1);
|
2020-05-26 03:58:49 +08:00
|
|
|
*linelen = nul_location;
|
2020-05-26 03:59:00 +08:00
|
|
|
|
|
|
|
feat_val = server_feature_value("object-format", &feat_len);
|
|
|
|
if (feat_val) {
|
|
|
|
char *hash_name = xstrndup(feat_val, feat_len);
|
|
|
|
int hash_algo = hash_algo_by_name(hash_name);
|
|
|
|
if (hash_algo != GIT_HASH_UNKNOWN)
|
|
|
|
reader->hash_algo = &hash_algos[hash_algo];
|
|
|
|
free(hash_name);
|
|
|
|
} else {
|
|
|
|
reader->hash_algo = &hash_algos[GIT_HASH_SHA1];
|
|
|
|
}
|
2017-09-27 07:56:19 +08:00
|
|
|
}
|
|
|
|
|
2020-05-26 03:58:49 +08:00
|
|
|
static int process_dummy_ref(const struct packet_reader *reader)
|
2017-09-27 07:56:19 +08:00
|
|
|
{
|
2020-05-26 03:58:49 +08:00
|
|
|
const char *line = reader->line;
|
2017-09-27 07:56:19 +08:00
|
|
|
struct object_id oid;
|
|
|
|
const char *name;
|
|
|
|
|
2020-05-26 03:59:00 +08:00
|
|
|
if (parse_oid_hex_algop(line, &oid, &name, reader->hash_algo))
|
2017-09-27 07:56:19 +08:00
|
|
|
return 0;
|
|
|
|
if (*name != ' ')
|
|
|
|
return 0;
|
|
|
|
name++;
|
|
|
|
|
v0 protocol: fix sha1/sha256 confusion for capabilities^{}
Commit eb398797cd (connect: advertized capability is not a ref,
2016-09-09) added support for an upload-pack server responding with:
0000000000000000000000000000000000000000 capabilities^{}
followed by a NUL and the actual capabilities. We correctly parse the
oid using the packet_reader's hash_algo field, but then we compare it to
null_oid(), which will instead use our current repo's default algorithm.
If we're defaulting to sha256 locally but the other side is sha1, they
won't match and we'll fail to parse the line (and thus die()).
This can cause a test failure when the suite is run with
GIT_TEST_DEFAULT_HASH=sha256, and we even do so regularly via the
linux-sha256 CI job. But since the test requires JGit to run, it's
usually just skipped, and nobody noticed the problem.
The reason the original patch used JGit is that Git itself does not ever
produce such a line via upload-pack; the feature was added to fix a
real-world problem when interacting with JGit. That was good for
verifying that the incompatibility was fixed, but it's not a good
regression test:
- hardly anybody runs it, because you have to have jgit installed;
hence this bug going unnoticed
- we're depending on jgit's behavior for the test to do anything
useful. In particular, this behavior is only relevant to the v0
protocol, but these days we ask for the v2 protocol by default. So
for modern jgit, this is probably testing nothing.
- it's complicated and slow. We had to do some fifo trickery to handle
races, and this one test makes up 40% of the runtime of the total
script.
Instead, let's just hard-code the response that's of interest to us.
That will test exactly what we want for every run, and reveals the bug
when run in sha256 mode. And of course we'll fix the actual bug by using
the correct hash_algo struct.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-04-15 05:25:11 +08:00
|
|
|
return oideq(reader->hash_algo->null_oid, &oid) &&
|
|
|
|
!strcmp(name, "capabilities^{}");
|
2017-09-27 07:56:19 +08:00
|
|
|
}
|
|
|
|
|
2018-03-15 02:31:44 +08:00
|
|
|
/*
 * Warn when a line carries extra data after its NUL terminator.
 *
 * `line` is the packet payload and `len` its full length.  Callers in
 * this file use it on ref and shallow lines, where any bytes after the
 * visible text are ignored, and the warning quotes the ignored text.
 */
static void check_no_capabilities(const char *line, int len)
{
	size_t visible = strlen(line);

	/*
	 * The ignored text starts one byte past the NUL.  Pointing at the
	 * NUL itself would always print an empty string.
	 *
	 * NOTE(review): this relies on the packet buffer being
	 * NUL-terminated at `len` - confirm against the pkt-line reader.
	 */
	if (visible != len)
		warning(_("ignoring capabilities after first line '%s'"),
			line + visible + 1);
}
|
|
|
|
|
2020-05-26 03:58:49 +08:00
|
|
|
static int process_ref(const struct packet_reader *reader, int len,
|
|
|
|
struct ref ***list, unsigned int flags,
|
|
|
|
struct oid_array *extra_have)
|
2017-09-27 07:56:19 +08:00
|
|
|
{
|
2020-05-26 03:58:49 +08:00
|
|
|
const char *line = reader->line;
|
2017-09-27 07:56:19 +08:00
|
|
|
struct object_id old_oid;
|
|
|
|
const char *name;
|
|
|
|
|
2020-05-26 03:59:00 +08:00
|
|
|
if (parse_oid_hex_algop(line, &old_oid, &name, reader->hash_algo))
|
2017-09-27 07:56:19 +08:00
|
|
|
return 0;
|
|
|
|
if (*name != ' ')
|
|
|
|
return 0;
|
|
|
|
name++;
|
|
|
|
|
|
|
|
if (extra_have && !strcmp(name, ".have")) {
|
|
|
|
oid_array_append(extra_have, &old_oid);
|
|
|
|
} else if (!strcmp(name, "capabilities^{}")) {
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("protocol error: unexpected capabilities^{}"));
|
2017-09-27 07:56:19 +08:00
|
|
|
} else if (check_ref(name, flags)) {
|
|
|
|
struct ref *ref = alloc_ref(name);
|
|
|
|
oidcpy(&ref->old_oid, &old_oid);
|
|
|
|
**list = ref;
|
|
|
|
*list = &ref->next;
|
|
|
|
}
|
2018-03-15 02:31:44 +08:00
|
|
|
check_no_capabilities(line, len);
|
2017-09-27 07:56:19 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2020-05-26 03:58:49 +08:00
|
|
|
static int process_shallow(const struct packet_reader *reader, int len,
|
2018-03-15 02:31:44 +08:00
|
|
|
struct oid_array *shallow_points)
|
2017-09-27 07:56:19 +08:00
|
|
|
{
|
2020-05-26 03:58:49 +08:00
|
|
|
const char *line = reader->line;
|
2017-09-27 07:56:19 +08:00
|
|
|
const char *arg;
|
|
|
|
struct object_id old_oid;
|
|
|
|
|
2018-03-15 02:31:44 +08:00
|
|
|
if (!skip_prefix(line, "shallow ", &arg))
|
2017-09-27 07:56:19 +08:00
|
|
|
return 0;
|
|
|
|
|
2020-05-26 03:59:00 +08:00
|
|
|
if (get_oid_hex_algop(arg, &old_oid, reader->hash_algo))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("protocol error: expected shallow sha-1, got '%s'"), arg);
|
2017-09-27 07:56:19 +08:00
|
|
|
if (!shallow_points)
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("repository on the other end cannot be shallow"));
|
2017-09-27 07:56:19 +08:00
|
|
|
oid_array_append(shallow_points, &old_oid);
|
2018-03-15 02:31:44 +08:00
|
|
|
check_no_capabilities(line, len);
|
2017-09-27 07:56:19 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2018-03-15 02:31:44 +08:00
|
|
|
/* Parser states used by get_remote_heads(), in the order they occur. */
enum get_remote_heads_state {
	/* Nothing read yet; the first line may carry capabilities. */
	EXPECTING_FIRST_REF = 0,
	/* Reading ref lines. */
	EXPECTING_REF = 1,
	/* Ref lines are over; only "shallow" lines may follow. */
	EXPECTING_SHALLOW = 2,
	/* A flush packet was seen; stop reading. */
	EXPECTING_DONE = 3,
};
|
|
|
|
|
2005-07-17 04:55:50 +08:00
|
|
|
/*
|
|
|
|
* Read all the refs from the other end
|
|
|
|
*/
|
2018-03-15 02:31:45 +08:00
|
|
|
struct ref **get_remote_heads(struct packet_reader *reader,
|
2013-02-21 04:06:45 +08:00
|
|
|
struct ref **list, unsigned int flags,
|
2017-03-31 09:40:00 +08:00
|
|
|
struct oid_array *extra_have,
|
|
|
|
struct oid_array *shallow_points)
|
2005-07-17 04:55:50 +08:00
|
|
|
{
|
2013-09-18 10:10:31 +08:00
|
|
|
struct ref **orig_list = list;
|
2018-03-15 02:31:44 +08:00
|
|
|
int len = 0;
|
|
|
|
enum get_remote_heads_state state = EXPECTING_FIRST_REF;
|
2016-09-10 01:36:29 +08:00
|
|
|
|
2005-07-17 04:55:50 +08:00
|
|
|
*list = NULL;
|
2005-10-14 09:57:40 +08:00
|
|
|
|
2018-03-15 02:31:44 +08:00
|
|
|
while (state != EXPECTING_DONE) {
|
2018-03-15 02:31:45 +08:00
|
|
|
switch (packet_reader_read(reader)) {
|
2018-03-15 02:31:44 +08:00
|
|
|
case PACKET_READ_EOF:
|
|
|
|
die_initial_contact(1);
|
|
|
|
case PACKET_READ_NORMAL:
|
2018-03-15 02:31:45 +08:00
|
|
|
len = reader->pktlen;
|
2018-03-15 02:31:44 +08:00
|
|
|
break;
|
|
|
|
case PACKET_READ_FLUSH:
|
|
|
|
state = EXPECTING_DONE;
|
|
|
|
break;
|
|
|
|
case PACKET_READ_DELIM:
|
2020-05-19 18:53:59 +08:00
|
|
|
case PACKET_READ_RESPONSE_END:
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("invalid packet"));
|
2018-03-15 02:31:44 +08:00
|
|
|
}
|
|
|
|
|
2017-09-27 07:56:19 +08:00
|
|
|
switch (state) {
|
|
|
|
case EXPECTING_FIRST_REF:
|
2020-05-26 03:58:49 +08:00
|
|
|
process_capabilities(reader, &len);
|
|
|
|
if (process_dummy_ref(reader)) {
|
2017-09-27 07:56:19 +08:00
|
|
|
state = EXPECTING_SHALLOW;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
state = EXPECTING_REF;
|
|
|
|
/* fallthrough */
|
|
|
|
case EXPECTING_REF:
|
2020-05-26 03:58:49 +08:00
|
|
|
if (process_ref(reader, len, &list, flags, extra_have))
|
2017-09-27 07:56:19 +08:00
|
|
|
break;
|
|
|
|
state = EXPECTING_SHALLOW;
|
|
|
|
/* fallthrough */
|
|
|
|
case EXPECTING_SHALLOW:
|
2020-05-26 03:58:49 +08:00
|
|
|
if (process_shallow(reader, len, shallow_points))
|
2017-09-27 07:56:19 +08:00
|
|
|
break;
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("protocol error: unexpected '%s'"), reader->line);
|
2018-03-15 02:31:44 +08:00
|
|
|
case EXPECTING_DONE:
|
|
|
|
break;
|
2005-10-28 10:48:54 +08:00
|
|
|
}
|
2005-07-17 04:55:50 +08:00
|
|
|
}
|
2013-09-18 10:10:31 +08:00
|
|
|
|
|
|
|
annotate_refs_with_symref_info(*orig_list);
|
|
|
|
|
2005-07-17 04:55:50 +08:00
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2018-03-16 01:31:21 +08:00
|
|
|
/* Returns 1 when a valid ref has been added to `list`, 0 otherwise */
|
2021-02-06 04:48:49 +08:00
|
|
|
static int process_ref_v2(struct packet_reader *reader, struct ref ***list,
|
2022-02-05 08:08:14 +08:00
|
|
|
const char **unborn_head_target)
|
2018-03-16 01:31:21 +08:00
|
|
|
{
|
|
|
|
int ret = 1;
|
|
|
|
int i = 0;
|
|
|
|
struct object_id old_oid;
|
|
|
|
struct ref *ref;
|
|
|
|
struct string_list line_sections = STRING_LIST_INIT_DUP;
|
|
|
|
const char *end;
|
2020-05-26 03:59:15 +08:00
|
|
|
const char *line = reader->line;
|
2018-03-16 01:31:21 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Ref lines have a number of fields which are space deliminated. The
|
|
|
|
* first field is the OID of the ref. The second field is the ref
|
|
|
|
* name. Subsequent fields (symref-target and peeled) are optional and
|
|
|
|
* don't have a particular order.
|
|
|
|
*/
|
|
|
|
if (string_list_split(&line_sections, line, ' ', -1) < 2) {
|
|
|
|
ret = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2021-02-06 04:48:49 +08:00
|
|
|
if (!strcmp("unborn", line_sections.items[i].string)) {
|
|
|
|
i++;
|
|
|
|
if (unborn_head_target &&
|
|
|
|
!strcmp("HEAD", line_sections.items[i++].string)) {
|
|
|
|
/*
|
|
|
|
* Look for the symref target (if any). If found,
|
|
|
|
* return it to the caller.
|
|
|
|
*/
|
|
|
|
for (; i < line_sections.nr; i++) {
|
|
|
|
const char *arg = line_sections.items[i].string;
|
|
|
|
|
|
|
|
if (skip_prefix(arg, "symref-target:", &arg)) {
|
|
|
|
*unborn_head_target = xstrdup(arg);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
goto out;
|
|
|
|
}
|
2020-05-26 03:59:16 +08:00
|
|
|
if (parse_oid_hex_algop(line_sections.items[i++].string, &old_oid, &end, reader->hash_algo) ||
|
2018-03-16 01:31:21 +08:00
|
|
|
*end) {
|
|
|
|
ret = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
ref = alloc_ref(line_sections.items[i++].string);
|
|
|
|
|
2020-05-26 03:59:16 +08:00
|
|
|
memcpy(ref->old_oid.hash, old_oid.hash, reader->hash_algo->rawsz);
|
2018-03-16 01:31:21 +08:00
|
|
|
**list = ref;
|
|
|
|
*list = &ref->next;
|
|
|
|
|
|
|
|
for (; i < line_sections.nr; i++) {
|
|
|
|
const char *arg = line_sections.items[i].string;
|
|
|
|
if (skip_prefix(arg, "symref-target:", &arg))
|
|
|
|
ref->symref = xstrdup(arg);
|
|
|
|
|
|
|
|
if (skip_prefix(arg, "peeled:", &arg)) {
|
|
|
|
struct object_id peeled_oid;
|
|
|
|
char *peeled_name;
|
|
|
|
struct ref *peeled;
|
2020-05-26 03:59:16 +08:00
|
|
|
if (parse_oid_hex_algop(arg, &peeled_oid, &end,
|
|
|
|
reader->hash_algo) || *end) {
|
2018-03-16 01:31:21 +08:00
|
|
|
ret = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
peeled_name = xstrfmt("%s^{}", ref->name);
|
|
|
|
peeled = alloc_ref(peeled_name);
|
|
|
|
|
2020-05-26 03:59:16 +08:00
|
|
|
memcpy(peeled->old_oid.hash, peeled_oid.hash,
|
|
|
|
reader->hash_algo->rawsz);
|
2018-03-16 01:31:21 +08:00
|
|
|
**list = peeled;
|
|
|
|
*list = &peeled->next;
|
|
|
|
|
|
|
|
free(peeled_name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
string_list_clear(&line_sections, 0);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2020-05-19 18:54:00 +08:00
|
|
|
void check_stateless_delimiter(int stateless_rpc,
|
|
|
|
struct packet_reader *reader,
|
|
|
|
const char *error)
|
|
|
|
{
|
|
|
|
if (!stateless_rpc)
|
|
|
|
return; /* not in stateless mode, no delimiter expected */
|
|
|
|
if (packet_reader_read(reader) != PACKET_READ_RESPONSE_END)
|
|
|
|
die("%s", error);
|
|
|
|
}
|
|
|
|
|
2022-05-17 04:10:58 +08:00
|
|
|
static void send_capabilities(int fd_out, struct packet_reader *reader)
|
|
|
|
{
|
|
|
|
const char *hash_name;
|
|
|
|
|
2022-12-13 18:52:58 +08:00
|
|
|
if (server_supports_v2("agent"))
|
2022-05-17 04:10:58 +08:00
|
|
|
packet_write_fmt(fd_out, "agent=%s", git_user_agent_sanitized());
|
|
|
|
|
|
|
|
if (server_feature_v2("object-format", &hash_name)) {
|
|
|
|
int hash_algo = hash_algo_by_name(hash_name);
|
|
|
|
if (hash_algo == GIT_HASH_UNKNOWN)
|
|
|
|
die(_("unknown object format '%s' specified by server"), hash_name);
|
|
|
|
reader->hash_algo = &hash_algos[hash_algo];
|
|
|
|
packet_write_fmt(fd_out, "object-format=%s", reader->hash_algo->name);
|
|
|
|
} else {
|
|
|
|
reader->hash_algo = &hash_algos[GIT_HASH_SHA1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-22 23:14:09 +08:00
|
|
|
int get_remote_bundle_uri(int fd_out, struct packet_reader *reader,
|
|
|
|
struct bundle_list *bundles, int stateless_rpc)
|
|
|
|
{
|
|
|
|
int line_nr = 1;
|
|
|
|
|
|
|
|
/* Assert bundle-uri support */
|
2023-01-02 20:37:18 +08:00
|
|
|
ensure_server_supports_v2("bundle-uri");
|
2022-12-22 23:14:09 +08:00
|
|
|
|
|
|
|
/* (Re-)send capabilities */
|
|
|
|
send_capabilities(fd_out, reader);
|
|
|
|
|
|
|
|
/* Send command */
|
|
|
|
packet_write_fmt(fd_out, "command=bundle-uri\n");
|
|
|
|
packet_delim(fd_out);
|
|
|
|
|
|
|
|
packet_flush(fd_out);
|
|
|
|
|
|
|
|
/* Process response from server */
|
|
|
|
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
|
|
|
|
const char *line = reader->line;
|
|
|
|
line_nr++;
|
|
|
|
|
|
|
|
if (!bundle_uri_parse_line(bundles, line))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
return error(_("error on bundle-uri response line %d: %s"),
|
|
|
|
line_nr, line);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (reader->status != PACKET_READ_FLUSH)
|
|
|
|
return error(_("expected flush after bundle-uri listing"));
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Might die(), but obscure enough that that's OK, e.g. in
|
|
|
|
* serve.c we'll call BUG() on its equivalent (the
|
|
|
|
* PACKET_READ_RESPONSE_END check).
|
|
|
|
*/
|
|
|
|
check_stateless_delimiter(stateless_rpc, reader,
|
|
|
|
_("expected response end packet after ref listing"));
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2018-03-16 01:31:21 +08:00
|
|
|
struct ref **get_remote_refs(int fd_out, struct packet_reader *reader,
|
|
|
|
struct ref **list, int for_push,
|
2021-02-06 04:48:48 +08:00
|
|
|
struct transport_ls_refs_options *transport_options,
|
2020-05-19 18:54:00 +08:00
|
|
|
const struct string_list *server_options,
|
|
|
|
int stateless_rpc)
|
2018-03-16 01:31:21 +08:00
|
|
|
{
|
|
|
|
int i;
|
2021-02-06 04:48:48 +08:00
|
|
|
struct strvec *ref_prefixes = transport_options ?
|
|
|
|
&transport_options->ref_prefixes : NULL;
|
2022-02-05 08:08:14 +08:00
|
|
|
const char **unborn_head_target = transport_options ?
|
2021-02-06 04:48:49 +08:00
|
|
|
&transport_options->unborn_head_target : NULL;
|
2018-03-16 01:31:21 +08:00
|
|
|
*list = NULL;
|
|
|
|
|
2022-12-13 18:52:58 +08:00
|
|
|
ensure_server_supports_v2("ls-refs");
|
|
|
|
packet_write_fmt(fd_out, "command=ls-refs\n");
|
2018-03-16 01:31:21 +08:00
|
|
|
|
2022-05-17 04:10:58 +08:00
|
|
|
/* Send capabilities */
|
|
|
|
send_capabilities(fd_out, reader);
|
2020-05-26 03:59:16 +08:00
|
|
|
|
2022-12-13 18:52:58 +08:00
|
|
|
if (server_options && server_options->nr) {
|
|
|
|
ensure_server_supports_v2("server-option");
|
2018-04-24 06:46:23 +08:00
|
|
|
for (i = 0; i < server_options->nr; i++)
|
|
|
|
packet_write_fmt(fd_out, "server-option=%s",
|
|
|
|
server_options->items[i].string);
|
2022-12-13 18:52:58 +08:00
|
|
|
}
|
2018-04-24 06:46:23 +08:00
|
|
|
|
2018-03-16 01:31:21 +08:00
|
|
|
packet_delim(fd_out);
|
|
|
|
/* When pushing we don't want to request the peeled tags */
|
|
|
|
if (!for_push)
|
|
|
|
packet_write_fmt(fd_out, "peel\n");
|
|
|
|
packet_write_fmt(fd_out, "symrefs\n");
|
2021-02-06 04:48:49 +08:00
|
|
|
if (server_supports_feature("ls-refs", "unborn", 0))
|
|
|
|
packet_write_fmt(fd_out, "unborn\n");
|
2020-07-29 08:37:20 +08:00
|
|
|
for (i = 0; ref_prefixes && i < ref_prefixes->nr; i++) {
|
2018-03-16 01:31:21 +08:00
|
|
|
packet_write_fmt(fd_out, "ref-prefix %s\n",
|
2020-07-29 08:37:20 +08:00
|
|
|
ref_prefixes->v[i]);
|
2018-03-16 01:31:21 +08:00
|
|
|
}
|
|
|
|
packet_flush(fd_out);
|
|
|
|
|
|
|
|
/* Process response from server */
|
|
|
|
while (packet_reader_read(reader) == PACKET_READ_NORMAL) {
|
2021-02-06 04:48:49 +08:00
|
|
|
if (!process_ref_v2(reader, &list, unborn_head_target))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("invalid ls-refs response: %s"), reader->line);
|
2018-03-16 01:31:21 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (reader->status != PACKET_READ_FLUSH)
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("expected flush after ref listing"));
|
2018-03-16 01:31:21 +08:00
|
|
|
|
2020-05-19 18:54:00 +08:00
|
|
|
check_stateless_delimiter(stateless_rpc, reader,
|
|
|
|
_("expected response end packet after ref listing"));
|
|
|
|
|
2018-03-16 01:31:21 +08:00
|
|
|
return list;
|
|
|
|
}
|
|
|
|
|
2020-05-26 03:58:58 +08:00
|
|
|
/*
 * Look up "feature" in the whitespace-separated capability string
 * "feature_list" and return a pointer to its value.
 *
 *  - For a bare feature (e.g. "thin-pack") the returned pointer is the
 *    position just past the name and *lenp is set to 0.
 *  - For "name=value" (e.g. "agent=git/1.2.3") the returned pointer is
 *    the start of the value and *lenp is its length, i.e. the number of
 *    bytes up to the next space, tab, newline or the end of the string.
 *
 * "lenp" and "offset" may each be NULL. When "offset" is given it is both
 * an input and an output: the search starts *offset bytes into
 * feature_list, and on success *offset is set to the position just past
 * the match, always counted from the start of feature_list. Calling
 * repeatedly with the same offset variable (initially 0) therefore
 * enumerates every occurrence of a multi-valued capability.
 *
 * Returns NULL if feature_list is NULL or the feature is not present.
 */
const char *parse_feature_value(const char *feature_list, const char *feature, int *lenp, int *offset)
{
	const char *orig_start = feature_list;
	int len;

	if (!feature_list)
		return NULL;

	len = strlen(feature);
	if (offset)
		feature_list += *offset;
	while (*feature_list) {
		const char *found = strstr(feature_list, feature);
		if (!found)
			return NULL;
		/*
		 * A match only counts at the start of a word. Judge that
		 * against the real start of the string rather than against
		 * the moving search cursor: after a near-miss the cursor
		 * sits in the middle of a word, and treating it as a word
		 * start would let e.g. "aa" match inside "aaa".
		 */
		if (found == orig_start || isspace((unsigned char)found[-1])) {
			const char *value = found + len;
			/* feature with no value (e.g., "thin-pack") */
			if (!*value || isspace((unsigned char)*value)) {
				if (lenp)
					*lenp = 0;
				if (offset)
					*offset = found + len - orig_start;
				return value;
			}
			/* feature with a value (e.g., "agent=git/1.2.3") */
			else if (*value == '=') {
				int end;

				value++;
				end = strcspn(value, " \t\n");
				if (lenp)
					*lenp = end;
				if (offset)
					*offset = value + end - orig_start;
				return value;
			}
			/*
			 * otherwise we matched a substring of another feature;
			 * keep looking
			 */
		}
		feature_list = found + 1;
	}
	return NULL;
}
|
|
|
|
|
2020-05-26 03:58:56 +08:00
|
|
|
int server_supports_hash(const char *desired, int *feature_supported)
|
|
|
|
{
|
|
|
|
int offset = 0;
|
|
|
|
int len;
|
|
|
|
const char *hash;
|
|
|
|
|
|
|
|
hash = next_server_feature_value("object-format", &len, &offset);
|
|
|
|
if (feature_supported)
|
|
|
|
*feature_supported = !!hash;
|
|
|
|
if (!hash) {
|
|
|
|
hash = hash_algos[GIT_HASH_SHA1].name;
|
|
|
|
len = strlen(hash);
|
|
|
|
}
|
|
|
|
while (hash) {
|
|
|
|
if (!xstrncmpz(desired, hash, len))
|
|
|
|
return 1;
|
|
|
|
|
|
|
|
hash = next_server_feature_value("object-format", &len, &offset);
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
parse_feature_request: make it easier to see feature values
We already take care to parse key/value capabilities like
"foo=bar", but the code does not provide a good way of
actually finding out what is on the right-hand side of the
"=".
A server using "parse_feature_request" could accomplish this
with some extra parsing. You must skip past the "key"
portion manually, check for "=" versus NUL or space, and
then find the length by searching for the next space (or
NUL). But clients can't even do that, since the
"server_supports" interface does not even return the
pointer.
Instead, let's have our parser share more information by
providing a pointer to the value and its length. The
"parse_feature_value" function returns a pointer to the
feature's value portion, along with the length of the value.
If the feature is missing, NULL is returned. If it does not
have an "=", then a zero-length value is returned.
Similarly, "server_feature_value" behaves in the same way,
but always checks the static server_feature_list variable.
We can then implement "server_supports" in terms of
"server_feature_value". We cannot implement the original
"parse_feature_request" in terms of our new function,
because it returned a pointer to the beginning of the
feature. However, no callers actually cared about the value
of the returned pointer, so we can simplify it to a boolean
just as we do for "server_supports".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-14 09:59:27 +08:00
|
|
|
/*
 * Boolean form of parse_feature_value(): returns 1 if "feature" occurs
 * in "feature_list" (with or without a value), 0 otherwise.
 */
int parse_feature_request(const char *feature_list, const char *feature)
{
	const char *value = parse_feature_value(feature_list, feature, NULL, NULL);

	return value != NULL;
}
|
|
|
|
|
|
|
|
static const char *next_server_feature_value(const char *feature, int *len, int *offset)
|
|
|
|
{
|
|
|
|
return parse_feature_value(server_capabilities_v1, feature, len, offset);
|
parse_feature_request: make it easier to see feature values
We already take care to parse key/value capabilities like
"foo=bar", but the code does not provide a good way of
actually finding out what is on the right-hand side of the
"=".
A server using "parse_feature_request" could accomplish this
with some extra parsing. You must skip past the "key"
portion manually, check for "=" versus NUL or space, and
then find the length by searching for the next space (or
NUL). But clients can't even do that, since the
"server_supports" interface does not even return the
pointer.
Instead, let's have our parser share more information by
providing a pointer to the value and its length. The
"parse_feature_value" function returns a pointer to the
feature's value portion, along with the length of the value.
If the feature is missing, NULL is returned. If it does not
have an "=", then a zero-length value is returned.
Similarly, "server_feature_value" behaves in the same way,
but always checks the static server_feature_list variable.
We can then implement "server_supports" in terms of
"server_feature_value". We cannot implement the original
"parse_feature_request" in terms of our new function,
because it returned a pointer to the beginning of the
feature. However, no callers actually cared about the value
of the returned pointer, so we can simplify it to a boolean
just as we do for "server_supports".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-14 09:59:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
const char *server_feature_value(const char *feature, int *len)
|
|
|
|
{
|
2020-05-26 03:58:52 +08:00
|
|
|
return parse_feature_value(server_capabilities_v1, feature, len, NULL);
|
parse_feature_request: make it easier to see feature values
We already take care to parse key/value capabilities like
"foo=bar", but the code does not provide a good way of
actually finding out what is on the right-hand side of the
"=".
A server using "parse_feature_request" could accomplish this
with some extra parsing. You must skip past the "key"
portion manually, check for "=" versus NUL or space, and
then find the length by searching for the next space (or
NUL). But clients can't even do that, since the
"server_supports" interface does not even return the
pointer.
Instead, let's have our parser share more information by
providing a pointer to the value and its length. The
"parse_feature_value" function returns a pointer to the
feature's value portion, along with the length of the value.
If the feature is missing, NULL is returned. If it does not
have an "=", then a zero-length value is returned.
Similarly, "server_feature_value" behaves in the same way,
but always checks the static server_feature_list variable.
We can then implement "server_supports" in terms of
"server_feature_value". We cannot implement the original
"parse_feature_request" in terms of our new function,
because it returned a pointer to the beginning of the
feature. However, no callers actually cared about the value
of the returned pointer, so we can simplify it to a boolean
just as we do for "server_supports".
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2012-08-14 09:59:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Returns 1 if server_feature_value() finds "feature", 0 otherwise.
 */
int server_supports(const char *feature)
{
	return server_feature_value(feature, NULL) ? 1 : 0;
}
|
|
|
|
|
2005-07-14 09:46:20 +08:00
|
|
|
/*
 * Transport kinds. Values start at 1, so 0 is never a valid protocol.
 */
enum protocol {
	PROTO_LOCAL = 1,	/* named "file" by prot_name() */
	PROTO_FILE = 2,		/* the "file" protocol name */
	PROTO_SSH = 3,		/* "ssh", and the deprecated "git+ssh" / "ssh+git" */
	PROTO_GIT = 4		/* "git" */
};
|
|
|
|
|
2013-11-29 03:50:03 +08:00
|
|
|
/*
 * Decide whether "url" should be treated as a local path rather than as
 * an scp-style "host:path" ssh address.
 *
 * Returns 1 when the string has no ':' at all, when its first '/' comes
 * before its first ':', or when it has a DOS drive prefix and is a valid
 * path; returns 0 otherwise.
 */
int url_is_local_not_ssh(const char *url)
{
	const char *colon = strchr(url, ':');
	const char *slash = strchr(url, '/');

	if (!colon)
		return 1;
	if (slash && slash < colon)
		return 1;

	/*
	 * A drive prefix alone is not enough: the string must also be a
	 * valid path (e.g. "[::1]:repo" is a URL, not a path).
	 */
	return has_dos_drive_prefix(url) && is_valid_path(url);
}
|
|
|
|
|
2013-11-29 03:49:17 +08:00
|
|
|
static const char *prot_name(enum protocol protocol)
|
|
|
|
{
|
|
|
|
switch (protocol) {
|
|
|
|
case PROTO_LOCAL:
|
2013-11-29 03:50:03 +08:00
|
|
|
case PROTO_FILE:
|
2013-11-29 03:49:17 +08:00
|
|
|
return "file";
|
|
|
|
case PROTO_SSH:
|
|
|
|
return "ssh";
|
|
|
|
case PROTO_GIT:
|
|
|
|
return "git";
|
|
|
|
default:
|
2015-09-24 20:44:49 +08:00
|
|
|
return "unknown protocol";
|
2013-11-29 03:49:17 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-14 09:46:20 +08:00
|
|
|
static enum protocol get_protocol(const char *name)
|
|
|
|
{
|
|
|
|
if (!strcmp(name, "ssh"))
|
|
|
|
return PROTO_SSH;
|
|
|
|
if (!strcmp(name, "git"))
|
|
|
|
return PROTO_GIT;
|
2016-02-15 22:29:06 +08:00
|
|
|
if (!strcmp(name, "git+ssh")) /* deprecated - do not use */
|
2005-10-15 08:14:56 +08:00
|
|
|
return PROTO_SSH;
|
2016-02-15 22:29:06 +08:00
|
|
|
if (!strcmp(name, "ssh+git")) /* deprecated - do not use */
|
2005-10-15 08:14:56 +08:00
|
|
|
return PROTO_SSH;
|
2007-08-02 01:03:37 +08:00
|
|
|
if (!strcmp(name, "file"))
|
2013-11-29 03:50:03 +08:00
|
|
|
return PROTO_FILE;
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("protocol '%s' is not supported"), name);
|
2005-07-14 09:46:20 +08:00
|
|
|
}
|
|
|
|
|
2015-02-21 23:52:48 +08:00
|
|
|
/*
 * Find where a possibly "[bracketed]" host literal ends.
 *
 * The bracket may sit at the very start of *hoststart or right after
 * an "@".  When a matching ']' exists:
 *   - removebrackets == 0: return a pointer to the ']' and leave the
 *     string untouched;
 *   - removebrackets != 0: strip both brackets in place (the host is
 *     shifted left by one and NUL-terminated) and return a pointer to
 *     the character that followed the ']'.
 * In every other case the start of the string is returned.
 * *hoststart itself is never modified.
 */
static char *host_end(char **hoststart, int removebrackets)
{
	char *const host = *hoststart;
	char *open = strstr(host, "@[");
	char *close;

	/* Jump over '@' when the bracket follows user info */
	open = open ? open + 1 : host;

	if (*open != '[')
		return host;

	close = strchr(open + 1, ']');
	if (!close)
		return host;
	if (!removebrackets)
		return close;

	*close = '\0';
	/* moves the host text plus the NUL just written over the '[' */
	memmove(open, open + 1, close - open);
	return close + 1;
}
|
|
|
|
|
2005-07-21 21:10:36 +08:00
|
|
|
/*
 * STR(x) turns the *expansion* of x into a string literal; the extra
 * level of indirection through STR_() is what lets x be macro-expanded
 * before it is stringified.
 */
#define STR_(s)	#s
#define STR(s)	STR_(s)
|
2005-07-14 09:46:20 +08:00
|
|
|
|
2010-02-18 04:56:02 +08:00
|
|
|
/*
 * Split "host[:port]" in place.
 *
 * Brackets around the host are removed first.  If a ':' follows and
 * is trailed by a decimal number in the range 0..65535, the ':' is
 * overwritten with NUL and *port is pointed at the digits.  A bare
 * trailing ':' is simply chopped off, leaving *port untouched.  Any
 * other text after the ':' leaves both the string and *port as-is.
 */
static void get_host_and_port(char **host, const char **port)
{
	char *rest = host_end(host, 1);
	char *sep = strchr(rest, ':');
	char *digits_end;
	long value;

	if (!sep)
		return;

	value = strtol(sep + 1, &digits_end, 10);
	if (digits_end != sep + 1 && !*digits_end &&
	    value >= 0 && value < 65536) {
		*sep = '\0';
		*port = sep + 1;
	} else if (sep[1] == '\0') {
		*sep = '\0';
	}
}
|
|
|
|
|
2011-12-06 12:39:36 +08:00
|
|
|
/*
 * Turn on SO_KEEPALIVE for sockfd.  Failure is reported through
 * error_errno() but is otherwise not fatal.
 */
static void enable_keepalive(int sockfd)
{
	int on = 1;
	int rc = setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));

	if (rc < 0)
		error_errno(_("unable to set SO_KEEPALIVE on socket"));
}
|
|
|
|
|
2005-09-29 07:52:21 +08:00
|
|
|
#ifndef NO_IPV6
|
2005-09-29 07:37:58 +08:00
|
|
|
|
2007-05-24 05:34:27 +08:00
|
|
|
static const char *ai_name(const struct addrinfo *ai)
|
|
|
|
{
|
2009-04-24 20:16:41 +08:00
|
|
|
static char addr[NI_MAXHOST];
|
|
|
|
if (getnameinfo(ai->ai_addr, ai->ai_addrlen, addr, sizeof(addr), NULL, 0,
|
|
|
|
NI_NUMERICHOST) != 0)
|
2015-09-25 05:06:08 +08:00
|
|
|
xsnprintf(addr, sizeof(addr), "(unknown)");
|
2009-04-24 20:16:41 +08:00
|
|
|
|
2007-05-24 05:34:27 +08:00
|
|
|
return addr;
|
|
|
|
}
|
|
|
|
|
2006-06-07 11:58:41 +08:00
|
|
|
/*
 * Returns a connected socket() fd, or else die()s.
 *
 * host is split in place into host and port (see get_host_and_port());
 * the port defaults to DEFAULT_GIT_PORT.  flags may restrict the
 * address family (CONNECT_IPV4 / CONNECT_IPV6) and enable progress
 * output on stderr (CONNECT_VERBOSE).  Every address returned by
 * getaddrinfo() is tried in order; the per-address failures are
 * collected and shown if none of them works.
 */
static int git_tcp_connect_sock(char *host, int flags)
{
	struct strbuf error_message = STRBUF_INIT;
	int sockfd = -1;
	const char *port = STR(DEFAULT_GIT_PORT);
	struct addrinfo hints, *ai0, *ai;
	int gai;
	int cnt = 0;

	get_host_and_port(&host, &port);
	if (!*port)
		port = "<none>";

	memset(&hints, 0, sizeof(hints));
	if (flags & CONNECT_IPV4)
		hints.ai_family = AF_INET;
	else if (flags & CONNECT_IPV6)
		hints.ai_family = AF_INET6;
	hints.ai_socktype = SOCK_STREAM;
	hints.ai_protocol = IPPROTO_TCP;

	if (flags & CONNECT_VERBOSE)
		fprintf(stderr, _("Looking up %s ... "), host);

	gai = getaddrinfo(host, port, &hints, &ai);
	if (gai)
		die(_("unable to look up %s (port %s) (%s)"), host, port, gai_strerror(gai));

	if (flags & CONNECT_VERBOSE)
		/* TRANSLATORS: this is the end of "Looking up %s ... " */
		fprintf(stderr, _("done.\nConnecting to %s (port %s) ... "), host, port);

	for (ai0 = ai; ai; ai = ai->ai_next, cnt++) {
		sockfd = socket(ai->ai_family,
				ai->ai_socktype, ai->ai_protocol);
		if ((sockfd < 0) ||
		    (connect(sockfd, ai->ai_addr, ai->ai_addrlen) < 0)) {
			/*
			 * Capture errno now: ai_name() calls getnameinfo(),
			 * which may overwrite it, and the order in which the
			 * arguments below are evaluated is unspecified.
			 */
			int saved_errno = errno;

			strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
				    host, cnt, ai_name(ai), strerror(saved_errno));
			if (0 <= sockfd)
				close(sockfd);
			sockfd = -1;
			continue;
		}
		if (flags & CONNECT_VERBOSE)
			fprintf(stderr, "%s ", ai_name(ai));
		break;
	}

	freeaddrinfo(ai0);

	if (sockfd < 0)
		die(_("unable to connect to %s:\n%s"), host, error_message.buf);

	enable_keepalive(sockfd);

	if (flags & CONNECT_VERBOSE)
		/* TRANSLATORS: this is the end of "Connecting to %s (port %s) ... " */
		fprintf_ln(stderr, _("done."));

	strbuf_release(&error_message);

	return sockfd;
}
|
|
|
|
|
2005-09-29 07:52:21 +08:00
|
|
|
#else /* NO_IPV6 */
|
2005-09-29 07:37:58 +08:00
|
|
|
|
2006-06-07 11:58:41 +08:00
|
|
|
/*
|
|
|
|
* Returns a connected socket() fd, or else die()s.
|
|
|
|
*/
|
2007-05-17 01:09:41 +08:00
|
|
|
static int git_tcp_connect_sock(char *host, int flags)
|
2005-09-29 07:37:58 +08:00
|
|
|
{
|
2011-08-01 19:16:10 +08:00
|
|
|
struct strbuf error_message = STRBUF_INIT;
|
|
|
|
int sockfd = -1;
|
2010-02-18 04:56:02 +08:00
|
|
|
const char *port = STR(DEFAULT_GIT_PORT);
|
|
|
|
char *ep;
|
2005-09-29 07:37:58 +08:00
|
|
|
struct hostent *he;
|
|
|
|
struct sockaddr_in sa;
|
|
|
|
char **ap;
|
|
|
|
unsigned int nport;
|
2007-05-24 05:34:27 +08:00
|
|
|
int cnt;
|
2005-09-29 07:37:58 +08:00
|
|
|
|
2010-02-18 04:56:02 +08:00
|
|
|
get_host_and_port(&host, &port);
|
2005-09-29 07:37:58 +08:00
|
|
|
|
2007-05-17 01:09:41 +08:00
|
|
|
if (flags & CONNECT_VERBOSE)
|
2018-07-21 15:49:28 +08:00
|
|
|
fprintf(stderr, _("Looking up %s ... "), host);
|
2007-05-17 01:09:41 +08:00
|
|
|
|
2005-09-29 07:37:58 +08:00
|
|
|
he = gethostbyname(host);
|
|
|
|
if (!he)
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("unable to look up %s (%s)"), host, hstrerror(h_errno));
|
2005-09-29 07:37:58 +08:00
|
|
|
nport = strtoul(port, &ep, 10);
|
|
|
|
if ( ep == port || *ep ) {
|
|
|
|
/* Not numeric */
|
|
|
|
struct servent *se = getservbyname(port,"tcp");
|
|
|
|
if ( !se )
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("unknown port %s"), port);
|
2005-09-29 07:37:58 +08:00
|
|
|
nport = se->s_port;
|
|
|
|
}
|
|
|
|
|
2007-05-17 01:09:41 +08:00
|
|
|
if (flags & CONNECT_VERBOSE)
|
2018-07-21 15:49:28 +08:00
|
|
|
/* TRANSLATORS: this is the end of "Looking up %s ... " */
|
|
|
|
fprintf(stderr, _("done.\nConnecting to %s (port %s) ... "), host, port);
|
2007-05-17 01:09:41 +08:00
|
|
|
|
2007-05-24 05:34:27 +08:00
|
|
|
for (cnt = 0, ap = he->h_addr_list; *ap; ap++, cnt++) {
|
2005-09-29 07:37:58 +08:00
|
|
|
memset(&sa, 0, sizeof sa);
|
|
|
|
sa.sin_family = he->h_addrtype;
|
2005-09-29 08:26:44 +08:00
|
|
|
sa.sin_port = htons(nport);
|
2005-11-22 21:54:23 +08:00
|
|
|
memcpy(&sa.sin_addr, *ap, he->h_length);
|
2005-09-29 07:37:58 +08:00
|
|
|
|
2011-08-01 19:16:10 +08:00
|
|
|
sockfd = socket(he->h_addrtype, SOCK_STREAM, 0);
|
|
|
|
if ((sockfd < 0) ||
|
|
|
|
connect(sockfd, (struct sockaddr *)&sa, sizeof sa) < 0) {
|
|
|
|
strbuf_addf(&error_message, "%s[%d: %s]: errno=%s\n",
|
2007-05-24 05:34:27 +08:00
|
|
|
host,
|
|
|
|
cnt,
|
|
|
|
inet_ntoa(*(struct in_addr *)&sa.sin_addr),
|
2011-08-01 19:16:10 +08:00
|
|
|
strerror(errno));
|
|
|
|
if (0 <= sockfd)
|
|
|
|
close(sockfd);
|
2005-09-29 07:37:58 +08:00
|
|
|
sockfd = -1;
|
|
|
|
continue;
|
|
|
|
}
|
2007-05-24 05:34:27 +08:00
|
|
|
if (flags & CONNECT_VERBOSE)
|
|
|
|
fprintf(stderr, "%s ",
|
|
|
|
inet_ntoa(*(struct in_addr *)&sa.sin_addr));
|
2005-09-29 07:37:58 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (sockfd < 0)
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("unable to connect to %s:\n%s"), host, error_message.buf);
|
2005-09-29 07:37:58 +08:00
|
|
|
|
2011-12-06 12:39:36 +08:00
|
|
|
enable_keepalive(sockfd);
|
|
|
|
|
2007-05-17 01:09:41 +08:00
|
|
|
if (flags & CONNECT_VERBOSE)
|
2018-07-21 15:49:28 +08:00
|
|
|
/* TRANSLATORS: this is the end of "Connecting to %s (port %s) ... " */
|
|
|
|
fprintf_ln(stderr, _("done."));
|
2007-05-17 01:09:41 +08:00
|
|
|
|
2006-06-07 11:58:41 +08:00
|
|
|
return sockfd;
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif /* NO_IPV6 */
|
|
|
|
|
|
|
|
|
2017-11-21 05:22:54 +08:00
|
|
|
/*
|
|
|
|
* Dummy child_process returned by git_connect() if the transport protocol
|
|
|
|
* does not need fork(2).
|
|
|
|
*/
|
|
|
|
static struct child_process no_fork = CHILD_PROCESS_INIT;
|
|
|
|
|
|
|
|
int git_connection_is_socket(struct child_process *conn)
|
|
|
|
{
|
|
|
|
return conn == &no_fork;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct child_process *git_tcp_connect(int fd[2], char *host, int flags)
|
2006-06-07 11:58:41 +08:00
|
|
|
{
|
2007-05-17 01:09:41 +08:00
|
|
|
int sockfd = git_tcp_connect_sock(host, flags);
|
2006-06-07 11:58:41 +08:00
|
|
|
|
2005-09-29 07:37:58 +08:00
|
|
|
fd[0] = sockfd;
|
2007-01-22 09:10:51 +08:00
|
|
|
fd[1] = dup(sockfd);
|
2017-11-21 05:22:54 +08:00
|
|
|
|
|
|
|
return &no_fork;
|
2005-09-29 07:37:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2006-08-16 01:23:48 +08:00
|
|
|
/*
 * Proxy command chosen by git_use_proxy(): taken from the
 * GIT_PROXY_COMMAND environment variable if set, otherwise from the
 * first matching core.gitproxy entry.  An empty string (from a
 * "none" entry) means "do not use a proxy".
 */
static char *git_proxy_command;
|
2005-11-04 22:57:16 +08:00
|
|
|
|
2008-05-15 01:46:53 +08:00
|
|
|
static int git_proxy_command_options(const char *var, const char *value,
|
|
|
|
void *cb)
|
2005-11-04 22:57:16 +08:00
|
|
|
{
|
2005-11-19 19:48:56 +08:00
|
|
|
if (!strcmp(var, "core.gitproxy")) {
|
2005-11-22 11:18:23 +08:00
|
|
|
const char *for_pos;
|
|
|
|
int matchlen = -1;
|
|
|
|
int hostlen;
|
2009-03-11 10:38:12 +08:00
|
|
|
const char *rhost_name = cb;
|
|
|
|
int rhost_len = strlen(rhost_name);
|
2005-11-22 11:18:23 +08:00
|
|
|
|
2005-11-19 19:48:56 +08:00
|
|
|
if (git_proxy_command)
|
2005-11-04 22:57:16 +08:00
|
|
|
return 0;
|
2008-02-12 02:52:15 +08:00
|
|
|
if (!value)
|
|
|
|
return config_error_nonbool(var);
|
2005-11-19 19:48:56 +08:00
|
|
|
/* [core]
|
|
|
|
* ;# matches www.kernel.org as well
|
|
|
|
* gitproxy = netcatter-1 for kernel.org
|
|
|
|
* gitproxy = netcatter-2 for sample.xz
|
|
|
|
* gitproxy = netcatter-default
|
|
|
|
*/
|
2005-11-22 11:18:23 +08:00
|
|
|
for_pos = strstr(value, " for ");
|
2005-11-19 19:48:56 +08:00
|
|
|
if (!for_pos)
|
|
|
|
/* matches everybody */
|
|
|
|
matchlen = strlen(value);
|
|
|
|
else {
|
|
|
|
hostlen = strlen(for_pos + 5);
|
|
|
|
if (rhost_len < hostlen)
|
|
|
|
matchlen = -1;
|
|
|
|
else if (!strncmp(for_pos + 5,
|
|
|
|
rhost_name + rhost_len - hostlen,
|
|
|
|
hostlen) &&
|
|
|
|
((rhost_len == hostlen) ||
|
|
|
|
rhost_name[rhost_len - hostlen -1] == '.'))
|
|
|
|
matchlen = for_pos - value;
|
|
|
|
else
|
|
|
|
matchlen = -1;
|
|
|
|
}
|
|
|
|
if (0 <= matchlen) {
|
|
|
|
/* core.gitproxy = none for kernel.org */
|
2007-06-07 15:04:01 +08:00
|
|
|
if (matchlen == 4 &&
|
2005-11-19 19:48:56 +08:00
|
|
|
!memcmp(value, "none", 4))
|
|
|
|
matchlen = 0;
|
2007-09-16 06:32:36 +08:00
|
|
|
git_proxy_command = xmemdupz(value, matchlen);
|
2005-11-04 22:57:16 +08:00
|
|
|
}
|
2005-11-19 19:48:56 +08:00
|
|
|
return 0;
|
2005-11-04 22:57:16 +08:00
|
|
|
}
|
|
|
|
|
2008-05-15 01:46:53 +08:00
|
|
|
return git_default_config(var, value, cb);
|
2005-11-04 22:57:16 +08:00
|
|
|
}
|
|
|
|
|
2005-11-19 19:48:56 +08:00
|
|
|
static int git_use_proxy(const char *host)
|
2005-11-04 22:57:16 +08:00
|
|
|
{
|
|
|
|
git_proxy_command = getenv("GIT_PROXY_COMMAND");
|
2009-03-11 10:38:12 +08:00
|
|
|
git_config(git_proxy_command_options, (void*)host);
|
2005-11-19 19:48:56 +08:00
|
|
|
return (git_proxy_command && *git_proxy_command);
|
2005-11-04 22:57:16 +08:00
|
|
|
}
|
|
|
|
|
connect: treat generic proxy processes like ssh processes
The git_connect function returns two ends of a pipe for
talking with a remote, plus a struct child_process
representing the other end of the pipe. If we have a direct
socket connection, then this points to a special "no_fork"
child process.
The code path for doing git-over-pipes or git-over-ssh sets
up this child process to point to the child git command or
the ssh process. When we call finish_connect eventually, we
check wait() on the command and report its return value.
The code path for git://, on the other hand, always sets it
to no_fork. In the case of a direct TCP connection, this
makes sense; we have no child process. But in the case of a
proxy command (configured by core.gitproxy), we do have a
child process, but we throw away its pid, and therefore
ignore its return code.
Instead, let's keep that information in the proxy case, and
respect its return code, which can help catch some errors
(though depending on your proxy command, it will be errors
reported by the proxy command itself, and not propagated
from git commands. Still, it is probably better to propagate
such errors than to ignore them).
It also means that the child_process field can reliably be
used to determine whether the returned descriptors are
actually a full-duplex socket, which means we should be
using shutdown() instead of a simple close.
Signed-off-by: Jeff King <peff@peff.net>
Helped-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
|
|
|
static struct child_process *git_proxy_connect(int fd[2], char *host)
|
2005-11-04 22:57:16 +08:00
|
|
|
{
|
2006-06-28 17:04:39 +08:00
|
|
|
const char *port = STR(DEFAULT_GIT_PORT);
|
connect: treat generic proxy processes like ssh processes
The git_connect function returns two ends of a pipe for
talking with a remote, plus a struct child_process
representing the other end of the pipe. If we have a direct
socket connection, then this points to a special "no_fork"
child process.
The code path for doing git-over-pipes or git-over-ssh sets
up this child process to point to the child git command or
the ssh process. When we call finish_connect eventually, we
check wait() on the command and report its return value.
The code path for git://, on the other hand, always sets it
to no_fork. In the case of a direct TCP connection, this
makes sense; we have no child process. But in the case of a
proxy command (configured by core.gitproxy), we do have a
child process, but we throw away its pid, and therefore
ignore its return code.
Instead, let's keep that information in the proxy case, and
respect its return code, which can help catch some errors
(though depending on your proxy command, it will be errors
reported by the proxy command itself, and not propagated
from git commands. Still, it is probably better to propagate
such errors than to ignore them).
It also means that the child_process field can reliably be
used to determine whether the returned descriptors are
actually a full-duplex socket, which means we should be
using shutdown() instead of a simple close.
Signed-off-by: Jeff King <peff@peff.net>
Helped-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
|
|
|
struct child_process *proxy;
|
2005-11-04 22:57:16 +08:00
|
|
|
|
2010-02-18 04:56:02 +08:00
|
|
|
get_host_and_port(&host, &port);
|
2005-11-04 22:57:16 +08:00
|
|
|
|
2017-07-29 03:26:50 +08:00
|
|
|
if (looks_like_command_line_option(host))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("strange hostname '%s' blocked"), host);
|
2017-07-29 03:26:50 +08:00
|
|
|
if (looks_like_command_line_option(port))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("strange port '%s' blocked"), port);
|
2017-07-29 03:26:50 +08:00
|
|
|
|
2014-08-20 03:10:48 +08:00
|
|
|
proxy = xmalloc(sizeof(*proxy));
|
|
|
|
child_process_init(proxy);
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(&proxy->args, git_proxy_command);
|
|
|
|
strvec_push(&proxy->args, host);
|
|
|
|
strvec_push(&proxy->args, port);
|
connect: treat generic proxy processes like ssh processes
The git_connect function returns two ends of a pipe for
talking with a remote, plus a struct child_process
representing the other end of the pipe. If we have a direct
socket connection, then this points to a special "no_fork"
child process.
The code path for doing git-over-pipes or git-over-ssh sets
up this child process to point to the child git command or
the ssh process. When we call finish_connect eventually, we
check wait() on the command and report its return value.
The code path for git://, on the other hand, always sets it
to no_fork. In the case of a direct TCP connection, this
makes sense; we have no child process. But in the case of a
proxy command (configured by core.gitproxy), we do have a
child process, but we throw away its pid, and therefore
ignore its return code.
Instead, let's keep that information in the proxy case, and
respect its return code, which can help catch some errors
(though depending on your proxy command, it will be errors
reported by the proxy command itself, and not propagated
from git commands. Still, it is probably better to propagate
such errors than to ignore them).
It also means that the child_process field can reliably be
used to determine whether the returned descriptors are
actually a full-duplex socket, which means we should be
using shutdown() instead of a simple close.
Signed-off-by: Jeff King <peff@peff.net>
Helped-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
|
|
|
proxy->in = -1;
|
|
|
|
proxy->out = -1;
|
|
|
|
if (start_command(proxy))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("cannot start proxy %s"), git_proxy_command);
|
connect: treat generic proxy processes like ssh processes
The git_connect function returns two ends of a pipe for
talking with a remote, plus a struct child_process
representing the other end of the pipe. If we have a direct
socket connection, then this points to a special "no_fork"
child process.
The code path for doing git-over-pipes or git-over-ssh sets
up this child process to point to the child git command or
the ssh process. When we call finish_connect eventually, we
check wait() on the command and report its return value.
The code path for git://, on the other hand, always sets it
to no_fork. In the case of a direct TCP connection, this
makes sense; we have no child process. But in the case of a
proxy command (configured by core.gitproxy), we do have a
child process, but we throw away its pid, and therefore
ignore its return code.
Instead, let's keep that information in the proxy case, and
respect its return code, which can help catch some errors
(though depending on your proxy command, it will be errors
reported by the proxy command itself, and not propagated
from git commands. Still, it is probably better to propagate
such errors than to ignore them).
It also means that the child_process field can reliably be
used to determine whether the returned descriptors are
actually a full-duplex socket, which means we should be
using shutdown() instead of a simple close.
Signed-off-by: Jeff King <peff@peff.net>
Helped-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-05-16 14:46:07 +08:00
|
|
|
fd[0] = proxy->out; /* read from proxy stdout */
|
|
|
|
fd[1] = proxy->in; /* write to proxy stdin */
|
|
|
|
return proxy;
|
2005-11-04 22:57:16 +08:00
|
|
|
}
|
|
|
|
|
2015-02-21 23:52:48 +08:00
|
|
|
/*
 * If host ends in ":<number>" with the number in 0..65535, cut the
 * string at the first ':' and return a pointer to the digits.
 * Otherwise return NULL and leave host untouched.
 */
static char *get_port(char *host)
{
	char *sep = strchr(host, ':');
	char *stop;
	long value;

	if (!sep)
		return NULL;

	value = strtol(sep + 1, &stop, 10);
	if (stop == sep + 1 || *stop != '\0')
		return NULL;	/* no digits, or trailing junk */
	if (value < 0 || value >= 65536)
		return NULL;

	*sep = '\0';
	return sep + 1;
}
|
|
|
|
|
2005-07-05 02:57:58 +08:00
|
|
|
/*
|
2013-11-29 03:49:01 +08:00
|
|
|
* Extract protocol and relevant parts from the specified connection URL.
|
|
|
|
* The caller must free() the returned strings.
|
2005-07-05 02:57:58 +08:00
|
|
|
*/
|
2013-11-29 03:49:01 +08:00
|
|
|
static enum protocol parse_connect_url(const char *url_orig, char **ret_host,
|
2013-11-29 03:49:54 +08:00
|
|
|
char **ret_path)
|
2005-07-05 02:57:58 +08:00
|
|
|
{
|
2010-05-23 17:19:44 +08:00
|
|
|
char *url;
|
2009-03-13 20:51:33 +08:00
|
|
|
char *host, *path;
|
2005-12-21 18:23:42 +08:00
|
|
|
char *end;
|
2013-11-29 03:50:03 +08:00
|
|
|
int separator = '/';
|
2005-11-18 03:37:14 +08:00
|
|
|
enum protocol protocol = PROTO_LOCAL;
|
2006-06-20 09:25:21 +08:00
|
|
|
|
2010-05-23 17:19:44 +08:00
|
|
|
if (is_url(url_orig))
|
|
|
|
url = url_decode(url_orig);
|
|
|
|
else
|
|
|
|
url = xstrdup(url_orig);
|
|
|
|
|
2005-11-18 03:37:14 +08:00
|
|
|
host = strstr(url, "://");
|
2009-09-01 13:35:10 +08:00
|
|
|
if (host) {
|
2005-11-18 03:37:14 +08:00
|
|
|
*host = '\0';
|
|
|
|
protocol = get_protocol(url);
|
|
|
|
host += 3;
|
2005-12-21 18:23:42 +08:00
|
|
|
} else {
|
2005-07-05 02:57:58 +08:00
|
|
|
host = url;
|
2013-11-29 03:50:03 +08:00
|
|
|
if (!url_is_local_not_ssh(url)) {
|
|
|
|
protocol = PROTO_SSH;
|
|
|
|
separator = ':';
|
|
|
|
}
|
2005-12-21 18:23:42 +08:00
|
|
|
}
|
|
|
|
|
2010-01-27 02:24:42 +08:00
|
|
|
/*
|
2013-11-29 03:49:54 +08:00
|
|
|
* Don't do destructive transforms as protocol code does
|
|
|
|
* '[]' unwrapping in get_host_and_port()
|
2010-01-27 02:24:42 +08:00
|
|
|
*/
|
2015-02-21 23:52:48 +08:00
|
|
|
end = host_end(&host, 0);
|
2005-12-21 18:23:42 +08:00
|
|
|
|
2013-11-29 03:50:03 +08:00
|
|
|
if (protocol == PROTO_LOCAL)
|
2007-08-02 01:03:37 +08:00
|
|
|
path = end;
|
2019-08-25 06:07:59 +08:00
|
|
|
else if (protocol == PROTO_FILE && *host != '/' &&
|
|
|
|
!has_dos_drive_prefix(host) &&
|
|
|
|
offset_1st_component(host - 2) > 1)
|
|
|
|
path = host - 2; /* include the leading "//" */
|
2013-11-29 03:50:03 +08:00
|
|
|
else if (protocol == PROTO_FILE && has_dos_drive_prefix(end))
|
|
|
|
path = end; /* "file://$(pwd)" may be "file://C:/projects/repo" */
|
|
|
|
else
|
|
|
|
path = strchr(end, separator);
|
2005-07-14 09:46:20 +08:00
|
|
|
|
2005-11-18 03:37:14 +08:00
|
|
|
if (!path || !*path)
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("no path specified; see 'git help pull' for valid url syntax"));
|
2005-11-18 03:37:14 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* null-terminate hostname and point path to ~ for URL's like this:
|
|
|
|
* ssh://host.xz/~user/repo
|
|
|
|
*/
|
2013-11-29 03:50:03 +08:00
|
|
|
|
|
|
|
end = path; /* Need to \0 terminate host here */
|
|
|
|
if (separator == ':')
|
|
|
|
path++; /* path starts after ':' */
|
|
|
|
if (protocol == PROTO_GIT || protocol == PROTO_SSH) {
|
2005-11-18 03:37:14 +08:00
|
|
|
if (path[1] == '~')
|
|
|
|
path++;
|
|
|
|
}
|
|
|
|
|
2013-11-29 03:50:03 +08:00
|
|
|
path = xstrdup(path);
|
|
|
|
*end = '\0';
|
|
|
|
|
2013-11-29 03:49:01 +08:00
|
|
|
*ret_host = xstrdup(host);
|
2013-11-29 03:50:03 +08:00
|
|
|
*ret_path = path;
|
2013-11-29 03:49:01 +08:00
|
|
|
free(url);
|
|
|
|
return protocol;
|
|
|
|
}
|
|
|
|
|
2016-06-26 19:16:35 +08:00
|
|
|
/*
 * Return the user-supplied ssh command: the GIT_SSH_COMMAND
 * environment variable if set, else the core.sshcommand config
 * value, else NULL.
 */
static const char *get_ssh_command(void)
{
	const char *cmd = getenv("GIT_SSH_COMMAND");

	if (cmd)
		return cmd;

	/* non-zero return from the config lookup means "not found" here */
	if (git_config_get_string_tmp("core.sshcommand", &cmd))
		return NULL;
	return cmd;
}
|
|
|
|
|
2017-10-17 01:55:31 +08:00
|
|
|
/* Flavours of ssh command line that can be targeted. */
enum ssh_variant {
	/*
	 * Check for a recognized basename first; if that fails, probe
	 * the command with "-G" and behave like VARIANT_SSH when the
	 * probe succeeds or VARIANT_SIMPLE when it does not.
	 */
	VARIANT_AUTO = 0,
	/* helper that does not accept OpenSSH options (e.g. no port option) */
	VARIANT_SIMPLE,
	/* command that understands OpenSSH options */
	VARIANT_SSH,
	VARIANT_PLINK,
	VARIANT_PUTTY,
	VARIANT_TORTOISEPLINK,
};
|
|
|
|
|
ssh: 'auto' variant to select between 'ssh' and 'simple'
Android's "repo" tool is a tool for managing a large codebase
consisting of multiple smaller repositories, similar to Git's
submodule feature. Starting with Git 94b8ae5a (ssh: introduce a
'simple' ssh variant, 2017-10-16), users noticed that it stopped
handling the port in ssh:// URLs.
The cause: when it encounters ssh:// URLs, repo pre-connects to the
server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses
that connection. Before 94b8ae5a, the helper was assumed to support
OpenSSH options for lack of a better guess and got passed a -p option
to set the port. After that patch, it uses the new default of a
simple helper that does not accept an option to set the port.
The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid
that. But users of old versions and of other similar GIT_SSH
implementations would not get the benefit of that fix.
So update the default to use OpenSSH options again, with a twist. As
observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles
OpenSSH options: common helpers such as travis-ci's dpl[*] are
configured using GIT_SSH and do not accept OpenSSH options. So make
the default a new variant "auto", with the following behavior:
1. First, check for a recognized basename, like today.
2. If the basename is not recognized, check whether $GIT_SSH supports
OpenSSH options by running
$GIT_SSH -G <options> <host>
This returns status 0 and prints configuration in OpenSSH if it
recognizes all <options> and returns status 255 if it encounters
an unrecognized option. A wrapper script like
exec ssh -- "$@"
would fail with
ssh: Could not resolve hostname -g: Name or service not known
, correctly reflecting that it does not support OpenSSH options.
The command is run with stdin, stdout, and stderr redirected to
/dev/null so even a command that expects a terminal would exit
immediately.
3. Based on the result from step (2), behave like "ssh" (if it
succeeded) or "simple" (if it failed).
This way, the default ssh variant for unrecognized commands can handle
both the repo and dpl cases as intended.
This autodetection has been running on Google workstations since
2017-10-23 with no reported negative effects.
[*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215
Reported-by: William Yan <wyan@google.com>
Improved-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 05:30:04 +08:00
|
|
|
static void override_ssh_variant(enum ssh_variant *ssh_variant)
|
2017-02-01 20:01:10 +08:00
|
|
|
{
|
2017-10-17 01:55:31 +08:00
|
|
|
const char *variant = getenv("GIT_SSH_VARIANT");
|
2017-02-10 01:20:25 +08:00
|
|
|
|
2020-08-15 00:17:36 +08:00
|
|
|
if (!variant && git_config_get_string_tmp("ssh.variant", &variant))
|
ssh: 'auto' variant to select between 'ssh' and 'simple'
Android's "repo" tool is a tool for managing a large codebase
consisting of multiple smaller repositories, similar to Git's
submodule feature. Starting with Git 94b8ae5a (ssh: introduce a
'simple' ssh variant, 2017-10-16), users noticed that it stopped
handling the port in ssh:// URLs.
The cause: when it encounters ssh:// URLs, repo pre-connects to the
server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses
that connection. Before 94b8ae5a, the helper was assumed to support
OpenSSH options for lack of a better guess and got passed a -p option
to set the port. After that patch, it uses the new default of a
simple helper that does not accept an option to set the port.
The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid
that. But users of old versions and of other similar GIT_SSH
implementations would not get the benefit of that fix.
So update the default to use OpenSSH options again, with a twist. As
observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles
OpenSSH options: common helpers such as travis-ci's dpl[*] are
configured using GIT_SSH and do not accept OpenSSH options. So make
the default a new variant "auto", with the following behavior:
1. First, check for a recognized basename, like today.
2. If the basename is not recognized, check whether $GIT_SSH supports
OpenSSH options by running
$GIT_SSH -G <options> <host>
This returns status 0 and prints configuration in OpenSSH if it
recognizes all <options> and returns status 255 if it encounters
an unrecognized option. A wrapper script like
exec ssh -- "$@"
would fail with
ssh: Could not resolve hostname -g: Name or service not known
, correctly reflecting that it does not support OpenSSH options.
The command is run with stdin, stdout, and stderr redirected to
/dev/null so even a command that expects a terminal would exit
immediately.
3. Based on the result from step (2), behave like "ssh" (if it
succeeded) or "simple" (if it failed).
This way, the default ssh variant for unrecognized commands can handle
both the repo and dpl cases as intended.
This autodetection has been running on Google workstations since
2017-10-23 with no reported negative effects.
[*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215
Reported-by: William Yan <wyan@google.com>
Improved-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 05:30:04 +08:00
|
|
|
return;
|
2017-02-10 01:20:25 +08:00
|
|
|
|
ssh: 'auto' variant to select between 'ssh' and 'simple'
Android's "repo" tool is a tool for managing a large codebase
consisting of multiple smaller repositories, similar to Git's
submodule feature. Starting with Git 94b8ae5a (ssh: introduce a
'simple' ssh variant, 2017-10-16), users noticed that it stopped
handling the port in ssh:// URLs.
The cause: when it encounters ssh:// URLs, repo pre-connects to the
server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses
that connection. Before 94b8ae5a, the helper was assumed to support
OpenSSH options for lack of a better guess and got passed a -p option
to set the port. After that patch, it uses the new default of a
simple helper that does not accept an option to set the port.
The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid
that. But users of old versions and of other similar GIT_SSH
implementations would not get the benefit of that fix.
So update the default to use OpenSSH options again, with a twist. As
observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles
OpenSSH options: common helpers such as travis-ci's dpl[*] are
configured using GIT_SSH and do not accept OpenSSH options. So make
the default a new variant "auto", with the following behavior:
1. First, check for a recognized basename, like today.
2. If the basename is not recognized, check whether $GIT_SSH supports
OpenSSH options by running
$GIT_SSH -G <options> <host>
This returns status 0 and prints configuration in OpenSSH if it
recognizes all <options> and returns status 255 if it encounters
an unrecognized option. A wrapper script like
exec ssh -- "$@"
would fail with
ssh: Could not resolve hostname -g: Name or service not known
, correctly reflecting that it does not support OpenSSH options.
The command is run with stdin, stdout, and stderr redirected to
/dev/null so even a command that expects a terminal would exit
immediately.
3. Based on the result from step (2), behave like "ssh" (if it
succeeded) or "simple" (if it failed).
This way, the default ssh variant for unrecognized commands can handle
both the repo and dpl cases as intended.
This autodetection has been running on Google workstations since
2017-10-23 with no reported negative effects.
[*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215
Reported-by: William Yan <wyan@google.com>
Improved-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 05:30:04 +08:00
|
|
|
if (!strcmp(variant, "auto"))
|
|
|
|
*ssh_variant = VARIANT_AUTO;
|
|
|
|
else if (!strcmp(variant, "plink"))
|
2017-10-17 01:55:31 +08:00
|
|
|
*ssh_variant = VARIANT_PLINK;
|
|
|
|
else if (!strcmp(variant, "putty"))
|
|
|
|
*ssh_variant = VARIANT_PUTTY;
|
|
|
|
else if (!strcmp(variant, "tortoiseplink"))
|
|
|
|
*ssh_variant = VARIANT_TORTOISEPLINK;
|
|
|
|
else if (!strcmp(variant, "simple"))
|
|
|
|
*ssh_variant = VARIANT_SIMPLE;
|
|
|
|
else
|
|
|
|
*ssh_variant = VARIANT_SSH;
|
2017-02-10 01:20:25 +08:00
|
|
|
}
|
|
|
|
|
2017-10-17 01:55:31 +08:00
|
|
|
static enum ssh_variant determine_ssh_variant(const char *ssh_command,
|
|
|
|
int is_cmdline)
|
2017-02-10 01:20:25 +08:00
|
|
|
{
|
ssh: 'auto' variant to select between 'ssh' and 'simple'
Android's "repo" tool is a tool for managing a large codebase
consisting of multiple smaller repositories, similar to Git's
submodule feature. Starting with Git 94b8ae5a (ssh: introduce a
'simple' ssh variant, 2017-10-16), users noticed that it stopped
handling the port in ssh:// URLs.
The cause: when it encounters ssh:// URLs, repo pre-connects to the
server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses
that connection. Before 94b8ae5a, the helper was assumed to support
OpenSSH options for lack of a better guess and got passed a -p option
to set the port. After that patch, it uses the new default of a
simple helper that does not accept an option to set the port.
The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid
that. But users of old versions and of other similar GIT_SSH
implementations would not get the benefit of that fix.
So update the default to use OpenSSH options again, with a twist. As
observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles
OpenSSH options: common helpers such as travis-ci's dpl[*] are
configured using GIT_SSH and do not accept OpenSSH options. So make
the default a new variant "auto", with the following behavior:
1. First, check for a recognized basename, like today.
2. If the basename is not recognized, check whether $GIT_SSH supports
OpenSSH options by running
$GIT_SSH -G <options> <host>
This returns status 0 and prints configuration in OpenSSH if it
recognizes all <options> and returns status 255 if it encounters
an unrecognized option. A wrapper script like
exec ssh -- "$@"
would fail with
ssh: Could not resolve hostname -g: Name or service not known
, correctly reflecting that it does not support OpenSSH options.
The command is run with stdin, stdout, and stderr redirected to
/dev/null so even a command that expects a terminal would exit
immediately.
3. Based on the result from step (2), behave like "ssh" (if it
succeeded) or "simple" (if it failed).
This way, the default ssh variant for unrecognized commands can handle
both the repo and dpl cases as intended.
This autodetection has been running on Google workstations since
2017-10-23 with no reported negative effects.
[*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215
Reported-by: William Yan <wyan@google.com>
Improved-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 05:30:04 +08:00
|
|
|
enum ssh_variant ssh_variant = VARIANT_AUTO;
|
2017-02-10 01:20:25 +08:00
|
|
|
const char *variant;
|
2017-02-01 20:01:10 +08:00
|
|
|
char *p = NULL;
|
|
|
|
|
ssh: 'auto' variant to select between 'ssh' and 'simple'
Android's "repo" tool is a tool for managing a large codebase
consisting of multiple smaller repositories, similar to Git's
submodule feature. Starting with Git 94b8ae5a (ssh: introduce a
'simple' ssh variant, 2017-10-16), users noticed that it stopped
handling the port in ssh:// URLs.
The cause: when it encounters ssh:// URLs, repo pre-connects to the
server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses
that connection. Before 94b8ae5a, the helper was assumed to support
OpenSSH options for lack of a better guess and got passed a -p option
to set the port. After that patch, it uses the new default of a
simple helper that does not accept an option to set the port.
The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid
that. But users of old versions and of other similar GIT_SSH
implementations would not get the benefit of that fix.
So update the default to use OpenSSH options again, with a twist. As
observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles
OpenSSH options: common helpers such as travis-ci's dpl[*] are
configured using GIT_SSH and do not accept OpenSSH options. So make
the default a new variant "auto", with the following behavior:
1. First, check for a recognized basename, like today.
2. If the basename is not recognized, check whether $GIT_SSH supports
OpenSSH options by running
$GIT_SSH -G <options> <host>
This returns status 0 and prints configuration in OpenSSH if it
recognizes all <options> and returns status 255 if it encounters
an unrecognized option. A wrapper script like
exec ssh -- "$@"
would fail with
ssh: Could not resolve hostname -g: Name or service not known
, correctly reflecting that it does not support OpenSSH options.
The command is run with stdin, stdout, and stderr redirected to
/dev/null so even a command that expects a terminal would exit
immediately.
3. Based on the result from step (2), behave like "ssh" (if it
succeeded) or "simple" (if it failed).
This way, the default ssh variant for unrecognized commands can handle
both the repo and dpl cases as intended.
This autodetection has been running on Google workstations since
2017-10-23 with no reported negative effects.
[*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215
Reported-by: William Yan <wyan@google.com>
Improved-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 05:30:04 +08:00
|
|
|
override_ssh_variant(&ssh_variant);
|
|
|
|
|
|
|
|
if (ssh_variant != VARIANT_AUTO)
|
2017-10-17 01:55:31 +08:00
|
|
|
return ssh_variant;
|
2017-02-10 01:20:25 +08:00
|
|
|
|
|
|
|
if (!is_cmdline) {
|
2017-02-01 20:01:10 +08:00
|
|
|
p = xstrdup(ssh_command);
|
|
|
|
variant = basename(p);
|
|
|
|
} else {
|
|
|
|
const char **ssh_argv;
|
|
|
|
|
|
|
|
p = xstrdup(ssh_command);
|
2017-04-11 08:30:23 +08:00
|
|
|
if (split_cmdline(p, &ssh_argv) > 0) {
|
2017-02-01 20:01:10 +08:00
|
|
|
variant = basename((char *)ssh_argv[0]);
|
|
|
|
/*
|
|
|
|
* At this point, variant points into the buffer
|
|
|
|
* referenced by p, hence we do not need ssh_argv
|
|
|
|
* any longer.
|
|
|
|
*/
|
|
|
|
free(ssh_argv);
|
2017-04-21 04:21:58 +08:00
|
|
|
} else {
|
|
|
|
free(p);
|
2017-10-17 01:55:31 +08:00
|
|
|
return ssh_variant;
|
2017-04-21 04:21:58 +08:00
|
|
|
}
|
2017-02-01 20:01:10 +08:00
|
|
|
}
|
|
|
|
|
2017-10-17 01:55:31 +08:00
|
|
|
if (!strcasecmp(variant, "ssh") ||
|
|
|
|
!strcasecmp(variant, "ssh.exe"))
|
|
|
|
ssh_variant = VARIANT_SSH;
|
|
|
|
else if (!strcasecmp(variant, "plink") ||
|
|
|
|
!strcasecmp(variant, "plink.exe"))
|
|
|
|
ssh_variant = VARIANT_PLINK;
|
2017-02-01 20:01:10 +08:00
|
|
|
else if (!strcasecmp(variant, "tortoiseplink") ||
|
2017-10-17 01:55:31 +08:00
|
|
|
!strcasecmp(variant, "tortoiseplink.exe"))
|
|
|
|
ssh_variant = VARIANT_TORTOISEPLINK;
|
|
|
|
|
2017-02-01 20:01:10 +08:00
|
|
|
free(p);
|
2017-10-17 01:55:31 +08:00
|
|
|
return ssh_variant;
|
2017-02-01 20:01:10 +08:00
|
|
|
}
|
|
|
|
|
2017-11-21 05:23:27 +08:00
|
|
|
/*
|
|
|
|
* Open a connection using Git's native protocol.
|
|
|
|
*
|
|
|
|
* The caller is responsible for freeing hostandport, but this function may
|
|
|
|
* modify it (for example, to truncate it to remove the port part).
|
|
|
|
*/
|
|
|
|
static struct child_process *git_connect_git(int fd[2], char *hostandport,
|
|
|
|
const char *path, const char *prog,
|
2018-03-16 01:31:30 +08:00
|
|
|
enum protocol_version version,
|
2017-11-21 05:23:27 +08:00
|
|
|
int flags)
|
|
|
|
{
|
|
|
|
struct child_process *conn;
|
|
|
|
struct strbuf request = STRBUF_INIT;
|
|
|
|
/*
|
|
|
|
* Set up virtual host information based on where we will
|
|
|
|
* connect, unless the user has overridden us in
|
|
|
|
* the environment.
|
|
|
|
*/
|
|
|
|
char *target_host = getenv("GIT_OVERRIDE_VIRTUAL_HOST");
|
|
|
|
if (target_host)
|
|
|
|
target_host = xstrdup(target_host);
|
|
|
|
else
|
|
|
|
target_host = xstrdup(hostandport);
|
|
|
|
|
|
|
|
transport_check_allowed("git");
|
git_connect_git(): forbid newlines in host and path
When we connect to a git:// server, we send an initial request that
looks something like:
002dgit-upload-pack repo.git\0host=example.com
If the repo path contains a newline, then it's included literally, and
we get:
002egit-upload-pack repo
.git\0host=example.com
This works fine if you really do have a newline in your repository name;
the server side uses the pktline framing to parse the string, not
newlines. However, there are many _other_ protocols in the wild that do
parse on newlines, such as HTTP. So a carefully constructed git:// URL
can actually turn into a valid HTTP request. For example:
git://localhost:1234/%0d%0a%0d%0aGET%20/%20HTTP/1.1 %0d%0aHost:localhost%0d%0a%0d%0a
becomes:
0050git-upload-pack /
GET / HTTP/1.1
Host:localhost
host=localhost:1234
on the wire. Again, this isn't a problem for a real Git server, but it
does mean that feeding a malicious URL to Git (e.g., through a
submodule) can cause it to make unexpected cross-protocol requests.
Since repository names with newlines are presumably quite rare (and
indeed, we already disallow them in git-over-http), let's just disallow
them over this protocol.
Hostnames could likewise inject a newline, but this is unlikely a
problem in practice; we'd try resolving the hostname with a newline in
it, which wouldn't work. Still, it doesn't hurt to err on the side of
caution there, since we would not expect them to work in the first
place.
The ssh and local code paths are unaffected by this patch. In both cases
we're trying to run upload-pack via a shell, and will quote the newline
so that it makes it intact. An attacker can point an ssh url at an
arbitrary port, of course, but unless there's an actual ssh server
there, we'd never get as far as sending our shell command anyway. We
_could_ similarly restrict newlines in those protocols out of caution,
but there seems little benefit to doing so.
The new test here is run alongside the git-daemon tests, which cover the
same protocol, but it shouldn't actually contact the daemon at all. In
theory we could make the test more robust by setting up an actual
repository with a newline in it (so that our clone would succeed if our
new check didn't kick in). But a repo directory with newline in it is
likely not portable across all filesystems. Likewise, we could check
git-daemon's log that it was not contacted at all, but we do not
currently record the log (and anyway, it would make the test racy with
the daemon's log write). We'll just check the client-side stderr to make
sure we hit the expected code path.
Reported-by: Harold Kim <h.kim@flatt.tech>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-01-07 17:43:58 +08:00
|
|
|
if (strchr(target_host, '\n') || strchr(path, '\n'))
|
|
|
|
die(_("newline is forbidden in git:// hosts and repo paths"));
|
2017-11-21 05:23:27 +08:00
|
|
|
|
2017-11-21 06:04:58 +08:00
|
|
|
/*
|
|
|
|
* These underlying connection commands die() if they
|
2017-11-21 05:23:27 +08:00
|
|
|
* cannot connect.
|
|
|
|
*/
|
|
|
|
if (git_use_proxy(hostandport))
|
|
|
|
conn = git_proxy_connect(fd, hostandport);
|
|
|
|
else
|
|
|
|
conn = git_tcp_connect(fd, hostandport, flags);
|
|
|
|
/*
|
|
|
|
* Separate original protocol components prog and path
|
|
|
|
* from extended host header with a NUL byte.
|
|
|
|
*
|
|
|
|
* Note: Do not add any other headers here! Doing so
|
|
|
|
* will cause older git-daemon servers to crash.
|
|
|
|
*/
|
|
|
|
strbuf_addf(&request,
|
|
|
|
"%s %s%chost=%s%c",
|
|
|
|
prog, path, 0,
|
|
|
|
target_host, 0);
|
|
|
|
|
|
|
|
/* If using a new version put that stuff here after a second null byte */
|
2018-03-16 01:31:30 +08:00
|
|
|
if (version > 0) {
|
2017-11-21 05:23:27 +08:00
|
|
|
strbuf_addch(&request, '\0');
|
|
|
|
strbuf_addf(&request, "version=%d%c",
|
2018-03-16 01:31:30 +08:00
|
|
|
version, '\0');
|
2017-11-21 05:23:27 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
packet_write(fd[1], request.buf, request.len);
|
|
|
|
|
|
|
|
free(target_host);
|
|
|
|
strbuf_release(&request);
|
|
|
|
return conn;
|
|
|
|
}
|
|
|
|
|
2017-11-21 05:26:19 +08:00
|
|
|
/*
|
|
|
|
* Append the appropriate environment variables to `env` and options to
|
|
|
|
* `args` for running ssh in Git's SSH-tunneled transport.
|
|
|
|
*/
|
2020-07-29 04:24:53 +08:00
|
|
|
static void push_ssh_options(struct strvec *args, struct strvec *env,
|
2017-11-21 05:26:19 +08:00
|
|
|
enum ssh_variant variant, const char *port,
|
2018-03-16 01:31:30 +08:00
|
|
|
enum protocol_version version, int flags)
|
2017-11-21 05:26:19 +08:00
|
|
|
{
|
|
|
|
if (variant == VARIANT_SSH &&
|
2018-03-16 01:31:30 +08:00
|
|
|
version > 0) {
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(args, "-o");
|
|
|
|
strvec_push(args, "SendEnv=" GIT_PROTOCOL_ENVIRONMENT);
|
|
|
|
strvec_pushf(env, GIT_PROTOCOL_ENVIRONMENT "=version=%d",
|
strvec: fix indentation in renamed calls
Code which split an argv_array call across multiple lines, like:
argv_array_pushl(&args, "one argument",
"another argument", "and more",
NULL);
was recently mechanically renamed to use strvec, which results in
mis-matched indentation like:
strvec_pushl(&args, "one argument",
"another argument", "and more",
NULL);
Let's fix these up to align the arguments with the opening paren. I did
this manually by sifting through the results of:
git jump grep 'strvec_.*,$'
and liberally applying my editor's auto-format. Most of the changes are
of the form shown above, though I also normalized a few that had
originally used a single-tab indentation (rather than our usual style of
aligning with the open paren). I also rewrapped a couple of obvious
cases (e.g., where previously too-long lines became short enough to fit
on one), but I wasn't aggressive about it. In cases broken to three or
more lines, the grouping of arguments is sometimes meaningful, and it
wasn't worth my time or reviewer time to ponder each case individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-29 04:26:31 +08:00
|
|
|
version);
|
2017-11-21 05:26:19 +08:00
|
|
|
}
|
|
|
|
|
2017-11-21 05:30:30 +08:00
|
|
|
if (flags & CONNECT_IPV4) {
|
|
|
|
switch (variant) {
|
|
|
|
case VARIANT_AUTO:
|
|
|
|
BUG("VARIANT_AUTO passed to push_ssh_options");
|
|
|
|
case VARIANT_SIMPLE:
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("ssh variant 'simple' does not support -4"));
|
2017-11-21 05:30:30 +08:00
|
|
|
case VARIANT_SSH:
|
|
|
|
case VARIANT_PLINK:
|
|
|
|
case VARIANT_PUTTY:
|
|
|
|
case VARIANT_TORTOISEPLINK:
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(args, "-4");
|
2017-11-21 05:30:30 +08:00
|
|
|
}
|
|
|
|
} else if (flags & CONNECT_IPV6) {
|
|
|
|
switch (variant) {
|
|
|
|
case VARIANT_AUTO:
|
|
|
|
BUG("VARIANT_AUTO passed to push_ssh_options");
|
|
|
|
case VARIANT_SIMPLE:
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("ssh variant 'simple' does not support -6"));
|
2017-11-21 05:30:30 +08:00
|
|
|
case VARIANT_SSH:
|
|
|
|
case VARIANT_PLINK:
|
|
|
|
case VARIANT_PUTTY:
|
|
|
|
case VARIANT_TORTOISEPLINK:
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(args, "-6");
|
2017-11-21 05:30:30 +08:00
|
|
|
}
|
2017-11-21 05:26:19 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (variant == VARIANT_TORTOISEPLINK)
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(args, "-batch");
|
2017-11-21 05:26:19 +08:00
|
|
|
|
2017-11-21 05:31:01 +08:00
|
|
|
if (port) {
|
|
|
|
switch (variant) {
|
|
|
|
case VARIANT_AUTO:
|
|
|
|
BUG("VARIANT_AUTO passed to push_ssh_options");
|
|
|
|
case VARIANT_SIMPLE:
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("ssh variant 'simple' does not support setting port"));
|
2017-11-21 05:31:01 +08:00
|
|
|
case VARIANT_SSH:
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(args, "-p");
|
2017-11-21 05:31:01 +08:00
|
|
|
break;
|
|
|
|
case VARIANT_PLINK:
|
|
|
|
case VARIANT_PUTTY:
|
|
|
|
case VARIANT_TORTOISEPLINK:
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(args, "-P");
|
2017-11-21 05:31:01 +08:00
|
|
|
}
|
2017-11-21 05:26:19 +08:00
|
|
|
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(args, port);
|
2017-11-21 05:26:19 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-11-21 06:19:43 +08:00
|
|
|
/* Prepare a child_process for use by Git's SSH-tunneled transport. */
|
|
|
|
static void fill_ssh_args(struct child_process *conn, const char *ssh_host,
|
2018-03-16 01:31:30 +08:00
|
|
|
const char *port, enum protocol_version version,
|
|
|
|
int flags)
|
2017-11-21 06:19:43 +08:00
|
|
|
{
|
|
|
|
const char *ssh;
|
|
|
|
enum ssh_variant variant;
|
|
|
|
|
|
|
|
if (looks_like_command_line_option(ssh_host))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("strange hostname '%s' blocked"), ssh_host);
|
2017-11-21 06:19:43 +08:00
|
|
|
|
|
|
|
ssh = get_ssh_command();
|
|
|
|
if (ssh) {
|
|
|
|
variant = determine_ssh_variant(ssh, 1);
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* GIT_SSH is the no-shell version of
|
|
|
|
* GIT_SSH_COMMAND (and must remain so for
|
|
|
|
* historical compatibility).
|
|
|
|
*/
|
|
|
|
conn->use_shell = 0;
|
|
|
|
|
|
|
|
ssh = getenv("GIT_SSH");
|
|
|
|
if (!ssh)
|
|
|
|
ssh = "ssh";
|
|
|
|
variant = determine_ssh_variant(ssh, 0);
|
|
|
|
}
|
|
|
|
|
ssh: 'auto' variant to select between 'ssh' and 'simple'
Android's "repo" tool is a tool for managing a large codebase
consisting of multiple smaller repositories, similar to Git's
submodule feature. Starting with Git 94b8ae5a (ssh: introduce a
'simple' ssh variant, 2017-10-16), users noticed that it stopped
handling the port in ssh:// URLs.
The cause: when it encounters ssh:// URLs, repo pre-connects to the
server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses
that connection. Before 94b8ae5a, the helper was assumed to support
OpenSSH options for lack of a better guess and got passed a -p option
to set the port. After that patch, it uses the new default of a
simple helper that does not accept an option to set the port.
The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid
that. But users of old versions and of other similar GIT_SSH
implementations would not get the benefit of that fix.
So update the default to use OpenSSH options again, with a twist. As
observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles
OpenSSH options: common helpers such as travis-ci's dpl[*] are
configured using GIT_SSH and do not accept OpenSSH options. So make
the default a new variant "auto", with the following behavior:
1. First, check for a recognized basename, like today.
2. If the basename is not recognized, check whether $GIT_SSH supports
OpenSSH options by running
$GIT_SSH -G <options> <host>
This returns status 0 and prints configuration in OpenSSH if it
recognizes all <options> and returns status 255 if it encounters
an unrecognized option. A wrapper script like
exec ssh -- "$@"
would fail with
ssh: Could not resolve hostname -g: Name or service not known
, correctly reflecting that it does not support OpenSSH options.
The command is run with stdin, stdout, and stderr redirected to
/dev/null so even a command that expects a terminal would exit
immediately.
3. Based on the result from step (2), behave like "ssh" (if it
succeeded) or "simple" (if it failed).
This way, the default ssh variant for unrecognized commands can handle
both the repo and dpl cases as intended.
This autodetection has been running on Google workstations since
2017-10-23 with no reported negative effects.
[*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215
Reported-by: William Yan <wyan@google.com>
Improved-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 05:30:04 +08:00
|
|
|
if (variant == VARIANT_AUTO) {
|
|
|
|
struct child_process detect = CHILD_PROCESS_INIT;
|
|
|
|
|
|
|
|
detect.use_shell = conn->use_shell;
|
|
|
|
detect.no_stdin = detect.no_stdout = detect.no_stderr = 1;
|
|
|
|
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(&detect.args, ssh);
|
|
|
|
strvec_push(&detect.args, "-G");
|
2022-06-02 17:09:50 +08:00
|
|
|
push_ssh_options(&detect.args, &detect.env,
|
2018-03-16 01:31:30 +08:00
|
|
|
VARIANT_SSH, port, version, flags);
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(&detect.args, ssh_host);
|
ssh: 'auto' variant to select between 'ssh' and 'simple'
Android's "repo" tool is a tool for managing a large codebase
consisting of multiple smaller repositories, similar to Git's
submodule feature. Starting with Git 94b8ae5a (ssh: introduce a
'simple' ssh variant, 2017-10-16), users noticed that it stopped
handling the port in ssh:// URLs.
The cause: when it encounters ssh:// URLs, repo pre-connects to the
server and sets GIT_SSH to a helper ".repo/repo/git_ssh" that reuses
that connection. Before 94b8ae5a, the helper was assumed to support
OpenSSH options for lack of a better guess and got passed a -p option
to set the port. After that patch, it uses the new default of a
simple helper that does not accept an option to set the port.
The next release of "repo" will set GIT_SSH_VARIANT to "ssh" to avoid
that. But users of old versions and of other similar GIT_SSH
implementations would not get the benefit of that fix.
So update the default to use OpenSSH options again, with a twist. As
observed in 94b8ae5a, we cannot assume that $GIT_SSH always handles
OpenSSH options: common helpers such as travis-ci's dpl[*] are
configured using GIT_SSH and do not accept OpenSSH options. So make
the default a new variant "auto", with the following behavior:
1. First, check for a recognized basename, like today.
2. If the basename is not recognized, check whether $GIT_SSH supports
OpenSSH options by running
$GIT_SSH -G <options> <host>
This returns status 0 and prints configuration in OpenSSH if it
recognizes all <options> and returns status 255 if it encounters
an unrecognized option. A wrapper script like
exec ssh -- "$@"
would fail with
ssh: Could not resolve hostname -g: Name or service not known
, correctly reflecting that it does not support OpenSSH options.
The command is run with stdin, stdout, and stderr redirected to
/dev/null so even a command that expects a terminal would exit
immediately.
3. Based on the result from step (2), behave like "ssh" (if it
succeeded) or "simple" (if it failed).
This way, the default ssh variant for unrecognized commands can handle
both the repo and dpl cases as intended.
This autodetection has been running on Google workstations since
2017-10-23 with no reported negative effects.
[*] https://github.com/travis-ci/dpl/blob/6c3fddfda1f2a85944c544446b068bac0a77c049/lib/dpl/provider.rb#L215
Reported-by: William Yan <wyan@google.com>
Improved-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-11-21 05:30:04 +08:00
|
|
|
|
|
|
|
variant = run_command(&detect) ? VARIANT_SIMPLE : VARIANT_SSH;
|
|
|
|
}
|
|
|
|
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(&conn->args, ssh);
|
2022-06-02 17:09:50 +08:00
|
|
|
push_ssh_options(&conn->args, &conn->env, variant, port, version,
|
|
|
|
flags);
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(&conn->args, ssh_host);
|
2017-11-21 06:19:43 +08:00
|
|
|
}
|
|
|
|
|
2013-11-29 03:49:01 +08:00
|
|
|
/*
|
2017-11-21 05:22:54 +08:00
|
|
|
* This returns the dummy child_process `no_fork` if the transport protocol
|
|
|
|
* does not need fork(2), or a struct child_process object if it does. Once
|
|
|
|
* done, finish the connection with finish_connect() with the value returned
|
|
|
|
* from this function (it is safe to call finish_connect() with NULL to
|
|
|
|
* support the former case).
|
2013-11-29 03:49:01 +08:00
|
|
|
*
|
|
|
|
* If it returns, the connect is successful; it just dies on errors (this
|
|
|
|
* will hopefully be changed in a libification effort, to return NULL when
|
|
|
|
* the connection failed).
|
|
|
|
*/
|
|
|
|
struct child_process *git_connect(int fd[2], const char *url,
|
git_connect(): fix corner cases in downgrading v2 to v0
There's code in git_connect() that checks whether we are doing a push
with protocol_v2, and if so, drops us to protocol_v0 (since we know
how to do v2 only for fetches). But it misses some corner cases:
1. it checks the "prog" variable, which is actually the path to
receive-pack on the remote side. By default this is just
"git-receive-pack", but it could be an arbitrary string (like
"/path/to/git receive-pack", etc). We'd accidentally stay in v2
mode in this case.
2. besides "receive-pack" and "upload-pack", there's one other value
we'd expect: "upload-archive" for handling "git archive --remote".
Like receive-pack, this doesn't understand v2, and should use the
v0 protocol.
In practice, neither of these causes bugs in the real world so far. We
do send a "we understand v2" probe to the server, but since no server
implements v2 for anything but upload-pack, it's simply ignored. But
this would eventually become a problem if we do implement v2 for those
endpoints, as older clients would falsely claim to understand it,
leading to a server response they can't parse.
We can fix (1) by passing in both the program path and the "name" of the
operation. I treat the name as a string here, because that's the pattern
set in transport_connect(), which is one of our callers (we were simply
throwing away the "name" value there before).
We can fix (2) by allowing only known-v2 protocols ("upload-pack"),
rather than blocking unknown ones ("receive-pack" and "upload-archive").
That will mean whoever eventually implements v2 push will have to adjust
this list, but that's reasonable. We'll do the safe, conservative thing
(sticking to v0) by default, and anybody working on v2 will quickly
realize this spot needs to be updated.
The new tests cover the receive-pack and upload-archive cases above, and
re-confirm that we allow v2 with an arbitrary "--upload-pack" path (that
already worked before this patch, of course, but it would be an easy
thing to break if we flipped the allow/block logic without also handling
"name" separately).
Here are a few miscellaneous implementation notes, since I had to do a
little head-scratching to understand who calls what:
- transport_connect() is called only for git-upload-archive. For
non-http git remotes, that resolves to the virtual connect_git()
function (which then calls git_connect(); confused yet?). So
plumbing through "name" in connect_git() covers that.
- for regular fetches and pushes, callers use higher-level functions
like transport_fetch_refs(). For non-http git remotes, that means
calling git_connect() under the hood via connect_setup(). And that
uses the "for_push" flag to decide which name to use.
- likewise, plumbing like fetch-pack and send-pack may call
git_connect() directly; they each know which name to use.
- for remote helpers (including http), we already have separate
parameters for "name" and "exec" (another name for "prog"). In
process_connect_service(), we feed the "name" to the helper via
"connect" or "stateless-connect" directives.
There's also a "servpath" option, which can be used to tell the
helper about the "exec" path. But no helpers we implement support
it! For http it would be useless anyway (no reasonable server
implementation will allow you to send a shell command to run the
server). In theory it would be useful for more obscure helpers like
remote-ext, but even there it is not implemented.
It's tempting to get rid of it simply to reduce confusion, but we
have publicly documented it since it was added in fa8c097cc9
(Support remote helpers implementing smart transports, 2009-12-09),
so it's possible some helper in the wild is using it.
- So for v2, helpers (again, including http) are mainly used via
stateless-connect, driven by the main program. But they do still
need to decide whether to do a v2 probe. And so there's similar
logic in remote-curl.c's discover_refs() that looks for
"git-receive-pack". But it's not buggy in the same way. Since it
doesn't support servpath, it is always dealing with a "service"
string like "git-receive-pack". And since it doesn't support
straight "connect", it can't be used for "upload-archive".
So we could leave that spot alone. But I've updated it here to match
the logic we're changing in connect_git(). That seems like the least
confusing thing for somebody who has to touch both of these spots
later (say, to add v2 push support). I didn't add a new test to make
sure this doesn't break anything; we already have several tests (in
t5551 and elsewhere) that make sure we are using v2 over http.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-18 03:08:51 +08:00
|
|
|
const char *name,
|
2013-11-29 03:49:01 +08:00
|
|
|
const char *prog, int flags)
|
|
|
|
{
|
2013-11-29 03:50:15 +08:00
|
|
|
char *hostandport, *path;
|
2017-11-21 05:22:54 +08:00
|
|
|
struct child_process *conn;
|
2013-11-29 03:49:01 +08:00
|
|
|
enum protocol protocol;
|
2018-03-16 01:31:30 +08:00
|
|
|
enum protocol_version version = get_protocol_version_config();
|
2013-11-29 03:49:01 +08:00
|
|
|
|
2018-03-16 01:31:31 +08:00
|
|
|
/*
|
|
|
|
* NEEDSWORK: If we are trying to use protocol v2 and we are planning
|
git_connect(): fix corner cases in downgrading v2 to v0
There's code in git_connect() that checks whether we are doing a push
with protocol_v2, and if so, drops us to protocol_v0 (since we know
how to do v2 only for fetches). But it misses some corner cases:
1. it checks the "prog" variable, which is actually the path to
receive-pack on the remote side. By default this is just
"git-receive-pack", but it could be an arbitrary string (like
"/path/to/git receive-pack", etc). We'd accidentally stay in v2
mode in this case.
2. besides "receive-pack" and "upload-pack", there's one other value
we'd expect: "upload-archive" for handling "git archive --remote".
Like receive-pack, this doesn't understand v2, and should use the
v0 protocol.
In practice, neither of these causes bugs in the real world so far. We
do send a "we understand v2" probe to the server, but since no server
implements v2 for anything but upload-pack, it's simply ignored. But
this would eventually become a problem if we do implement v2 for those
endpoints, as older clients would falsely claim to understand it,
leading to a server response they can't parse.
We can fix (1) by passing in both the program path and the "name" of the
operation. I treat the name as a string here, because that's the pattern
set in transport_connect(), which is one of our callers (we were simply
throwing away the "name" value there before).
We can fix (2) by allowing only known-v2 protocols ("upload-pack"),
rather than blocking unknown ones ("receive-pack" and "upload-archive").
That will mean whoever eventually implements v2 push will have to adjust
this list, but that's reasonable. We'll do the safe, conservative thing
(sticking to v0) by default, and anybody working on v2 will quickly
realize this spot needs to be updated.
The new tests cover the receive-pack and upload-archive cases above, and
re-confirm that we allow v2 with an arbitrary "--upload-pack" path (that
already worked before this patch, of course, but it would be an easy
thing to break if we flipped the allow/block logic without also handling
"name" separately).
Here are a few miscellaneous implementation notes, since I had to do a
little head-scratching to understand who calls what:
- transport_connect() is called only for git-upload-archive. For
non-http git remotes, that resolves to the virtual connect_git()
function (which then calls git_connect(); confused yet?). So
plumbing through "name" in connect_git() covers that.
- for regular fetches and pushes, callers use higher-level functions
like transport_fetch_refs(). For non-http git remotes, that means
calling git_connect() under the hood via connect_setup(). And that
uses the "for_push" flag to decide which name to use.
- likewise, plumbing like fetch-pack and send-pack may call
git_connect() directly; they each know which name to use.
- for remote helpers (including http), we already have separate
parameters for "name" and "exec" (another name for "prog"). In
process_connect_service(), we feed the "name" to the helper via
"connect" or "stateless-connect" directives.
There's also a "servpath" option, which can be used to tell the
helper about the "exec" path. But no helpers we implement support
it! For http it would be useless anyway (no reasonable server
implementation will allow you to send a shell command to run the
server). In theory it would be useful for more obscure helpers like
remote-ext, but even there it is not implemented.
It's tempting to get rid of it simply to reduce confusion, but we
have publicly documented it since it was added in fa8c097cc9
(Support remote helpers implementing smart transports, 2009-12-09),
so it's possible some helper in the wild is using it.
- So for v2, helpers (again, including http) are mainly used via
stateless-connect, driven by the main program. But they do still
need to decide whether to do a v2 probe. And so there's similar
logic in remote-curl.c's discover_refs() that looks for
"git-receive-pack". But it's not buggy in the same way. Since it
doesn't support servpath, it is always dealing with a "service"
string like "git-receive-pack". And since it doesn't support
straight "connect", it can't be used for "upload-archive".
So we could leave that spot alone. But I've updated it here to match
the logic we're changing in connect_git(). That seems like the least
confusing thing for somebody who has to touch both of these spots
later (say, to add v2 push support). I didn't add a new test to make
sure this doesn't break anything; we already have several tests (in
t5551 and elsewhere) that make sure we are using v2 over http.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-18 03:08:51 +08:00
|
|
|
* to perform any operation that doesn't involve upload-pack (i.e., a
|
|
|
|
* fetch, ls-remote, etc), then fall back to v0 since we don't know how
|
|
|
|
* to do anything else (like push or remote archive) via v2.
|
2018-03-16 01:31:31 +08:00
|
|
|
*/
|
git_connect(): fix corner cases in downgrading v2 to v0
There's code in git_connect() that checks whether we are doing a push
with protocol_v2, and if so, drops us to protocol_v0 (since we know
how to do v2 only for fetches). But it misses some corner cases:
1. it checks the "prog" variable, which is actually the path to
receive-pack on the remote side. By default this is just
"git-receive-pack", but it could be an arbitrary string (like
"/path/to/git receive-pack", etc). We'd accidentally stay in v2
mode in this case.
2. besides "receive-pack" and "upload-pack", there's one other value
we'd expect: "upload-archive" for handling "git archive --remote".
Like receive-pack, this doesn't understand v2, and should use the
v0 protocol.
In practice, neither of these causes bugs in the real world so far. We
do send a "we understand v2" probe to the server, but since no server
implements v2 for anything but upload-pack, it's simply ignored. But
this would eventually become a problem if we do implement v2 for those
endpoints, as older clients would falsely claim to understand it,
leading to a server response they can't parse.
We can fix (1) by passing in both the program path and the "name" of the
operation. I treat the name as a string here, because that's the pattern
set in transport_connect(), which is one of our callers (we were simply
throwing away the "name" value there before).
We can fix (2) by allowing only known-v2 protocols ("upload-pack"),
rather than blocking unknown ones ("receive-pack" and "upload-archive").
That will mean whoever eventually implements v2 push will have to adjust
this list, but that's reasonable. We'll do the safe, conservative thing
(sticking to v0) by default, and anybody working on v2 will quickly
realize this spot needs to be updated.
The new tests cover the receive-pack and upload-archive cases above, and
re-confirm that we allow v2 with an arbitrary "--upload-pack" path (that
already worked before this patch, of course, but it would be an easy
thing to break if we flipped the allow/block logic without also handling
"name" separately).
Here are a few miscellaneous implementation notes, since I had to do a
little head-scratching to understand who calls what:
- transport_connect() is called only for git-upload-archive. For
non-http git remotes, that resolves to the virtual connect_git()
function (which then calls git_connect(); confused yet?). So
plumbing through "name" in connect_git() covers that.
- for regular fetches and pushes, callers use higher-level functions
like transport_fetch_refs(). For non-http git remotes, that means
calling git_connect() under the hood via connect_setup(). And that
uses the "for_push" flag to decide which name to use.
- likewise, plumbing like fetch-pack and send-pack may call
git_connect() directly; they each know which name to use.
- for remote helpers (including http), we already have separate
parameters for "name" and "exec" (another name for "prog"). In
process_connect_service(), we feed the "name" to the helper via
"connect" or "stateless-connect" directives.
There's also a "servpath" option, which can be used to tell the
helper about the "exec" path. But no helpers we implement support
it! For http it would be useless anyway (no reasonable server
implementation will allow you to send a shell command to run the
server). In theory it would be useful for more obscure helpers like
remote-ext, but even there it is not implemented.
It's tempting to get rid of it simply to reduce confusion, but we
have publicly documented it since it was added in fa8c097cc9
(Support remote helpers implementing smart transports, 2009-12-09),
so it's possible some helper in the wild is using it.
- So for v2, helpers (again, including http) are mainly used via
stateless-connect, driven by the main program. But they do still
need to decide whether to do a v2 probe. And so there's similar
logic in remote-curl.c's discover_refs() that looks for
"git-receive-pack". But it's not buggy in the same way. Since it
doesn't support servpath, it is always dealing with a "service"
string like "git-receive-pack". And since it doesn't support
straight "connect", it can't be used for "upload-archive".
So we could leave that spot alone. But I've updated it here to match
the logic we're changing in connect_git(). That seems like the least
confusing thing for somebody who has to touch both of these spots
later (say, to add v2 push support). I didn't add a new test to make
sure this doesn't break anything; we already have several tests (in
t5551 and elsewhere) that make sure we are using v2 over http.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-03-18 03:08:51 +08:00
|
|
|
if (version == protocol_v2 && strcmp("git-upload-pack", name))
|
2018-03-16 01:31:31 +08:00
|
|
|
version = protocol_v0;
|
|
|
|
|
2013-11-29 03:49:01 +08:00
|
|
|
/* Without this we cannot rely on waitpid() to tell
|
|
|
|
* what happened to our children.
|
2007-09-01 17:36:31 +08:00
|
|
|
*/
|
2013-11-29 03:49:01 +08:00
|
|
|
signal(SIGCHLD, SIG_DFL);
|
2007-09-01 17:36:31 +08:00
|
|
|
|
2013-11-29 03:50:15 +08:00
|
|
|
protocol = parse_connect_url(url, &hostandport, &path);
|
2015-02-21 23:52:55 +08:00
|
|
|
if ((flags & CONNECT_DIAG_URL) && (protocol != PROTO_SSH)) {
|
2013-11-29 03:49:17 +08:00
|
|
|
printf("Diag: url=%s\n", url ? url : "NULL");
|
|
|
|
printf("Diag: protocol=%s\n", prot_name(protocol));
|
2013-11-29 03:50:15 +08:00
|
|
|
printf("Diag: hostandport=%s\n", hostandport ? hostandport : "NULL");
|
2013-11-29 03:49:17 +08:00
|
|
|
printf("Diag: path=%s\n", path ? path : "NULL");
|
2013-11-29 03:50:15 +08:00
|
|
|
conn = NULL;
|
|
|
|
} else if (protocol == PROTO_GIT) {
|
2018-03-16 01:31:30 +08:00
|
|
|
conn = git_connect_git(fd, hostandport, path, prog, version, flags);
|
2019-02-23 06:25:05 +08:00
|
|
|
conn->trace2_child_class = "transport/git";
|
2013-11-29 03:50:15 +08:00
|
|
|
} else {
|
2017-08-31 01:49:39 +08:00
|
|
|
struct strbuf cmd = STRBUF_INIT;
|
2017-10-17 01:55:28 +08:00
|
|
|
const char *const *var;
|
2017-08-31 01:49:39 +08:00
|
|
|
|
2014-08-20 03:10:48 +08:00
|
|
|
conn = xmalloc(sizeof(*conn));
|
|
|
|
child_process_init(conn);
|
2013-11-29 03:50:15 +08:00
|
|
|
|
2017-07-29 03:28:55 +08:00
|
|
|
if (looks_like_command_line_option(path))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("strange pathname '%s' blocked"), path);
|
2017-07-29 03:28:55 +08:00
|
|
|
|
2013-11-29 03:50:15 +08:00
|
|
|
strbuf_addstr(&cmd, prog);
|
|
|
|
strbuf_addch(&cmd, ' ');
|
|
|
|
sq_quote_buf(&cmd, path);
|
|
|
|
|
2015-09-05 06:40:08 +08:00
|
|
|
/* remove repo-local variables from the environment */
|
2017-10-17 01:55:28 +08:00
|
|
|
for (var = local_repo_env; *var; var++)
|
2022-06-02 17:09:50 +08:00
|
|
|
strvec_push(&conn->env, *var);
|
2017-10-17 01:55:28 +08:00
|
|
|
|
2015-09-08 16:33:14 +08:00
|
|
|
conn->use_shell = 1;
|
2013-11-29 03:50:15 +08:00
|
|
|
conn->in = conn->out = -1;
|
|
|
|
if (protocol == PROTO_SSH) {
|
|
|
|
char *ssh_host = hostandport;
|
|
|
|
const char *port = NULL;
|
transport: add a protocol-whitelist environment variable
If we are cloning an untrusted remote repository into a
sandbox, we may also want to fetch remote submodules in
order to get the complete view as intended by the other
side. However, that opens us up to attacks where a malicious
user gets us to clone something they would not otherwise
have access to (this is not necessarily a problem by itself,
but we may then act on the cloned contents in a way that
exposes them to the attacker).
Ideally such a setup would sandbox git entirely away from
high-value items, but this is not always practical or easy
to set up (e.g., OS network controls may block multiple
protocols, and we would want to enable some but not others).
We can help this case by providing a way to restrict
particular protocols. We use a whitelist in the environment.
This is more annoying to set up than a blacklist, but
defaults to safety if the set of protocols git supports
grows. If no whitelist is specified, we continue to default
to allowing all protocols (this is an "unsafe" default, but
since the minority of users will want this sandboxing
effect, it is the only sensible one).
A note on the tests: ideally these would all be in a single
test file, but the git-daemon and httpd test infrastructure
is an all-or-nothing proposition rather than a test-by-test
prerequisite. By putting them all together, we would be
unable to test the file-local code on machines without
apache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
|
|
|
transport_check_allowed("ssh");
|
2013-11-29 03:50:15 +08:00
|
|
|
get_host_and_port(&ssh_host, &port);
|
|
|
|
|
2015-02-21 23:52:48 +08:00
|
|
|
if (!port)
|
|
|
|
port = get_port(ssh_host);
|
2015-03-06 04:45:44 +08:00
|
|
|
|
2015-02-21 23:52:55 +08:00
|
|
|
if (flags & CONNECT_DIAG_URL) {
|
|
|
|
printf("Diag: url=%s\n", url ? url : "NULL");
|
|
|
|
printf("Diag: protocol=%s\n", prot_name(protocol));
|
|
|
|
printf("Diag: userandhost=%s\n", ssh_host ? ssh_host : "NULL");
|
|
|
|
printf("Diag: port=%s\n", port ? port : "NONE");
|
|
|
|
printf("Diag: path=%s\n", path ? path : "NULL");
|
2013-11-29 03:50:15 +08:00
|
|
|
|
2015-02-21 23:52:55 +08:00
|
|
|
free(hostandport);
|
|
|
|
free(path);
|
2015-03-10 00:58:22 +08:00
|
|
|
free(conn);
|
2017-08-31 01:49:39 +08:00
|
|
|
strbuf_release(&cmd);
|
2015-02-21 23:52:55 +08:00
|
|
|
return NULL;
|
2015-04-27 04:30:10 +08:00
|
|
|
}
|
2019-02-23 06:25:05 +08:00
|
|
|
conn->trace2_child_class = "transport/ssh";
|
2018-03-16 01:31:30 +08:00
|
|
|
fill_ssh_args(conn, ssh_host, port, version, flags);
|
2014-03-13 19:45:31 +08:00
|
|
|
} else {
|
transport: add a protocol-whitelist environment variable
If we are cloning an untrusted remote repository into a
sandbox, we may also want to fetch remote submodules in
order to get the complete view as intended by the other
side. However, that opens us up to attacks where a malicious
user gets us to clone something they would not otherwise
have access to (this is not necessarily a problem by itself,
but we may then act on the cloned contents in a way that
exposes them to the attacker).
Ideally such a setup would sandbox git entirely away from
high-value items, but this is not always practical or easy
to set up (e.g., OS network controls may block multiple
protocols, and we would want to enable some but not others).
We can help this case by providing a way to restrict
particular protocols. We use a whitelist in the environment.
This is more annoying to set up than a blacklist, but
defaults to safety if the set of protocols git supports
grows. If no whitelist is specified, we continue to default
to allowing all protocols (this is an "unsafe" default, but
since the minority of users will want this sandboxing
effect, it is the only sensible one).
A note on the tests: ideally these would all be in a single
test file, but the git-daemon and httpd test infrastructure
is an all-or-nothing proposition rather than a test-by-test
prerequisite. By putting them all together, we would be
unable to test the file-local code on machines without
apache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
|
|
|
transport_check_allowed("file");
|
2019-02-23 06:25:05 +08:00
|
|
|
conn->trace2_child_class = "transport/file";
|
2018-03-16 01:31:30 +08:00
|
|
|
if (version > 0) {
|
2022-06-02 17:09:50 +08:00
|
|
|
strvec_pushf(&conn->env,
|
strvec: fix indentation in renamed calls
Code which split an argv_array call across multiple lines, like:
argv_array_pushl(&args, "one argument",
"another argument", "and more",
NULL);
was recently mechanically renamed to use strvec, which results in
mis-matched indentation like:
strvec_pushl(&args, "one argument",
"another argument", "and more",
NULL);
Let's fix these up to align the arguments with the opening paren. I did
this manually by sifting through the results of:
git jump grep 'strvec_.*,$'
and liberally applying my editor's auto-format. Most of the changes are
of the form shown above, though I also normalized a few that had
originally used a single-tab indentation (rather than our usual style of
aligning with the open paren). I also rewrapped a couple of obvious
cases (e.g., where previously too-long lines became short enough to fit
on one), but I wasn't aggressive about it. In cases broken to three or
more lines, the grouping of arguments is sometimes meaningful, and it
wasn't worth my time or reviewer time to ponder each case individually.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-07-29 04:26:31 +08:00
|
|
|
GIT_PROTOCOL_ENVIRONMENT "=version=%d",
|
|
|
|
version);
|
2017-10-17 01:55:28 +08:00
|
|
|
}
|
2005-08-03 23:15:42 +08:00
|
|
|
}
|
2020-07-29 04:24:53 +08:00
|
|
|
strvec_push(&conn->args, cmd.buf);
|
2007-10-20 03:47:54 +08:00
|
|
|
|
2013-11-29 03:50:15 +08:00
|
|
|
if (start_command(conn))
|
2018-07-21 15:49:28 +08:00
|
|
|
die(_("unable to fork"));
|
2007-10-20 03:47:54 +08:00
|
|
|
|
2013-11-29 03:50:15 +08:00
|
|
|
fd[0] = conn->out; /* read from child's stdout */
|
|
|
|
fd[1] = conn->in; /* write to child's stdin */
|
|
|
|
strbuf_release(&cmd);
|
|
|
|
}
|
|
|
|
free(hostandport);
|
2013-11-29 03:49:01 +08:00
|
|
|
free(path);
|
2007-10-20 03:47:53 +08:00
|
|
|
return conn;
|
2005-07-05 02:57:58 +08:00
|
|
|
}
|
|
|
|
|
2007-10-20 03:47:53 +08:00
|
|
|
/*
 * Finish a connection handle and release it.
 *
 * Returns 0 without touching "conn" when it is NULL or when
 * git_connection_is_socket() reports true for it (presumably such a
 * handle has no child process to wait for -- confirm against
 * git_connection_is_socket()). Otherwise passes "conn" to
 * finish_command(), frees the handle, and returns whatever
 * finish_command() returned. The caller must not use "conn" afterwards.
 */
int finish_connect(struct child_process *conn)
{
	int status = 0;

	if (conn && !git_connection_is_socket(conn)) {
		status = finish_command(conn);
		free(conn);
	}

	return status;
}
|