2007-09-11 11:03:04 +08:00
|
|
|
#ifndef TRANSPORT_H
|
|
|
|
#define TRANSPORT_H
|
|
|
|
|
|
|
|
#include "cache.h"
|
2013-07-09 04:56:53 +08:00
|
|
|
#include "run-command.h"
|
2007-09-11 11:03:04 +08:00
|
|
|
#include "remote.h"
|
2017-12-08 23:58:40 +08:00
|
|
|
#include "list-objects-filter-options.h"
|
fetch-pack: support more than one pack lockfile
Whenever a fetch results in a packfile being downloaded, a .keep file is
generated, so that the packfile can be preserved (from, say, a running
"git repack") until refs are written referring to the contents of the
packfile.
In a subsequent patch, a successful fetch using protocol v2 may result
in more than one .keep file being generated. Therefore, teach
fetch_pack() and the transport mechanism to support multiple .keep
files.
Implementation notes:
- builtin/fetch-pack.c normally does not generate .keep files, and thus
is unaffected by this or future changes. However, it has an
undocumented "--lock-pack" feature, used by remote-curl.c when
implementing the "fetch" remote helper command. In keeping with the
remote helper protocol, only one "lock" line will ever be written;
the rest will result in warnings to stderr. However, in practice,
warnings will never be written because the remote-curl.c "fetch" is
only used for protocol v0/v1 (which will not generate multiple .keep
files). (Protocol v2 uses the "stateless-connect" command, not the
"fetch" command.)
- connected.c has an optimization in that connectivity checks on a ref
need not be done if the target object is in a pack known to be
self-contained and connected. If there are multiple packfiles, this
optimization can no longer be done.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-11 04:57:22 +08:00
|
|
|
#include "string-list.h"
|
2016-06-12 18:54:04 +08:00
|
|
|
|
2009-12-09 23:26:30 +08:00
|
|
|
struct git_transport_options {
|
|
|
|
unsigned thin : 1;
|
|
|
|
unsigned keep : 1;
|
|
|
|
unsigned followtags : 1;
|
2013-05-26 09:16:17 +08:00
|
|
|
unsigned check_self_contained_and_connected : 1;
|
|
|
|
unsigned self_contained_and_connected : 1;
|
2013-12-05 21:02:42 +08:00
|
|
|
unsigned update_shallow : 1;
|
2021-04-01 18:46:59 +08:00
|
|
|
unsigned reject_shallow : 1;
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, the --depth argument is always relative to the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server.
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
|
|
|
unsigned deepen_relative : 1;
|
2022-03-28 22:02:08 +08:00
|
|
|
unsigned refetch : 1;
|
2020-08-18 03:48:18 +08:00
|
|
|
|
|
|
|
/* see documentation of corresponding flag in fetch-pack.h */
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-06 00:58:49 +08:00
|
|
|
unsigned from_promisor : 1;
|
2020-08-18 03:48:18 +08:00
|
|
|
|
fetch-pack: write shallow, then check connectivity
When fetching, connectivity is checked after the shallow file is
updated. There are 2 issues with this: (1) the connectivity check is
only performed up to ancestors of existing refs (which is not thorough
enough if we were deepening an existing ref in the first place), and (2)
there is no rollback of the shallow file if the connectivity check
fails.
To solve (1), update the connectivity check to check the ancestry chain
completely in the case of a deepening fetch by refraining from passing
"--not --all" when invoking rev-list in connected.c.
To solve (2), have fetch_pack() perform its own connectivity check
before updating the shallow file. To support existing use cases in which
"git fetch-pack" is used to download objects without much regard as to
the connectivity of the resulting objects with respect to the existing
repository, the connectivity check is only done if necessary (that is,
the fetch is not a clone, and the fetch involves shallow/deepen
functionality). "git fetch" still performs its own connectivity check,
preserving correctness but sometimes performing redundant work. This
redundancy is mitigated by the fact that fetch_pack() reports if it has
performed a connectivity check itself, and if the transport supports
connect or stateless-connect, it will bubble up that report so that "git
fetch" knows not to perform the connectivity check in such a case.
This was noticed when a user tried to deepen an existing repository by
fetching with --no-shallow from a server that did not send all necessary
objects - the connectivity check as run by "git fetch" succeeded, but a
subsequent "git fsck" failed.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-03 06:08:43 +08:00
|
|
|
/*
|
|
|
|
* If this transport supports connect or stateless-connect,
|
|
|
|
* the corresponding field in struct fetch_pack_args is copied
|
|
|
|
* here after fetching.
|
|
|
|
*
|
|
|
|
* See the definition of connectivity_checked in struct
|
|
|
|
* fetch_pack_args for more information.
|
|
|
|
*/
|
|
|
|
unsigned connectivity_checked:1;
|
|
|
|
|
2009-12-09 23:26:30 +08:00
|
|
|
int depth;
|
2016-06-12 18:53:59 +08:00
|
|
|
const char *deepen_since;
|
2016-06-12 18:54:04 +08:00
|
|
|
const struct string_list *deepen_not;
|
2009-12-09 23:26:30 +08:00
|
|
|
const char *uploadpack;
|
|
|
|
const char *receivepack;
|
2013-07-10 02:01:06 +08:00
|
|
|
struct push_cas_option *cas;
|
2017-12-08 23:58:40 +08:00
|
|
|
struct list_objects_filter_options filter_options;
|
2018-07-03 06:39:44 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is only used during fetch. See the documentation of
|
|
|
|
* negotiation_tips in struct fetch_pack_args.
|
|
|
|
*
|
|
|
|
* This field is only supported by transports that support connect or
|
|
|
|
* stateless_connect. Set this field directly instead of using
|
|
|
|
* transport_set_option().
|
|
|
|
*/
|
|
|
|
struct oid_array *negotiation_tips;
|
fetch: teach independent negotiation (no packfile)
Currently, the packfile negotiation step within a Git fetch cannot be
done independent of sending the packfile, even though there is at least
one application wherein this is useful. Therefore, make it possible for
this negotiation step to be done independently. A subsequent commit will
use this for one such application - push negotiation.
This feature is for protocol v2 only. (An implementation for protocol v0
would require a separate implementation in the fetch, transport, and
transport helper code.)
In the protocol, the main hindrance towards independent negotiation is
that the server can unilaterally decide to send the packfile. This is
solved by a "wait-for-done" argument: the server will then wait for the
client to say "done". In practice, the client will never say it; instead
it will cease requests once it is satisfied.
In the client, the main change lies in the transport and transport
helper code. fetch_refs_via_pack() performs everything needed - protocol
version and capability checks, and the negotiation itself.
There are 2 code paths that do not go through fetch_refs_via_pack() that
needed to be individually excluded: the bundle transport (excluded
through requiring smart_options, which the bundle transport doesn't
support) and transport helpers that do not support takeover. If or when
we support independent negotiation for protocol v0, we will need to
modify these 2 code paths to support it. But for now, report failure if
independent negotiation is requested in these cases.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-05-05 05:16:01 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If allocated, whenever transport_fetch_refs() is called, add known
|
|
|
|
* common commits to this oidset instead of fetching any packfiles.
|
|
|
|
*/
|
|
|
|
struct oidset *acked_commits;
|
2009-12-09 23:26:30 +08:00
|
|
|
};
|
|
|
|
|
2016-02-03 12:09:14 +08:00
|
|
|
/*
 * Value type of the "family" member of struct transport.
 *
 * NOTE(review): judging by the enumerator names this selects which IP
 * address family a transport may use -- confirm against the code that
 * reads transport->family.
 */
enum transport_family {
|
|
|
|
TRANSPORT_FAMILY_ALL = 0, /* explicitly zero, so zero-initialized storage reads as ALL */
|
|
|
|
TRANSPORT_FAMILY_IPV4, /* implicitly 1 */
|
|
|
|
TRANSPORT_FAMILY_IPV6 /* implicitly 2 */
|
|
|
|
};
|
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
struct transport {
|
2017-12-15 05:44:45 +08:00
|
|
|
const struct transport_vtable *vtable;
|
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
struct remote *remote;
|
|
|
|
const char *url;
|
|
|
|
void *data;
|
2007-10-30 09:05:40 +08:00
|
|
|
const struct ref *remote_refs;
|
2007-09-11 11:03:04 +08:00
|
|
|
|
2010-02-16 15:18:21 +08:00
|
|
|
/**
|
|
|
|
* Indicates whether we already called get_refs_list(); set by
|
|
|
|
* transport.c::transport_get_remote_refs().
|
|
|
|
*/
|
|
|
|
unsigned got_remote_refs : 1;
|
|
|
|
|
fetch: work around "transport-take-over" hack
A Git-aware "connect" transport allows the "transport_take_over" to
redirect generic transport requests like fetch(), push_refs() and
get_refs_list() to the native Git transport handling methods. The
take-over process replaces transport->data with a fake data that
these method implementations understand.
While this hack works OK for a single request, it breaks when the
transport needs to make more than one request. transport->data
that used to hold necessary information for the specific helper to
work correctly is destroyed during the take-over process.
One codepath where this matters is "git fetch" in auto-follow mode;
when it does not get all the tags that ought to point at the history
it got (which can be determined by looking at the peeled tags in the
initial advertisement) from the primary transfer, it internally
makes a second request to complete the fetch. Because "take-over"
hack has already destroyed the data necessary to talk to the
transport helper by the time this happens, the second request cannot
make a request to the helper to make another connection to fetch
these additional tags.
Mark such a transport as "cannot_reuse", and use a separate
transport to perform the backfill fetch in order to work around
this breakage.
Note that this problem does not manifest itself when running t5802,
because our upload-pack gives you all the necessary auto-followed
tags during the primary transfer. You would need to step through
"git fetch" in a debugger, stop immediately after the primary
transfer finishes and writes these auto-followed tags, remove the
tag references and repack/prune the repository to convince the
"find-non-local-tags" procedure that the primary transfer failed to
give us all the necessary tags, and then let it continue, in order
to trigger the bug in the secondary transfer this patch fixes.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-08 06:47:18 +08:00
|
|
|
/*
|
|
|
|
* Transports that call take-over destroy the data specific to
|
|
|
|
* the transport type while doing so, and cannot be reused.
|
|
|
|
*/
|
|
|
|
unsigned cannot_reuse : 1;
|
|
|
|
|
2013-12-05 21:02:39 +08:00
|
|
|
/*
|
|
|
|
* A hint from caller that it will be performing a clone, not
|
|
|
|
* normal fetch. IOW the repository is guaranteed empty.
|
|
|
|
*/
|
|
|
|
unsigned cloning : 1;
|
|
|
|
|
2018-03-16 01:31:34 +08:00
|
|
|
/*
|
|
|
|
* Indicates that the transport is connected via a half-duplex
|
|
|
|
* connection and should operate in stateless-rpc mode.
|
|
|
|
*/
|
|
|
|
unsigned stateless_rpc : 1;
|
|
|
|
|
2016-07-15 05:49:47 +08:00
|
|
|
/*
|
|
|
|
* These strings will be passed to the {pre, post}-receive hook,
|
|
|
|
* on the remote side, if both sides support the push options capability.
|
|
|
|
*/
|
|
|
|
const struct string_list *push_options;
|
|
|
|
|
2018-04-24 06:46:23 +08:00
|
|
|
/*
|
|
|
|
* These strings will be passed to the remote side on each command
|
|
|
|
* request, if both sides support the server-option capability.
|
|
|
|
*/
|
|
|
|
const struct string_list *server_options;
|
|
|
|
|
fetch-pack: support more than one pack lockfile
Whenever a fetch results in a packfile being downloaded, a .keep file is
generated, so that the packfile can be preserved (from, say, a running
"git repack") until refs are written referring to the contents of the
packfile.
In a subsequent patch, a successful fetch using protocol v2 may result
in more than one .keep file being generated. Therefore, teach
fetch_pack() and the transport mechanism to support multiple .keep
files.
Implementation notes:
- builtin/fetch-pack.c normally does not generate .keep files, and thus
is unaffected by this or future changes. However, it has an
undocumented "--lock-pack" feature, used by remote-curl.c when
implementing the "fetch" remote helper command. In keeping with the
remote helper protocol, only one "lock" line will ever be written;
the rest will result in warnings to stderr. However, in practice,
warnings will never be written because the remote-curl.c "fetch" is
only used for protocol v0/v1 (which will not generate multiple .keep
files). (Protocol v2 uses the "stateless-connect" command, not the
"fetch" command.)
- connected.c has an optimization in that connectivity checks on a ref
need not be done if the target object is in a pack known to be
self-contained and connected. If there are multiple packfiles, this
optimization can no longer be done.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2020-06-11 04:57:22 +08:00
|
|
|
struct string_list pack_lockfiles;
|
|
|
|
|
2009-10-31 08:47:27 +08:00
|
|
|
signed verbose : 3;
|
2010-02-24 20:50:26 +08:00
|
|
|
/**
|
|
|
|
* Transports should not set this directly, and should use this
|
|
|
|
* value without having to check isatty(2), -q/--quiet
|
|
|
|
* (transport->verbose < 0), etc. - checking has already been done
|
|
|
|
* in transport_set_verbosity().
|
|
|
|
**/
|
2008-10-09 07:40:32 +08:00
|
|
|
unsigned progress : 1;
|
2009-12-09 23:26:30 +08:00
|
|
|
/*
|
|
|
|
* If transport is at least potentially smart, this points to
|
|
|
|
* git_transport_options structure to use in case transport
|
|
|
|
* actually turns out to be smart.
|
|
|
|
*/
|
|
|
|
struct git_transport_options *smart_options;
|
2016-02-03 12:09:14 +08:00
|
|
|
|
|
|
|
enum transport_family family;
|
2020-05-26 03:58:55 +08:00
|
|
|
|
|
|
|
const struct git_hash_algo *hash_algo;
|
2007-09-11 11:03:04 +08:00
|
|
|
};
|
|
|
|
|
2016-12-20 02:25:31 +08:00
|
|
|
/*
 * Flag bits.  Every macro in this run is a distinct single-bit value
 * (1<<0 through 1<<14), so any combination of them can be OR-ed
 * together into one integer flags word and tested with '&'.
 *
 * Bits 15 and 16 are taken by TRANSPORT_RECURSE_SUBMODULES_ONLY and
 * TRANSPORT_PUSH_FORCE_IF_INCLUDES, defined right after this run; a new
 * flag must use a bit that none of these macros already occupies.
 */
#define TRANSPORT_PUSH_ALL (1<<0)
|
|
|
|
#define TRANSPORT_PUSH_FORCE (1<<1)
|
|
|
|
#define TRANSPORT_PUSH_DRY_RUN (1<<2)
|
|
|
|
#define TRANSPORT_PUSH_MIRROR (1<<3)
|
|
|
|
#define TRANSPORT_PUSH_PORCELAIN (1<<4)
|
|
|
|
#define TRANSPORT_PUSH_SET_UPSTREAM (1<<5)
|
|
|
|
#define TRANSPORT_RECURSE_SUBMODULES_CHECK (1<<6)
|
|
|
|
#define TRANSPORT_PUSH_PRUNE (1<<7)
|
|
|
|
#define TRANSPORT_RECURSE_SUBMODULES_ON_DEMAND (1<<8)
|
|
|
|
#define TRANSPORT_PUSH_NO_HOOK (1<<9)
|
|
|
|
#define TRANSPORT_PUSH_FOLLOW_TAGS (1<<10)
|
|
|
|
#define TRANSPORT_PUSH_CERT_ALWAYS (1<<11)
|
|
|
|
#define TRANSPORT_PUSH_CERT_IF_ASKED (1<<12)
|
|
|
|
#define TRANSPORT_PUSH_ATOMIC (1<<13)
|
|
|
|
#define TRANSPORT_PUSH_OPTIONS (1<<14)
|
2016-12-20 02:25:33 +08:00
|
|
|
#define TRANSPORT_RECURSE_SUBMODULES_ONLY (1<<15)
|
2020-10-03 20:10:45 +08:00
|
|
|
#define TRANSPORT_PUSH_FORCE_IF_INCLUDES (1<<16)
|
2007-09-19 12:49:31 +08:00
|
|
|
|
2019-04-29 16:28:14 +08:00
|
|
|
int transport_summary_width(const struct ref *refs);
|
2007-09-19 12:49:31 +08:00
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
/* Returns a transport suitable for the url */
|
2007-09-15 15:23:14 +08:00
|
|
|
/*
 * Returns a transport suitable for `url`.
 *
 * Parameter names are spelled out so the prototype documents itself;
 * naming them does not change the function's type, so callers and the
 * definition are unaffected.
 *
 * NOTE(review): `remote` presumably ends up in the returned
 * transport's "remote" member -- confirm against the definition.
 */
struct transport *transport_get(struct remote *remote, const char *url);
|
2007-09-11 11:03:04 +08:00
|
|
|
|
2015-09-23 06:03:49 +08:00
|
|
|
/*
|
2016-12-15 06:39:54 +08:00
|
|
|
* Check whether a transport is allowed by the environment.
|
|
|
|
*
|
|
|
|
* Type should generally be the URL scheme, as described in
|
|
|
|
* Documentation/git.txt
|
|
|
|
*
|
|
|
|
* from_user specifies if the transport was given by the user. If unknown pass
|
|
|
|
* a -1 to read from the environment to determine if the transport was given by
|
|
|
|
* the user.
|
|
|
|
*
|
2015-09-23 06:03:49 +08:00
|
|
|
*/
|
2016-12-15 06:39:54 +08:00
|
|
|
int is_transport_allowed(const char *type, int from_user);
|
2015-09-23 06:03:49 +08:00
|
|
|
|
transport: add a protocol-whitelist environment variable
If we are cloning an untrusted remote repository into a
sandbox, we may also want to fetch remote submodules in
order to get the complete view as intended by the other
side. However, that opens us up to attacks where a malicious
user gets us to clone something they would not otherwise
have access to (this is not necessarily a problem by itself,
but we may then act on the cloned contents in a way that
exposes them to the attacker).
Ideally such a setup would sandbox git entirely away from
high-value items, but this is not always practical or easy
to set up (e.g., OS network controls may block multiple
protocols, and we would want to enable some but not others).
We can help this case by providing a way to restrict
particular protocols. We use a whitelist in the environment.
This is more annoying to set up than a blacklist, but
defaults to safety if the set of protocols git supports
grows. If no whitelist is specified, we continue to default
to allowing all protocols (this is an "unsafe" default, but
since the minority of users will want this sandboxing
effect, it is the only sensible one).
A note on the tests: ideally these would all be in a single
test file, but the git-daemon and httpd test infrastructure
is an all-or-nothing proposition rather than a test-by-test
prerequisite. By putting them all together, we would be
unable to test the file-local code on machines without
apache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
|
|
|
/*
|
|
|
|
* Check whether a transport is allowed by the environment,
|
2015-09-23 06:03:49 +08:00
|
|
|
* and die otherwise.
|
transport: add a protocol-whitelist environment variable
If we are cloning an untrusted remote repository into a
sandbox, we may also want to fetch remote submodules in
order to get the complete view as intended by the other
side. However, that opens us up to attacks where a malicious
user gets us to clone something they would not otherwise
have access to (this is not necessarily a problem by itself,
but we may then act on the cloned contents in a way that
exposes them to the attacker).
Ideally such a setup would sandbox git entirely away from
high-value items, but this is not always practical or easy
to set up (e.g., OS network controls may block multiple
protocols, and we would want to enable some but not others).
We can help this case by providing a way to restrict
particular protocols. We use a whitelist in the environment.
This is more annoying to set up than a blacklist, but
defaults to safety if the set of protocols git supports
grows. If no whitelist is specified, we continue to default
to allowing all protocols (this is an "unsafe" default, but
since the minority of users will want this sandboxing
effect, it is the only sensible one).
A note on the tests: ideally these would all be in a single
test file, but the git-daemon and httpd test infrastructure
is an all-or-nothing proposition rather than a test-by-test
prerequisite. By putting them all together, we would be
unable to test the file-local code on machines without
apache.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-17 01:12:52 +08:00
|
|
|
*/
|
|
|
|
void transport_check_allowed(const char *type);
|
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
/* Transport options which apply to git:// and scp-style URLs */
|
|
|
|
|
2007-09-11 11:03:11 +08:00
|
|
|
/* The program to use on the remote side to send a pack */
|
|
|
|
#define TRANS_OPT_UPLOADPACK "uploadpack"
|
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
/* The program to use on the remote side to receive a pack */
|
|
|
|
#define TRANS_OPT_RECEIVEPACK "receivepack"
|
|
|
|
|
|
|
|
/* Transfer the data as a thin pack if not null */
|
|
|
|
#define TRANS_OPT_THIN "thin"
|
|
|
|
|
2013-07-10 02:01:06 +08:00
|
|
|
/* Check the current value of the remote ref */
|
|
|
|
#define TRANS_OPT_CAS "cas"
|
|
|
|
|
2007-09-11 11:03:11 +08:00
|
|
|
/* Keep the pack that was transferred if not null */
|
|
|
|
#define TRANS_OPT_KEEP "keep"
|
|
|
|
|
|
|
|
/* Limit the depth of the fetch if not null */
|
|
|
|
#define TRANS_OPT_DEPTH "depth"
|
|
|
|
|
2016-06-12 18:53:59 +08:00
|
|
|
/* Limit the depth of the fetch based on time if not null */
|
|
|
|
#define TRANS_OPT_DEEPEN_SINCE "deepen-since"
|
|
|
|
|
2016-06-12 18:54:04 +08:00
|
|
|
/* Limit the depth of the fetch based on revs if not null */
|
|
|
|
#define TRANS_OPT_DEEPEN_NOT "deepen-not"
|
|
|
|
|
fetch, upload-pack: --deepen=N extends shallow boundary by N commits
In git-fetch, the --depth argument is always relative to the latest
remote refs. This makes it a bit difficult to cover this use case,
where the user wants to make the shallow history, say 3 levels
deeper. It would work if remote refs have not moved yet, but nobody
can guarantee that, especially when that use case is performed a
couple months after the last clone or "git fetch --depth". Also,
modifying shallow boundary using --depth does not work well with
clones created by --since or --not.
This patch fixes that. A new argument --deepen=<N> will add <N> more (*)
parent commits to the current history regardless of where remote refs
are.
Have/Want negotiation is still respected. So if remote refs move, the
server will send two chunks: one between "have" and "want" and another
to extend shallow history. In theory, the client could send no "want"s
in order to get the second chunk only. But the protocol does not allow
that. Either you send no want lines, which means ls-remote; or you
have to send at least one want line that carries deep-relative to the
server.
The main work was done by Dongcan Jiang. I fixed it up here and there.
And of course all the bugs belong to me.
(*) We could even support --deepen=<N> where <N> is negative. In that
case we can cut some history from the shallow clone. This operation
(and --depth=<shorter depth>) does not require interaction with remote
side (and more complicated to implement as a result).
Helped-by: Duy Nguyen <pclouds@gmail.com>
Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Dongcan Jiang <dongcan.jiang@gmail.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2016-06-12 18:54:09 +08:00
|
|
|
/* Extend the existing shallow boundary (--deepen) if not null */
|
|
|
|
#define TRANS_OPT_DEEPEN_RELATIVE "deepen-relative"
|
|
|
|
|
2008-03-04 11:27:40 +08:00
|
|
|
/* Aggressively fetch annotated tags if possible */
|
|
|
|
#define TRANS_OPT_FOLLOWTAGS "followtags"
|
|
|
|
|
2021-04-01 18:46:59 +08:00
|
|
|
/* Reject shallow repo transport */
|
|
|
|
#define TRANS_OPT_REJECT_SHALLOW "rejectshallow"
|
|
|
|
|
2013-12-05 21:02:42 +08:00
|
|
|
/* Accept refs that may update .git/shallow without --depth */
|
|
|
|
#define TRANS_OPT_UPDATE_SHALLOW "updateshallow"
|
|
|
|
|
push: the beginning of "git push --signed"
While signed tags and commits assert that the objects thusly signed
came from you, who signed these objects, there is not a good way to
assert that you wanted to have a particular object at the tip of a
particular branch. My signing v2.0.1 tag only means I want to call
the version v2.0.1, and it does not mean I want to push it out to my
'master' branch---it is likely that I only want it in 'maint', so
the signature on the object alone is insufficient.
The only assurance to you that 'maint' points at what I wanted to
place there comes from your trust on the hosting site and my
authentication with it, which cannot easily be audited later.
Introduce a mechanism that allows you to sign a "push certificate"
(for the lack of better name) every time you push, asserting that
what object you are pushing to update which ref that used to point
at what other object. Think of it as a cryptographic protection for
ref updates, similar to signed tags/commits but working on an
orthogonal axis.
The basic flow based on this mechanism goes like this:
1. You push out your work with "git push --signed".
2. The sending side learns where the remote refs are as usual,
together with what protocol extension the receiving end
supports. If the receiving end does not advertise the protocol
extension "push-cert", an attempt to "git push --signed" fails.
Otherwise, a text file, that looks like the following, is
prepared in core:
certificate version 0.1
pusher Junio C Hamano <gitster@pobox.com> 1315427886 -0700
7339ca65... 21580ecb... refs/heads/master
3793ac56... 12850bec... refs/heads/next
The file begins with a few header lines, which may grow as we
gain more experience. The 'pusher' header records the name of
the signer (the value of user.signingkey configuration variable,
falling back to GIT_COMMITTER_{NAME|EMAIL}) and the time of the
certificate generation. After the header, a blank line follows,
followed by a copy of the protocol message lines.
Each line shows the old and the new object name at the tip of
the ref this push tries to update, in the way identical to how
the underlying "git push" protocol exchange tells the ref
updates to the receiving end (by recording the "old" object
name, the push certificate also protects against replaying). It
is expected that new command packet types other than the
old-new-refname kind will be included in push certificate in the
same way as would appear in the plain vanilla command packets in
unsigned pushes.
The user then is asked to sign this push certificate using GPG,
formatted in a way similar to how signed tag objects are signed,
and the result is sent to the other side (i.e. receive-pack).
In the protocol exchange, this step comes immediately before the
sender tells what the result of the push should be, which in
turn comes before it sends the pack data.
3. When the receiving end sees a push certificate, the certificate
is written out as a blob. The pre-receive hook can learn about
the certificate by checking GIT_PUSH_CERT environment variable,
which, if present, tells the object name of this blob, and make
the decision to allow or reject this push. Additionally, the
post-receive hook can also look at the certificate, which may be
a good place to log all the received certificates for later
audits.
Because a push certificate carries the same information as the usual
command packets in the protocol exchange, we can omit the latter
when a push certificate is in use and reduce the protocol overhead.
This however is not included in this patch to make it easier to
review (in other words, the series at this step should never be
released without the remainder of the series, as it implements an
interim protocol that will be incompatible with the final one).
As such, the documentation update for the protocol is left out of
this step.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-09-13 02:17:07 +08:00
|
|
|
/* Send push certificates */
|
|
|
|
#define TRANS_OPT_PUSH_CERT "pushcert"
|
|
|
|
|
introduce fetch-object: fetch one promisor object
Introduce fetch-object, providing the ability to fetch one object from a
promisor remote.
This uses fetch-pack. To do this, the transport mechanism has been
updated with 2 flags, "from-promisor" to indicate that the resulting
pack comes from a promisor remote (and thus should be annotated as such
by index-pack), and "no-dependents" to indicate that only the objects
themselves need to be fetched (but fetching additional objects is
nevertheless safe).
Whenever "no-dependents" is used, fetch-pack will refrain from using any
object flags, because it is most likely invoked as part of a dynamic
object fetch by another Git command (which may itself use object flags).
An alternative to this is to leave fetch-pack alone, and instead update
the allocation of flags so that fetch-pack's flags never overlap with
any others, but this will end up shrinking the number of flags available
to nearly every other Git command (that is, every Git command that
accesses objects), so the approach in this commit was used instead.
This will be tested in a subsequent commit.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-12-06 00:58:49 +08:00
|
|
|
/* Indicate that these objects are being fetched from a promisor remote */
|
|
|
|
#define TRANS_OPT_FROM_PROMISOR "from-promisor"
|
|
|
|
|
2017-12-08 23:58:40 +08:00
|
|
|
/* Filter objects for partial clone and fetch */
|
|
|
|
#define TRANS_OPT_LIST_OBJECTS_FILTER "filter"
|
|
|
|
|
2022-03-28 22:02:08 +08:00
|
|
|
/* Refetch all objects without negotiating */
|
|
|
|
#define TRANS_OPT_REFETCH "refetch"
|
|
|
|
|
remote-curl: pass on atomic capability to remote side
When pushing more than one reference with the --atomic option, the
server is supposed to perform a single atomic transaction to update the
references, leaving them either all to succeed or all to fail. This
works fine when pushing locally or over SSH, but when pushing over HTTP,
we fail to pass the atomic capability to the remote side. In fact, we
have not reported this capability to any remote helpers during the life
of the feature.
Now normally, things happen to work nevertheless, since we actually
check for most types of failures, such as non-fast-forward updates, on
the client side, and just abort the entire attempt. However, if the
server side reports a problem, such as the inability to lock a ref, the
transaction isn't atomic, because we haven't passed the appropriate
capability over and the remote side has no way of knowing that we wanted
atomic behavior.
Fix this by passing the option from the transport code through to remote
helpers, and from the HTTP remote helper down to send-pack. With this
change, we can detect if the server side rejects the push and report
back appropriately. Note the difference in the messages: the remote
side reports "atomic transaction failed", while our own checking rejects
pushes with the message "atomic push failed".
Document the atomic option in the remote helper documentation, so other
implementers can implement it if they like.
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2019-10-17 07:45:34 +08:00
|
|
|
/* Request atomic (all-or-nothing) updates when pushing */
|
|
|
|
#define TRANS_OPT_ATOMIC "atomic"
|
|
|
|
|
2020-10-03 20:10:45 +08:00
|
|
|
/* Require remote changes to be integrated locally. */
|
|
|
|
#define TRANS_OPT_FORCE_IF_INCLUDES "force-if-includes"
|
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
/**
|
|
|
|
* Returns 0 if the option was used, non-zero otherwise. Prints a
|
|
|
|
* message to stderr if the option is not used.
|
|
|
|
**/
|
|
|
|
int transport_set_option(struct transport *transport, const char *name,
|
|
|
|
const char *value);
|
2010-02-24 20:50:26 +08:00
|
|
|
void transport_set_verbosity(struct transport *transport, int verbosity,
|
|
|
|
int force_progress);
|
2007-09-11 11:03:04 +08:00
|
|
|
|
2020-10-03 20:10:45 +08:00
|
|
|
/*
 * Reasons a push can be rejected. Each value occupies its own bit, so
 * several reasons can be combined with bitwise OR in one unsigned int.
 * NOTE(review): presumably these are the bits reported through the
 * `reject_reasons` parameter of transport_push() and
 * transport_print_push_status() -- confirm in transport.c.
 */
#define REJECT_NON_FF_HEAD 0x01
|
|
|
|
#define REJECT_NON_FF_OTHER 0x02
|
|
|
|
#define REJECT_ALREADY_EXISTS 0x04
|
|
|
|
#define REJECT_FETCH_FIRST 0x08
|
|
|
|
#define REJECT_NEEDS_FORCE 0x10
|
|
|
|
#define REJECT_REF_NEEDS_UPDATE 0x20
|
2012-11-30 09:41:33 +08:00
|
|
|
|
2018-11-10 13:48:55 +08:00
|
|
|
/*
 * Push entry point of the transport layer.
 *
 * `repo` is the repository to operate on, `connection` is the transport to
 * use, `rs` is the refspec describing what to push and `flags` is an int
 * of push flags.
 * NOTE(review): `reject_reasons` is presumably an out-parameter that
 * receives an OR of the REJECT_* bits; the meaning of the return value is
 * not visible in this header -- confirm both in transport.c.
 */
int transport_push(struct repository *repo,
|
|
|
|
struct transport *connection,
|
2018-05-17 06:58:17 +08:00
|
|
|
struct refspec *rs, int flags,
|
2012-11-30 09:41:33 +08:00
|
|
|
unsigned int * reject_reasons);
|
2007-09-11 11:03:04 +08:00
|
|
|
|
2021-02-06 04:48:48 +08:00
|
|
|
/*
 * Options for listing remote refs. A pointer to this struct is the
 * `transport_options` argument of transport_get_remote_refs().
 * TRANSPORT_LS_REFS_OPTIONS_INIT is the matching static initializer and
 * transport_ls_refs_options_release() releases the struct.
 */
struct transport_ls_refs_options {
|
|
|
|
/*
|
|
|
|
* Optionally, a list of ref prefixes can be provided which can be sent
|
|
|
|
* to the server (when communicating using protocol v2) to enable it to
|
|
|
|
* limit the ref advertisement. Since ref filtering is done on the
|
|
|
|
* server's end (and only when using protocol v2),
|
|
|
|
* transport_get_remote_refs() could return refs which don't match the
|
|
|
|
* provided ref_prefixes.
|
|
|
|
*/
|
|
|
|
struct strvec ref_prefixes;
|
2021-02-06 04:48:49 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If unborn_head_target is not NULL, and the remote reports HEAD as
|
|
|
|
* pointing to an unborn branch, transport_get_remote_refs() stores the
|
2022-02-05 08:08:14 +08:00
|
|
|
* unborn branch in unborn_head_target.
|
2021-02-06 04:48:49 +08:00
|
|
|
*/
|
2022-02-05 08:08:14 +08:00
|
|
|
const char *unborn_head_target;
|
2021-02-06 04:48:48 +08:00
|
|
|
};
|
2021-09-27 20:54:27 +08:00
|
|
|
/*
 * Static initializer for struct transport_ls_refs_options.
 * `ref_prefixes` is set to STRVEC_INIT. Members not named here
 * (currently `unborn_head_target`) are zero-initialized by the C rules
 * for designated initializers, so that pointer starts out as NULL.
 */
#define TRANSPORT_LS_REFS_OPTIONS_INIT { \
|
|
|
|
.ref_prefixes = STRVEC_INIT, \
|
|
|
|
}
|
2021-02-06 04:48:48 +08:00
|
|
|
|
2022-02-05 08:08:14 +08:00
|
|
|
/**
|
|
|
|
* Release the "struct transport_ls_refs_options".
|
|
|
|
*/
|
|
|
|
void transport_ls_refs_options_release(struct transport_ls_refs_options *opts);
|
|
|
|
|
2018-03-16 01:31:23 +08:00
|
|
|
/*
|
|
|
|
* Retrieve refs from a remote.
|
|
|
|
*
* `transport_options` carries the optional ref-prefix hints and the
* unborn-HEAD slot documented on struct transport_ls_refs_options.
* The result is returned as a pointer to const struct ref.
* NOTE(review): whether `transport_options` may be NULL is not visible
* in this header -- confirm in transport.c.
*/
|
|
|
|
const struct ref *transport_get_remote_refs(struct transport *transport,
|
2021-02-06 04:48:48 +08:00
|
|
|
struct transport_ls_refs_options *transport_options);
|
2007-09-11 11:03:11 +08:00
|
|
|
|
2020-05-26 03:58:55 +08:00
|
|
|
/*
|
|
|
|
* Fetch the hash algorithm used by a remote.
|
|
|
|
*
|
|
|
|
* This can only be called after fetching the remote refs.
|
|
|
|
*/
|
|
|
|
const struct git_hash_algo *transport_get_hash_algo(struct transport *transport);
|
fetch-pack: unify ref in and out param
When a user fetches:
- at least one up-to-date ref and at least one non-up-to-date ref,
- using HTTP with protocol v0 (or something else that uses the fetch
command of a remote helper)
some refs might not be updated after the fetch.
This bug was introduced in commit 989b8c4452 ("fetch-pack: put shallow
info in output parameter", 2018-06-28) which allowed transports to
report the refs that they have fetched in a new out-parameter
"fetched_refs". If they do so, transport_fetch_refs() makes this
information available to its caller.
Users of "fetched_refs" rely on the following 3 properties:
(1) it is the complete list of refs that was passed to
transport_fetch_refs(),
(2) it has shallow information (REF_STATUS_REJECT_SHALLOW set if
relevant), and
(3) it has updated OIDs if ref-in-want was used (introduced after
989b8c4452).
In an effort to satisfy (1), whenever transport_fetch_refs()
filters the refs sent to the transport, it re-adds the filtered refs to
whatever the transport supplies before returning it to the user.
However, the implementation in 989b8c4452 unconditionally re-adds the
filtered refs without checking if the transport refrained from reporting
anything in "fetched_refs" (which it is allowed to do), resulting in an
incomplete list, no longer satisfying (1).
An earlier effort to resolve this [1] solved the issue by re-adding the
filtered refs only if the transport did not refrain from reporting in
"fetched_refs", but after further discussion, it seems that the better
solution is to revert the API change that introduced "fetched_refs".
This API change was first suggested as part of a ref-in-want
implementation that allowed for ref patterns and, thus, there could be
drastic differences between the input refs and the refs actually fetched
[2]; we eventually decided to only allow exact ref names, but this API
change remained even though its necessity was decreased.
Therefore, revert this API change by reverting commit 989b8c4452, and
make receive_wanted_refs() update the OIDs in the sought array (like how
update_shallow() updates shallow information in the sought array)
instead. A test is also included to show that the user-visible bug
discussed at the beginning of this commit message no longer exists.
[1] https://public-inbox.org/git/20180801171806.GA122458@google.com/
[2] https://public-inbox.org/git/86a128c5fb710a41791e7183207c4d64889f9307.1485381677.git.jonathantanmy@google.com/
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-02 04:13:20 +08:00
|
|
|
/*
 * Fetch the given refs over the transport.
 *
 * There is no separate "fetched refs" out-parameter: `refs` is both the
 * input and the place where results are reported.
 * NOTE(review): per the history of this declaration, shallow status and
 * OIDs updated via ref-in-want are presumably written back into the
 * passed-in refs; the meaning of the return value is not visible in this
 * header -- confirm both in transport.c / fetch-pack.c.
 */
int transport_fetch_refs(struct transport *transport, struct ref *refs);
|
fetch: fix deadlock when cleaning up lockfiles in async signals
When fetching packfiles, we write a bunch of lockfiles for the packfiles
we're writing into the repository. In order to not leave behind any
cruft in case we exit or receive a signal, we register both an exit
handler as well as signal handlers for common signals like SIGINT. These
handlers will then unlink the locks and free the data structure tracking
them. We have observed a deadlock in this logic though:
(gdb) bt
#0 __lll_lock_wait_private () at ../sysdeps/unix/sysv/linux/x86_64/lowlevellock.S:95
#1 0x00007f4932bea2cd in _int_free (av=0x7f4932f2eb20 <main_arena>, p=0x3e3e4200, have_lock=0) at malloc.c:3969
#2 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#3 0x0000000000662ab1 in string_list_clear ()
#4 0x000000000044f5bc in unlock_pack_on_signal ()
#5 <signal handler called>
#6 _int_free (av=0x7f4932f2eb20 <main_arena>, p=<optimized out>, have_lock=0) at malloc.c:4024
#7 0x00007f4932bee58c in __GI___libc_free (mem=<optimized out>) at malloc.c:2975
#8 0x000000000065afd5 in strbuf_release ()
#9 0x000000000066ddb9 in delete_tempfile ()
#10 0x0000000000610d0b in files_transaction_cleanup.isra ()
#11 0x0000000000611718 in files_transaction_abort ()
#12 0x000000000060d2ef in ref_transaction_abort ()
#13 0x000000000060d441 in ref_transaction_prepare ()
#14 0x000000000060e0b5 in ref_transaction_commit ()
#15 0x00000000004511c2 in fetch_and_consume_refs ()
#16 0x000000000045279a in cmd_fetch ()
#17 0x0000000000407c48 in handle_builtin ()
#18 0x0000000000408df2 in cmd_main ()
#19 0x00000000004078b5 in main ()
The process was killed with a signal, which caused the signal handler to
kick in and try to free the data structures after we have unlinked the
locks. It then deadlocks while calling free(3P).
The root cause of this is that it is not allowed to call certain
functions in async-signal handlers, as specified by signal-safety(7).
Next to most I/O functions, this list of disallowed functions also
includes memory-handling functions like malloc(3P) and free(3P) because
they may not be reentrant. As a result, if we execute such functions in
the signal handler, then they may operate on inconsistent state and fail
in unexpected ways.
Fix this bug by not calling non-async-signal-safe functions when running
in the signal handler. We're about to re-raise the signal anyway and
will thus exit, so it's not much of a problem to keep the string list of
lockfiles untouched. Note that it's fine though to call unlink(2), so
we'll still clean up the lockfiles correctly.
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Reviewed-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-01-07 18:55:47 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* If this flag is set, unlocking will avoid calling non-async-signal-safe
|
|
|
|
* functions. This will necessarily leave behind some data structures which
|
|
|
|
* cannot be cleaned up.
|
|
|
|
*/
|
|
|
|
#define TRANSPORT_UNLOCK_PACK_IN_SIGNAL_HANDLER (1 << 0)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Unlock all packfiles locked by the transport.
|
|
|
|
*
* `flags` is a bitmask; the only flag defined in this header is
* TRANSPORT_UNLOCK_PACK_IN_SIGNAL_HANDLER.
*/
|
|
|
|
void transport_unlock_pack(struct transport *transport, unsigned int flags);
|
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
int transport_disconnect(struct transport *transport);
|
2009-04-17 16:20:11 +08:00
|
|
|
char *transport_anonymize_url(const char *url);
|
2009-12-09 23:26:31 +08:00
|
|
|
void transport_take_over(struct transport *transport,
|
|
|
|
struct child_process *child);
|
2007-09-11 11:03:04 +08:00
|
|
|
|
2009-12-09 23:26:33 +08:00
|
|
|
int transport_connect(struct transport *transport, const char *name,
|
|
|
|
const char *exec, int fd[2]);
|
|
|
|
|
2009-08-05 13:01:53 +08:00
|
|
|
/* Transport methods defined outside transport.c */
|
2009-09-04 10:13:49 +08:00
|
|
|
int transport_helper_init(struct transport *transport, const char *name);
|
2010-10-13 00:39:41 +08:00
|
|
|
int bidirectional_transfer_loop(int input, int output);
|
2009-08-05 13:01:53 +08:00
|
|
|
|
2013-06-19 01:44:58 +08:00
|
|
|
/* common methods used by transport.c and builtin/send-pack.c */
|
2010-02-17 07:42:52 +08:00
|
|
|
void transport_update_tracking_ref(struct remote *remote, struct ref *ref, int verbose);
|
|
|
|
|
|
|
|
int transport_refs_pushed(struct ref *ref);
|
|
|
|
|
|
|
|
void transport_print_push_status(const char *dest, struct ref *refs,
|
2012-11-30 09:41:33 +08:00
|
|
|
int verbose, int porcelain, unsigned int *reject_reasons);
|
2010-02-17 07:42:52 +08:00
|
|
|
|
2020-04-17 17:45:36 +08:00
|
|
|
/* common method used by transport-helper.c and send-pack.c */
|
|
|
|
void reject_atomic_push(struct ref *refs, int mirror_mode);
|
|
|
|
|
2007-09-11 11:03:04 +08:00
|
|
|
#endif
|