2008-01-15 00:36:34 +08:00
|
|
|
#include "cache.h"
|
2017-06-15 02:07:36 +08:00
|
|
|
#include "config.h"
|
2015-06-22 22:03:05 +08:00
|
|
|
#include "refs.h"
|
2018-05-16 07:42:15 +08:00
|
|
|
#include "object-store.h"
|
2008-01-15 00:36:34 +08:00
|
|
|
#include "commit.h"
|
2008-07-25 18:41:22 +08:00
|
|
|
#include "tree-walk.h"
|
2008-01-15 00:36:34 +08:00
|
|
|
#include "attr.h"
|
2008-07-15 03:22:24 +08:00
|
|
|
#include "archive.h"
|
2008-07-25 18:41:26 +08:00
|
|
|
#include "parse-options.h"
|
2009-04-18 06:18:05 +08:00
|
|
|
#include "unpack-trees.h"
|
2014-09-21 11:55:06 +08:00
|
|
|
#include "dir.h"
|
2008-07-25 18:41:26 +08:00
|
|
|
|
|
|
|
static char const * const archive_usage[] = {
|
2015-01-13 15:44:47 +08:00
|
|
|
N_("git archive [<options>] <tree-ish> [<path>...]"),
|
2012-08-20 20:31:51 +08:00
|
|
|
N_("git archive --list"),
|
2015-01-13 15:44:47 +08:00
|
|
|
N_("git archive --remote <repo> [--exec <cmd>] [<options>] <tree-ish> [<path>...]"),
|
2012-08-20 20:31:51 +08:00
|
|
|
N_("git archive --remote <repo> [--exec <cmd>] --list"),
|
2008-07-25 18:41:26 +08:00
|
|
|
NULL
|
|
|
|
};
|
2008-07-25 18:41:22 +08:00
|
|
|
|
2011-06-22 09:23:33 +08:00
|
|
|
/*
 * Table of registered archive backends.  register_archiver() appends to
 * it; nr_archivers is the number of entries in use and alloc_archivers
 * is the allocation count handed to ALLOC_GROW().
 */
static const struct archiver **archivers;
static int nr_archivers;
static int alloc_archivers;
|
add uploadarchive.allowUnreachable option
In commit ee27ca4, we started restricting remote git-archive
invocations to only accessing reachable commits. This
matches what upload-pack allows, but does restrict some
useful cases (e.g., HEAD:foo). We loosened this in 0f544ee,
which allows `foo:bar` as long as `foo` is a ref tip.
However, that still doesn't allow many useful things, like:
1. Commits accessible from a ref, like `foo^:bar`, which
are reachable
2. Arbitrary sha1s, even if they are reachable.
We can do a full object-reachability check for these cases,
but it can be quite expensive if the client has sent us the
sha1 of a tree; we have to visit every sub-tree of every
commit in the worst case.
Let's instead give site admins an escape hatch, in case they
prefer the more liberal behavior. For many sites, the full
object database is public anyway (e.g., if you allow dumb
walker access), or the site admin may simply decide the
security/convenience tradeoff is not worth it.
This patch adds a new config option to disable the
restrictions added in ee27ca4. It defaults to off, meaning
there is no change in behavior by default.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-02-28 18:04:19 +08:00
|
|
|
/*
 * When non-zero, parse_treeish_arg() does not insist that a remote
 * request names an actual ref.  Starts out as 0 (static storage).
 * NOTE(review): the code that sets this flag is not visible in this
 * part of the file -- presumably a config callback; confirm.
 */
static int remote_allow_unreachable;
|
2011-06-22 09:23:33 +08:00
|
|
|
|
|
|
|
void register_archiver(struct archiver *ar)
|
|
|
|
{
|
|
|
|
ALLOC_GROW(archivers, nr_archivers + 1, alloc_archivers);
|
|
|
|
archivers[nr_archivers++] = ar;
|
|
|
|
}
|
2008-07-25 18:41:22 +08:00
|
|
|
|
2018-10-26 04:32:14 +08:00
|
|
|
/*
 * Run the initialisation hooks of the tar and zip backends,
 * tar first and zip second.
 */
void init_archivers(void)
{
	init_tar_archiver();
	init_zip_archiver();
}
|
|
|
|
|
2008-01-15 00:36:34 +08:00
|
|
|
static void format_subst(const struct commit *commit,
|
2018-12-06 23:42:06 +08:00
|
|
|
const char *src, size_t len,
|
2021-02-28 19:22:47 +08:00
|
|
|
struct strbuf *buf, struct pretty_print_context *ctx)
|
2008-01-15 00:36:34 +08:00
|
|
|
{
|
|
|
|
char *to_free = NULL;
|
2008-10-10 03:12:12 +08:00
|
|
|
struct strbuf fmt = STRBUF_INIT;
|
2008-01-15 00:36:34 +08:00
|
|
|
|
|
|
|
if (src == buf->buf)
|
|
|
|
to_free = strbuf_detach(buf, NULL);
|
|
|
|
for (;;) {
|
|
|
|
const char *b, *c;
|
|
|
|
|
|
|
|
b = memmem(src, len, "$Format:", 8);
|
2008-04-23 09:06:27 +08:00
|
|
|
if (!b)
|
2008-01-15 00:36:34 +08:00
|
|
|
break;
|
2008-04-23 09:06:27 +08:00
|
|
|
c = memchr(b + 8, '$', (src + len) - b - 8);
|
2008-01-15 00:36:34 +08:00
|
|
|
if (!c)
|
|
|
|
break;
|
|
|
|
|
|
|
|
strbuf_reset(&fmt);
|
|
|
|
strbuf_add(&fmt, b + 8, c - b - 8);
|
|
|
|
|
|
|
|
strbuf_add(buf, src, b - src);
|
2021-02-28 19:22:47 +08:00
|
|
|
format_commit_message(commit, fmt.buf, buf, ctx);
|
2008-01-15 00:36:34 +08:00
|
|
|
len -= c + 1 - src;
|
|
|
|
src = c + 1;
|
|
|
|
}
|
|
|
|
strbuf_add(buf, src, len);
|
|
|
|
strbuf_release(&fmt);
|
|
|
|
free(to_free);
|
|
|
|
}
|
|
|
|
|
2020-09-20 05:23:32 +08:00
|
|
|
static void *object_file_to_archive(const struct archiver_args *args,
|
|
|
|
const char *path,
|
|
|
|
const struct object_id *oid,
|
|
|
|
unsigned int mode,
|
|
|
|
enum object_type *type,
|
|
|
|
unsigned long *sizep)
|
2008-01-15 00:36:34 +08:00
|
|
|
{
|
|
|
|
void *buffer;
|
2012-05-03 09:51:03 +08:00
|
|
|
const struct commit *commit = args->convert ? args->commit : NULL;
|
2020-03-17 02:05:03 +08:00
|
|
|
struct checkout_metadata meta;
|
|
|
|
|
|
|
|
init_checkout_metadata(&meta, args->refname,
|
|
|
|
args->commit_oid ? args->commit_oid :
|
|
|
|
(args->tree ? &args->tree->object.oid : NULL), oid);
|
2008-01-15 00:36:34 +08:00
|
|
|
|
2012-05-03 09:51:03 +08:00
|
|
|
path += args->baselen;
|
sha1_file: convert read_sha1_file to struct object_id
Convert read_sha1_file to take a pointer to struct object_id and rename
it read_object_file. Do the same for read_sha1_file_extended.
Convert one use in grep.c to use the new function without any other code
change, since the pointer being passed is a void pointer that is already
initialized with a pointer to struct object_id. Update the declaration
and definitions of the modified functions, and apply the following
semantic patch to convert the remaining callers:
@@
expression E1, E2, E3;
@@
- read_sha1_file(E1.hash, E2, E3)
+ read_object_file(&E1, E2, E3)
@@
expression E1, E2, E3;
@@
- read_sha1_file(E1->hash, E2, E3)
+ read_object_file(E1, E2, E3)
@@
expression E1, E2, E3, E4;
@@
- read_sha1_file_extended(E1.hash, E2, E3, E4)
+ read_object_file_extended(&E1, E2, E3, E4)
@@
expression E1, E2, E3, E4;
@@
- read_sha1_file_extended(E1->hash, E2, E3, E4)
+ read_object_file_extended(E1, E2, E3, E4)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-03-12 10:27:53 +08:00
|
|
|
buffer = read_object_file(oid, type, sizep);
|
2008-01-15 00:36:34 +08:00
|
|
|
if (buffer && S_ISREG(mode)) {
|
2008-10-10 03:12:12 +08:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2008-01-15 00:36:34 +08:00
|
|
|
size_t size = 0;
|
|
|
|
|
|
|
|
strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
|
2020-03-17 02:05:03 +08:00
|
|
|
convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, &meta);
|
2008-07-15 03:22:29 +08:00
|
|
|
if (commit)
|
2021-02-28 19:22:47 +08:00
|
|
|
format_subst(commit, buf.buf, buf.len, &buf, args->pretty_ctx);
|
2008-01-15 00:36:34 +08:00
|
|
|
buffer = strbuf_detach(&buf, &size);
|
|
|
|
*sizep = size;
|
|
|
|
}
|
|
|
|
|
|
|
|
return buffer;
|
|
|
|
}
|
|
|
|
|
2014-09-21 11:55:06 +08:00
|
|
|
struct directory {
|
|
|
|
struct directory *up;
|
2015-03-14 07:39:30 +08:00
|
|
|
struct object_id oid;
|
2014-09-21 11:55:06 +08:00
|
|
|
int baselen, len;
|
|
|
|
unsigned mode;
|
|
|
|
char path[FLEX_ARRAY];
|
|
|
|
};
|
|
|
|
|
2008-07-15 03:22:24 +08:00
|
|
|
struct archiver_context {
|
|
|
|
struct archiver_args *args;
|
|
|
|
write_archive_entry_fn_t write_entry;
|
2014-09-21 11:55:06 +08:00
|
|
|
struct directory *bottom;
|
2008-07-15 03:22:24 +08:00
|
|
|
};
|
|
|
|
|
2018-08-14 00:14:35 +08:00
|
|
|
/*
 * Look up the "export-ignore" and "export-subst" attributes of `path`.
 *
 * The attr_check object is created on the first call and reused by
 * every later one, so the returned pointer refers to shared static
 * state that the next call overwrites.  Item 0 is export-ignore and
 * item 1 is export-subst.
 */
static const struct attr_check *get_archive_attrs(struct index_state *istate,
						  const char *path)
{
	static struct attr_check *archive_attrs;

	if (archive_attrs == NULL)
		archive_attrs = attr_check_initl("export-ignore",
						 "export-subst", NULL);

	git_check_attr(istate, path, archive_attrs);
	return archive_attrs;
}
|
|
|
|
|
|
|
|
static int check_attr_export_ignore(const struct attr_check *check)
|
|
|
|
{
|
|
|
|
return check && ATTR_TRUE(check->items[0].value);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int check_attr_export_subst(const struct attr_check *check)
|
|
|
|
{
|
|
|
|
return check && ATTR_TRUE(check->items[1].value);
|
|
|
|
}
|
|
|
|
|
2018-03-12 10:27:35 +08:00
|
|
|
static int write_archive_entry(const struct object_id *oid, const char *base,
|
2021-03-21 06:37:49 +08:00
|
|
|
int baselen, const char *filename, unsigned mode,
|
2008-07-15 03:22:24 +08:00
|
|
|
void *context)
|
|
|
|
{
|
|
|
|
static struct strbuf path = STRBUF_INIT;
|
|
|
|
struct archiver_context *c = context;
|
|
|
|
struct archiver_args *args = c->args;
|
|
|
|
write_archive_entry_fn_t write_entry = c->write_entry;
|
|
|
|
int err;
|
2017-08-19 13:32:37 +08:00
|
|
|
const char *path_without_prefix;
|
2020-09-20 05:23:32 +08:00
|
|
|
unsigned long size;
|
|
|
|
void *buffer;
|
|
|
|
enum object_type type;
|
2008-07-15 03:22:24 +08:00
|
|
|
|
2012-05-03 09:51:03 +08:00
|
|
|
args->convert = 0;
|
2008-07-15 03:22:24 +08:00
|
|
|
strbuf_reset(&path);
|
|
|
|
strbuf_grow(&path, PATH_MAX);
|
2009-10-09 00:46:54 +08:00
|
|
|
strbuf_add(&path, args->base, args->baselen);
|
2008-07-15 03:22:24 +08:00
|
|
|
strbuf_add(&path, base, baselen);
|
|
|
|
strbuf_addstr(&path, filename);
|
2012-12-09 04:04:39 +08:00
|
|
|
if (S_ISDIR(mode) || S_ISGITLINK(mode))
|
|
|
|
strbuf_addch(&path, '/');
|
2008-07-15 03:22:29 +08:00
|
|
|
path_without_prefix = path.buf + args->baselen;
|
2008-07-15 03:22:24 +08:00
|
|
|
|
2017-09-13 06:43:57 +08:00
|
|
|
if (!S_ISDIR(mode)) {
|
2017-08-19 13:32:37 +08:00
|
|
|
const struct attr_check *check;
|
2018-08-14 00:14:35 +08:00
|
|
|
check = get_archive_attrs(args->repo->index, path_without_prefix);
|
2017-08-19 13:32:37 +08:00
|
|
|
if (check_attr_export_ignore(check))
|
2008-07-15 03:22:29 +08:00
|
|
|
return 0;
|
2017-08-19 13:32:37 +08:00
|
|
|
args->convert = check_attr_export_subst(check);
|
2008-07-15 03:22:29 +08:00
|
|
|
}
|
2008-07-15 03:22:24 +08:00
|
|
|
|
|
|
|
if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
|
|
|
|
if (args->verbose)
|
|
|
|
fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
|
2020-09-20 05:23:32 +08:00
|
|
|
err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
|
2008-07-15 03:22:24 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
2009-01-25 08:52:05 +08:00
|
|
|
return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
|
2008-07-15 03:22:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (args->verbose)
|
|
|
|
fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
|
2020-09-20 05:23:32 +08:00
|
|
|
|
|
|
|
/* Stream it? */
|
|
|
|
if (S_ISREG(mode) && !args->convert &&
|
|
|
|
oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
|
|
|
|
size > big_file_threshold)
|
|
|
|
return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
|
|
|
|
|
|
|
|
buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
|
|
|
|
if (!buffer)
|
|
|
|
return error(_("cannot read %s"), oid_to_hex(oid));
|
|
|
|
err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
|
|
|
|
free(buffer);
|
|
|
|
return err;
|
2008-07-15 03:22:24 +08:00
|
|
|
}
|
|
|
|
|
2014-09-21 11:55:06 +08:00
|
|
|
static void queue_directory(const unsigned char *sha1,
|
2014-11-30 17:05:00 +08:00
|
|
|
struct strbuf *base, const char *filename,
|
2021-03-21 06:37:49 +08:00
|
|
|
unsigned mode, struct archiver_context *c)
|
2014-09-21 11:55:06 +08:00
|
|
|
{
|
|
|
|
struct directory *d;
|
2016-02-23 06:44:35 +08:00
|
|
|
size_t len = st_add4(base->len, 1, strlen(filename), 1);
|
|
|
|
d = xmalloc(st_add(sizeof(*d), len));
|
2014-09-21 11:55:06 +08:00
|
|
|
d->up = c->bottom;
|
2014-11-30 17:05:00 +08:00
|
|
|
d->baselen = base->len;
|
2014-09-21 11:55:06 +08:00
|
|
|
d->mode = mode;
|
|
|
|
c->bottom = d;
|
avoid sprintf and strcpy with flex arrays
When we are allocating a struct with a FLEX_ARRAY member, we
generally compute the size of the array and then sprintf or
strcpy into it. Normally we could improve a dynamic allocation
like this by using xstrfmt, but it doesn't work here; we
have to account for the size of the rest of the struct.
But we can improve things a bit by storing the length that
we use for the allocation, and then feeding it to xsnprintf
or memcpy, which makes it more obvious that we are not
writing more than the allocated number of bytes.
It would be nice if we had some kind of helper for
allocating generic flex arrays, but it doesn't work that
well:
- the call signature is a little bit unwieldy:
d = flex_struct(sizeof(*d), offsetof(d, path), fmt, ...);
You need offsetof here instead of just writing to the
end of the base size, because we don't know how the
struct is packed (partially this is because FLEX_ARRAY
might not be zero, though we can account for that; but
the size of the struct may actually be rounded up for
alignment, and we can't know that).
- some sites do clever things, like over-allocating because
they know they will write larger things into the buffer
later (e.g., struct packed_git here).
So we're better off to just write out each allocation (or
add type-specific helpers, though many of these are one-off
allocations anyway).
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2015-09-25 05:08:12 +08:00
|
|
|
d->len = xsnprintf(d->path, len, "%.*s%s/", (int)base->len, base->buf, filename);
|
2015-03-14 07:39:30 +08:00
|
|
|
hashcpy(d->oid.hash, sha1);
|
2014-09-21 11:55:06 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int write_directory(struct archiver_context *c)
|
|
|
|
{
|
|
|
|
struct directory *d = c->bottom;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (!d)
|
|
|
|
return 0;
|
|
|
|
c->bottom = d->up;
|
|
|
|
d->path[d->len - 1] = '\0'; /* no trailing slash */
|
|
|
|
ret =
|
|
|
|
write_directory(c) ||
|
2018-03-12 10:27:35 +08:00
|
|
|
write_archive_entry(&d->oid, d->path, d->baselen,
|
2014-09-21 11:55:06 +08:00
|
|
|
d->path + d->baselen, d->mode,
|
2021-03-21 06:37:49 +08:00
|
|
|
c) != READ_TREE_RECURSIVE;
|
2014-09-21 11:55:06 +08:00
|
|
|
free(d);
|
|
|
|
return ret ? -1 : 0;
|
|
|
|
}
|
|
|
|
|
2018-03-12 10:27:26 +08:00
|
|
|
static int queue_or_write_archive_entry(const struct object_id *oid,
|
2014-11-30 17:05:00 +08:00
|
|
|
struct strbuf *base, const char *filename,
|
2021-03-21 06:37:51 +08:00
|
|
|
unsigned mode, void *context)
|
2014-09-21 11:55:06 +08:00
|
|
|
{
|
|
|
|
struct archiver_context *c = context;
|
|
|
|
|
|
|
|
while (c->bottom &&
|
2014-11-30 17:05:00 +08:00
|
|
|
!(base->len >= c->bottom->len &&
|
|
|
|
!strncmp(base->buf, c->bottom->path, c->bottom->len))) {
|
2014-09-21 11:55:06 +08:00
|
|
|
struct directory *next = c->bottom->up;
|
|
|
|
free(c->bottom);
|
|
|
|
c->bottom = next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (S_ISDIR(mode)) {
|
2017-08-19 13:32:37 +08:00
|
|
|
size_t baselen = base->len;
|
|
|
|
const struct attr_check *check;
|
|
|
|
|
|
|
|
/* Borrow base, but restore its original value when done. */
|
|
|
|
strbuf_addstr(base, filename);
|
|
|
|
strbuf_addch(base, '/');
|
2018-08-14 00:14:35 +08:00
|
|
|
check = get_archive_attrs(c->args->repo->index, base->buf);
|
2017-08-19 13:32:37 +08:00
|
|
|
strbuf_setlen(base, baselen);
|
|
|
|
|
|
|
|
if (check_attr_export_ignore(check))
|
|
|
|
return 0;
|
2018-03-12 10:27:26 +08:00
|
|
|
queue_directory(oid->hash, base, filename,
|
2021-03-21 06:37:49 +08:00
|
|
|
mode, c);
|
2014-09-21 11:55:06 +08:00
|
|
|
return READ_TREE_RECURSIVE;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (write_directory(c))
|
|
|
|
return -1;
|
2018-03-12 10:27:35 +08:00
|
|
|
return write_archive_entry(oid, base->buf, base->len, filename, mode,
|
2021-03-21 06:37:49 +08:00
|
|
|
context);
|
2014-09-21 11:55:06 +08:00
|
|
|
}
|
|
|
|
|
2020-09-20 05:23:42 +08:00
|
|
|
/*
 * Per-file data attached (as `util`) to each item of
 * archiver_args.extra_files by add_file_cb().
 */
struct extra_file_info {
	/* copy of the base string in effect when the file was added, or NULL */
	char *base;
	/* stat() result taken when the file was added */
	struct stat stat;
};
|
|
|
|
|
2008-07-15 03:22:24 +08:00
|
|
|
int write_archive_entries(struct archiver_args *args,
|
|
|
|
write_archive_entry_fn_t write_entry)
|
|
|
|
{
|
|
|
|
struct archiver_context context;
|
2009-04-18 06:18:05 +08:00
|
|
|
struct unpack_trees_options opts;
|
|
|
|
struct tree_desc t;
|
2008-07-15 03:22:24 +08:00
|
|
|
int err;
|
2020-09-20 05:23:42 +08:00
|
|
|
struct strbuf path_in_archive = STRBUF_INIT;
|
|
|
|
struct strbuf content = STRBUF_INIT;
|
|
|
|
struct object_id fake_oid = null_oid;
|
|
|
|
int i;
|
2008-07-15 03:22:24 +08:00
|
|
|
|
|
|
|
if (args->baselen > 0 && args->base[args->baselen - 1] == '/') {
|
|
|
|
size_t len = args->baselen;
|
|
|
|
|
|
|
|
while (len > 1 && args->base[len - 2] == '/')
|
|
|
|
len--;
|
|
|
|
if (args->verbose)
|
|
|
|
fprintf(stderr, "%.*s\n", (int)len, args->base);
|
2018-03-12 10:27:35 +08:00
|
|
|
err = write_entry(args, &args->tree->object.oid, args->base,
|
2020-09-20 05:23:32 +08:00
|
|
|
len, 040777, NULL, 0);
|
2008-07-15 03:22:24 +08:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2014-09-21 11:55:06 +08:00
|
|
|
memset(&context, 0, sizeof(context));
|
2008-07-15 03:22:24 +08:00
|
|
|
context.args = args;
|
|
|
|
context.write_entry = write_entry;
|
|
|
|
|
2009-04-18 06:18:05 +08:00
|
|
|
/*
|
|
|
|
* Setup index and instruct attr to read index only
|
|
|
|
*/
|
|
|
|
if (!args->worktree_attributes) {
|
|
|
|
memset(&opts, 0, sizeof(opts));
|
|
|
|
opts.index_only = 1;
|
|
|
|
opts.head_idx = -1;
|
2018-08-14 00:14:35 +08:00
|
|
|
opts.src_index = args->repo->index;
|
|
|
|
opts.dst_index = args->repo->index;
|
2009-04-18 06:18:05 +08:00
|
|
|
opts.fn = oneway_merge;
|
|
|
|
init_tree_desc(&t, args->tree->buffer, args->tree->size);
|
|
|
|
if (unpack_trees(1, &t, &opts))
|
|
|
|
return -1;
|
2018-08-14 00:14:33 +08:00
|
|
|
git_attr_set_direction(GIT_ATTR_INDEX);
|
2009-04-18 06:18:05 +08:00
|
|
|
}
|
|
|
|
|
2021-03-21 06:37:51 +08:00
|
|
|
err = read_tree(args->repo, args->tree,
|
|
|
|
&args->pathspec,
|
|
|
|
queue_or_write_archive_entry,
|
|
|
|
&context);
|
2008-07-15 03:22:24 +08:00
|
|
|
if (err == READ_TREE_RECURSIVE)
|
|
|
|
err = 0;
|
2014-09-21 11:55:06 +08:00
|
|
|
while (context.bottom) {
|
|
|
|
struct directory *next = context.bottom->up;
|
|
|
|
free(context.bottom);
|
|
|
|
context.bottom = next;
|
|
|
|
}
|
2020-09-20 05:23:42 +08:00
|
|
|
|
|
|
|
for (i = 0; i < args->extra_files.nr; i++) {
|
|
|
|
struct string_list_item *item = args->extra_files.items + i;
|
|
|
|
char *path = item->string;
|
|
|
|
struct extra_file_info *info = item->util;
|
|
|
|
|
|
|
|
put_be64(fake_oid.hash, i + 1);
|
|
|
|
|
|
|
|
strbuf_reset(&path_in_archive);
|
|
|
|
if (info->base)
|
|
|
|
strbuf_addstr(&path_in_archive, info->base);
|
|
|
|
strbuf_addstr(&path_in_archive, basename(path));
|
|
|
|
|
|
|
|
strbuf_reset(&content);
|
|
|
|
if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
|
|
|
|
err = error_errno(_("could not read '%s'"), path);
|
|
|
|
else
|
|
|
|
err = write_entry(args, &fake_oid, path_in_archive.buf,
|
|
|
|
path_in_archive.len,
|
|
|
|
info->stat.st_mode,
|
|
|
|
content.buf, content.len);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
strbuf_release(&path_in_archive);
|
|
|
|
strbuf_release(&content);
|
|
|
|
|
2008-07-15 03:22:24 +08:00
|
|
|
return err;
|
|
|
|
}
|
2008-07-25 18:41:21 +08:00
|
|
|
|
2008-07-25 18:41:22 +08:00
|
|
|
static const struct archiver *lookup_archiver(const char *name)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2008-07-25 18:41:26 +08:00
|
|
|
if (!name)
|
|
|
|
return NULL;
|
|
|
|
|
2011-06-22 09:23:33 +08:00
|
|
|
for (i = 0; i < nr_archivers; i++) {
|
|
|
|
if (!strcmp(name, archivers[i]->name))
|
|
|
|
return archivers[i];
|
2008-07-25 18:41:22 +08:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2018-08-14 00:14:35 +08:00
|
|
|
struct path_exists_context {
|
|
|
|
struct pathspec pathspec;
|
|
|
|
struct archiver_args *args;
|
|
|
|
};
|
|
|
|
|
2018-03-12 10:27:26 +08:00
|
|
|
static int reject_entry(const struct object_id *oid, struct strbuf *base,
|
2014-11-30 17:05:00 +08:00
|
|
|
const char *filename, unsigned mode,
|
2021-03-21 06:37:49 +08:00
|
|
|
void *context)
|
2009-12-12 23:00:41 +08:00
|
|
|
{
|
2014-09-21 11:55:06 +08:00
|
|
|
int ret = -1;
|
2018-08-14 00:14:35 +08:00
|
|
|
struct path_exists_context *ctx = context;
|
|
|
|
|
2014-09-21 11:55:06 +08:00
|
|
|
if (S_ISDIR(mode)) {
|
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2014-11-30 17:05:00 +08:00
|
|
|
strbuf_addbuf(&sb, base);
|
2014-09-21 11:55:06 +08:00
|
|
|
strbuf_addstr(&sb, filename);
|
2018-08-14 00:14:35 +08:00
|
|
|
if (!match_pathspec(ctx->args->repo->index,
|
|
|
|
&ctx->pathspec,
|
|
|
|
sb.buf, sb.len, 0, NULL, 1))
|
2014-09-21 11:55:06 +08:00
|
|
|
ret = READ_TREE_RECURSIVE;
|
|
|
|
strbuf_release(&sb);
|
|
|
|
}
|
|
|
|
return ret;
|
2009-12-12 23:00:41 +08:00
|
|
|
}
|
|
|
|
|
2018-08-14 00:14:35 +08:00
|
|
|
static int path_exists(struct archiver_args *args, const char *path)
|
2009-12-12 23:00:41 +08:00
|
|
|
{
|
2011-03-25 17:34:19 +08:00
|
|
|
const char *paths[] = { path, NULL };
|
2018-08-14 00:14:35 +08:00
|
|
|
struct path_exists_context ctx;
|
2011-03-25 17:34:19 +08:00
|
|
|
int ret;
|
|
|
|
|
2018-08-14 00:14:35 +08:00
|
|
|
ctx.args = args;
|
|
|
|
parse_pathspec(&ctx.pathspec, 0, 0, "", paths);
|
|
|
|
ctx.pathspec.recursive = 1;
|
2021-03-21 06:37:51 +08:00
|
|
|
ret = read_tree(args->repo, args->tree,
|
|
|
|
&ctx.pathspec,
|
|
|
|
reject_entry, &ctx);
|
2018-08-14 00:14:35 +08:00
|
|
|
clear_pathspec(&ctx.pathspec);
|
2011-03-25 17:34:19 +08:00
|
|
|
return ret != 0;
|
2009-12-12 23:00:41 +08:00
|
|
|
}
|
|
|
|
|
2008-07-25 18:41:22 +08:00
|
|
|
static void parse_pathspec_arg(const char **pathspec,
|
|
|
|
struct archiver_args *ar_args)
|
|
|
|
{
|
2013-07-14 16:35:44 +08:00
|
|
|
/*
|
|
|
|
* must be consistent with parse_pathspec in path_exists()
|
|
|
|
* Also if pathspec patterns are dependent, we're in big
|
|
|
|
* trouble as we test each one separately
|
|
|
|
*/
|
|
|
|
parse_pathspec(&ar_args->pathspec, 0,
|
|
|
|
PATHSPEC_PREFER_FULL,
|
|
|
|
"", pathspec);
|
2014-09-21 11:55:06 +08:00
|
|
|
ar_args->pathspec.recursive = 1;
|
2009-12-12 23:00:41 +08:00
|
|
|
if (pathspec) {
|
|
|
|
while (*pathspec) {
|
2018-08-14 00:14:35 +08:00
|
|
|
if (**pathspec && !path_exists(ar_args, *pathspec))
|
2013-07-14 16:35:44 +08:00
|
|
|
die(_("pathspec '%s' did not match any files"), *pathspec);
|
2009-12-12 23:00:41 +08:00
|
|
|
pathspec++;
|
|
|
|
}
|
|
|
|
}
|
2008-07-25 18:41:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void parse_treeish_arg(const char **argv,
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-11-18 07:04:22 +08:00
|
|
|
struct archiver_args *ar_args, const char *prefix,
|
|
|
|
int remote)
|
2008-07-25 18:41:22 +08:00
|
|
|
{
|
|
|
|
const char *name = argv[0];
|
2019-02-19 08:05:20 +08:00
|
|
|
const struct object_id *commit_oid;
|
2008-07-25 18:41:22 +08:00
|
|
|
time_t archive_time;
|
|
|
|
struct tree *tree;
|
|
|
|
const struct commit *commit;
|
2015-03-14 07:39:30 +08:00
|
|
|
struct object_id oid;
|
2020-03-17 02:05:03 +08:00
|
|
|
char *ref = NULL;
|
2008-07-25 18:41:22 +08:00
|
|
|
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-11-18 07:04:22 +08:00
|
|
|
/* Remotes are only allowed to fetch actual refs */
|
add uploadarchive.allowUnreachable option
In commit ee27ca4, we started restricting remote git-archive
invocations to only accessing reachable commits. This
matches what upload-pack allows, but does restrict some
useful cases (e.g., HEAD:foo). We loosened this in 0f544ee,
which allows `foo:bar` as long as `foo` is a ref tip.
However, that still doesn't allow many useful things, like:
1. Commits accessible from a ref, like `foo^:bar`, which
are reachable
2. Arbitrary sha1s, even if they are reachable.
We can do a full object-reachability check for these cases,
but it can be quite expensive if the client has sent us the
sha1 of a tree; we have to visit every sub-tree of every
commit in the worst case.
Let's instead give site admins an escape hatch, in case they
prefer the more liberal behavior. For many sites, the full
object database is public anyway (e.g., if you allow dumb
walker access), or the site admin may simply decide the
security/convenience tradeoff is not worth it.
This patch adds a new config option to disable the
restrictions added in ee27ca4. It defaults to off, meaning
there is no change in behavior by default.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2014-02-28 18:04:19 +08:00
|
|
|
if (remote && !remote_allow_unreachable) {
|
2014-03-08 14:48:31 +08:00
|
|
|
const char *colon = strchrnul(name, ':');
|
|
|
|
int refnamelen = colon - name;
|
2012-05-18 13:15:17 +08:00
|
|
|
|
2020-09-02 06:28:09 +08:00
|
|
|
if (!dwim_ref(name, refnamelen, &oid, &ref, 0))
|
2018-11-10 13:16:02 +08:00
|
|
|
die(_("no such ref: %.*s"), refnamelen, name);
|
2020-03-17 02:05:03 +08:00
|
|
|
} else {
|
2020-09-02 06:28:09 +08:00
|
|
|
dwim_ref(name, strlen(name), &oid, &ref, 0);
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-11-18 07:04:22 +08:00
|
|
|
}
|
2012-01-11 20:12:38 +08:00
|
|
|
|
sha1_name: convert get_sha1* to get_oid*
Now that all the callers of get_sha1 directly or indirectly use struct
object_id, rename the functions starting with get_sha1 to start with
get_oid. Convert the internals in sha1_name.c to use struct object_id
as well, and eliminate explicit length checks where possible. Convert a
use of 40 in get_oid_basic to GIT_SHA1_HEXSZ.
Outside of sha1_name.c and cache.h, this transition was made with the
following semantic patch:
@@
expression E1, E2;
@@
- get_sha1(E1, E2.hash)
+ get_oid(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1(E1, E2->hash)
+ get_oid(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_committish(E1, E2.hash)
+ get_oid_committish(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_committish(E1, E2->hash)
+ get_oid_committish(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_treeish(E1, E2.hash)
+ get_oid_treeish(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_treeish(E1, E2->hash)
+ get_oid_treeish(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_commit(E1, E2.hash)
+ get_oid_commit(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_commit(E1, E2->hash)
+ get_oid_commit(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_tree(E1, E2.hash)
+ get_oid_tree(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_tree(E1, E2->hash)
+ get_oid_tree(E1, E2)
@@
expression E1, E2;
@@
- get_sha1_blob(E1, E2.hash)
+ get_oid_blob(E1, &E2)
@@
expression E1, E2;
@@
- get_sha1_blob(E1, E2->hash)
+ get_oid_blob(E1, E2)
@@
expression E1, E2, E3, E4;
@@
- get_sha1_with_context(E1, E2, E3.hash, E4)
+ get_oid_with_context(E1, E2, &E3, E4)
@@
expression E1, E2, E3, E4;
@@
- get_sha1_with_context(E1, E2, E3->hash, E4)
+ get_oid_with_context(E1, E2, E3, E4)
Signed-off-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2017-07-14 07:49:28 +08:00
|
|
|
if (get_oid(name, &oid))
|
2018-11-10 13:16:02 +08:00
|
|
|
die(_("not a valid object name: %s"), name);
|
2008-07-25 18:41:22 +08:00
|
|
|
|
2018-09-16 00:17:37 +08:00
|
|
|
commit = lookup_commit_reference_gently(ar_args->repo, &oid, 1);
|
2008-07-25 18:41:22 +08:00
|
|
|
if (commit) {
|
2019-02-19 08:05:20 +08:00
|
|
|
commit_oid = &commit->object.oid;
|
2008-07-25 18:41:22 +08:00
|
|
|
archive_time = commit->date;
|
|
|
|
} else {
|
2019-02-19 08:05:20 +08:00
|
|
|
commit_oid = NULL;
|
2008-07-25 18:41:22 +08:00
|
|
|
archive_time = time(NULL);
|
|
|
|
}
|
|
|
|
|
2017-05-07 06:10:37 +08:00
|
|
|
tree = parse_tree_indirect(&oid);
|
2008-07-25 18:41:22 +08:00
|
|
|
if (tree == NULL)
|
2018-11-10 13:16:02 +08:00
|
|
|
die(_("not a tree object: %s"), oid_to_hex(&oid));
|
2008-07-25 18:41:22 +08:00
|
|
|
|
|
|
|
if (prefix) {
|
2015-03-14 07:39:30 +08:00
|
|
|
struct object_id tree_oid;
|
2019-04-05 23:00:12 +08:00
|
|
|
unsigned short mode;
|
2008-07-25 18:41:22 +08:00
|
|
|
int err;
|
|
|
|
|
2019-06-27 17:28:49 +08:00
|
|
|
err = get_tree_entry(ar_args->repo,
|
|
|
|
&tree->object.oid,
|
|
|
|
prefix, &tree_oid,
|
2018-03-12 10:27:51 +08:00
|
|
|
&mode);
|
2008-07-25 18:41:22 +08:00
|
|
|
if (err || !S_ISDIR(mode))
|
2018-11-10 13:16:02 +08:00
|
|
|
die(_("current working directory is untracked"));
|
2008-07-25 18:41:22 +08:00
|
|
|
|
2017-05-07 06:10:37 +08:00
|
|
|
tree = parse_tree_indirect(&tree_oid);
|
2008-07-25 18:41:22 +08:00
|
|
|
}
|
2020-03-17 02:05:03 +08:00
|
|
|
ar_args->refname = ref;
|
2008-07-25 18:41:22 +08:00
|
|
|
ar_args->tree = tree;
|
2019-02-19 08:05:20 +08:00
|
|
|
ar_args->commit_oid = commit_oid;
|
2008-07-25 18:41:22 +08:00
|
|
|
ar_args->commit = commit;
|
|
|
|
ar_args->time = archive_time;
|
|
|
|
}
|
|
|
|
|
2020-09-20 05:23:42 +08:00
|
|
|
static void extra_file_info_clear(void *util, const char *str)
|
|
|
|
{
|
|
|
|
struct extra_file_info *info = util;
|
|
|
|
free(info->base);
|
|
|
|
free(info);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int add_file_cb(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
struct archiver_args *args = opt->value;
|
|
|
|
const char **basep = (const char **)opt->defval;
|
|
|
|
const char *base = *basep;
|
|
|
|
char *path;
|
|
|
|
struct string_list_item *item;
|
|
|
|
struct extra_file_info *info;
|
|
|
|
|
|
|
|
if (unset) {
|
|
|
|
string_list_clear_func(&args->extra_files,
|
|
|
|
extra_file_info_clear);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!arg)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
path = prefix_filename(args->prefix, arg);
|
|
|
|
item = string_list_append_nodup(&args->extra_files, path);
|
|
|
|
item->util = info = xmalloc(sizeof(*info));
|
|
|
|
info->base = xstrdup_or_null(base);
|
|
|
|
if (stat(path, &info->stat))
|
|
|
|
die(_("File not found: %s"), path);
|
|
|
|
if (!S_ISREG(info->stat.st_mode))
|
|
|
|
die(_("Not a regular file: %s"), path);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2020-11-10 00:05:31 +08:00
|
|
|
static int number_callback(const struct option *opt, const char *arg, int unset)
|
|
|
|
{
|
|
|
|
BUG_ON_OPT_NEG(unset);
|
|
|
|
*(int *)opt->value = strtol(arg, NULL, 10);
|
|
|
|
return 0;
|
|
|
|
}
|
2008-07-25 18:41:26 +08:00
|
|
|
|
2008-07-25 18:41:22 +08:00
|
|
|
static int parse_archive_args(int argc, const char **argv,
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-06-22 09:24:48 +08:00
|
|
|
const struct archiver **ar, struct archiver_args *args,
|
2011-06-22 11:17:35 +08:00
|
|
|
const char *name_hint, int is_remote)
|
2008-07-25 18:41:22 +08:00
|
|
|
{
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-06-22 09:24:48 +08:00
|
|
|
const char *format = NULL;
|
2008-07-25 18:41:26 +08:00
|
|
|
const char *base = NULL;
|
|
|
|
const char *remote = NULL;
|
|
|
|
const char *exec = NULL;
|
2009-02-17 01:20:25 +08:00
|
|
|
const char *output = NULL;
|
2008-07-25 18:41:22 +08:00
|
|
|
int compression_level = -1;
|
|
|
|
int verbose = 0;
|
|
|
|
int i;
|
2008-07-25 18:41:26 +08:00
|
|
|
int list = 0;
|
2009-04-18 06:18:05 +08:00
|
|
|
int worktree_attributes = 0;
|
2008-07-25 18:41:26 +08:00
|
|
|
struct option opts[] = {
|
|
|
|
OPT_GROUP(""),
|
2012-08-20 20:31:51 +08:00
|
|
|
OPT_STRING(0, "format", &format, N_("fmt"), N_("archive format")),
|
|
|
|
OPT_STRING(0, "prefix", &base, N_("prefix"),
|
|
|
|
N_("prepend prefix to each pathname in the archive")),
|
2020-09-20 05:23:42 +08:00
|
|
|
{ OPTION_CALLBACK, 0, "add-file", args, N_("file"),
|
|
|
|
N_("add untracked file to archive"), 0, add_file_cb,
|
|
|
|
(intptr_t)&base },
|
2012-08-20 20:31:51 +08:00
|
|
|
OPT_STRING('o', "output", &output, N_("file"),
|
|
|
|
N_("write the archive to this file")),
|
2011-09-28 07:59:01 +08:00
|
|
|
OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
|
2012-08-20 20:31:51 +08:00
|
|
|
N_("read .gitattributes in working directory")),
|
|
|
|
OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
|
2020-11-10 00:05:31 +08:00
|
|
|
OPT_NUMBER_CALLBACK(&compression_level,
|
|
|
|
N_("set compression level"), number_callback),
|
2008-07-25 18:41:26 +08:00
|
|
|
OPT_GROUP(""),
|
2011-09-28 07:59:01 +08:00
|
|
|
OPT_BOOL('l', "list", &list,
|
2012-08-20 20:31:51 +08:00
|
|
|
N_("list supported archive formats")),
|
2008-07-25 18:41:26 +08:00
|
|
|
OPT_GROUP(""),
|
2012-08-20 20:31:51 +08:00
|
|
|
OPT_STRING(0, "remote", &remote, N_("repo"),
|
|
|
|
N_("retrieve the archive from remote repository <repo>")),
|
2012-08-20 20:32:54 +08:00
|
|
|
OPT_STRING(0, "exec", &exec, N_("command"),
|
2012-08-20 20:31:51 +08:00
|
|
|
N_("path to the remote git-upload-archive command")),
|
2008-07-25 18:41:26 +08:00
|
|
|
OPT_END()
|
|
|
|
};
|
|
|
|
|
2009-05-24 02:53:12 +08:00
|
|
|
argc = parse_options(argc, argv, NULL, opts, archive_usage, 0);
|
2008-07-25 18:41:26 +08:00
|
|
|
|
|
|
|
if (remote)
|
2016-08-08 19:16:00 +08:00
|
|
|
die(_("Unexpected option --remote"));
|
2008-07-25 18:41:26 +08:00
|
|
|
if (exec)
|
2016-08-08 19:16:00 +08:00
|
|
|
die(_("Option --exec can only be used together with --remote"));
|
2009-03-09 02:21:53 +08:00
|
|
|
if (output)
|
2016-08-08 19:16:00 +08:00
|
|
|
die(_("Unexpected option --output"));
|
2020-09-20 05:23:42 +08:00
|
|
|
if (is_remote && args->extra_files.nr)
|
|
|
|
die(_("Options --add-file and --remote cannot be used together"));
|
2008-07-25 18:41:26 +08:00
|
|
|
|
|
|
|
if (!base)
|
|
|
|
base = "";
|
|
|
|
|
|
|
|
if (list) {
|
2011-06-22 09:23:33 +08:00
|
|
|
for (i = 0; i < nr_archivers; i++)
|
2011-06-22 11:17:35 +08:00
|
|
|
if (!is_remote || archivers[i]->flags & ARCHIVER_REMOTE)
|
|
|
|
printf("%s\n", archivers[i]->name);
|
2008-07-25 18:41:26 +08:00
|
|
|
exit(0);
|
2008-07-25 18:41:22 +08:00
|
|
|
}
|
|
|
|
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-06-22 09:24:48 +08:00
|
|
|
if (!format && name_hint)
|
|
|
|
format = archive_format_from_filename(name_hint);
|
|
|
|
if (!format)
|
|
|
|
format = "tar";
|
|
|
|
|
2008-07-25 18:41:22 +08:00
|
|
|
/* We need at least one parameter -- tree-ish */
|
2008-07-25 18:41:26 +08:00
|
|
|
if (argc < 1)
|
|
|
|
usage_with_options(archive_usage, opts);
|
2008-07-25 18:41:22 +08:00
|
|
|
*ar = lookup_archiver(format);
|
2011-06-22 11:17:35 +08:00
|
|
|
if (!*ar || (is_remote && !((*ar)->flags & ARCHIVER_REMOTE)))
|
2016-08-08 19:16:00 +08:00
|
|
|
die(_("Unknown archive format '%s'"), format);
|
2008-07-25 18:41:22 +08:00
|
|
|
|
|
|
|
args->compression_level = Z_DEFAULT_COMPRESSION;
|
|
|
|
if (compression_level != -1) {
|
2020-11-10 00:05:31 +08:00
|
|
|
int levels_ok = (*ar)->flags & ARCHIVER_WANT_COMPRESSION_LEVELS;
|
|
|
|
int high_ok = (*ar)->flags & ARCHIVER_HIGH_COMPRESSION_LEVELS;
|
|
|
|
if (levels_ok && (compression_level <= 9 || high_ok))
|
2008-07-25 18:41:22 +08:00
|
|
|
args->compression_level = compression_level;
|
|
|
|
else {
|
2016-08-08 19:16:00 +08:00
|
|
|
die(_("Argument not supported for format '%s': -%d"),
|
2008-07-25 18:41:22 +08:00
|
|
|
format, compression_level);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
args->verbose = verbose;
|
|
|
|
args->base = base;
|
|
|
|
args->baselen = strlen(base);
|
2009-04-18 06:18:05 +08:00
|
|
|
args->worktree_attributes = worktree_attributes;
|
2008-07-25 18:41:22 +08:00
|
|
|
|
2008-07-25 18:41:26 +08:00
|
|
|
return argc;
|
2008-07-25 18:41:22 +08:00
|
|
|
}
|
|
|
|
|
2008-07-25 18:41:21 +08:00
|
|
|
int write_archive(int argc, const char **argv, const char *prefix,
|
2018-08-14 00:14:35 +08:00
|
|
|
struct repository *repo,
|
2016-11-23 05:37:04 +08:00
|
|
|
const char *name_hint, int remote)
|
2008-07-25 18:41:21 +08:00
|
|
|
{
|
|
|
|
const struct archiver *ar = NULL;
|
2021-02-28 19:22:47 +08:00
|
|
|
struct pretty_print_describe_status describe_status = {0};
|
|
|
|
struct pretty_print_context ctx = {0};
|
2008-07-25 18:41:21 +08:00
|
|
|
struct archiver_args args;
|
2020-09-20 05:23:42 +08:00
|
|
|
int rc;
|
2008-07-25 18:41:21 +08:00
|
|
|
|
2014-08-08 00:21:19 +08:00
|
|
|
git_config_get_bool("uploadarchive.allowunreachable", &remote_allow_unreachable);
|
|
|
|
git_config(git_default_config, NULL);
|
|
|
|
|
2021-02-28 19:22:47 +08:00
|
|
|
describe_status.max_invocations = 1;
|
|
|
|
ctx.date_mode.type = DATE_NORMAL;
|
|
|
|
ctx.abbrev = DEFAULT_ABBREV;
|
|
|
|
ctx.describe_status = &describe_status;
|
|
|
|
args.pretty_ctx = &ctx;
|
2018-08-14 00:14:35 +08:00
|
|
|
args.repo = repo;
|
2020-09-20 05:23:42 +08:00
|
|
|
args.prefix = prefix;
|
|
|
|
string_list_init(&args.extra_files, 1);
|
2011-06-22 11:17:35 +08:00
|
|
|
argc = parse_archive_args(argc, argv, &ar, &args, name_hint, remote);
|
2016-11-23 05:37:04 +08:00
|
|
|
if (!startup_info->have_repository) {
|
archive: reorder option parsing and config reading
The archive command does three things during its
initialization phase:
1. parse command-line options
2. setup the git directory
3. read config
During phase (1), if we see any options that do not require
a git directory (like "--list"), we handle them immediately
and exit, making it safe to abort step (2) if we are not in
a git directory.
Step (3) must come after step (2), since the git directory
may influence configuration. However, this leaves no
possibility of configuration from step (3) impacting the
command-line options in step (1) (which is useful, for
example, for supporting user-configurable output formats).
Instead, let's reorder this to:
1. setup the git directory, if it exists
2. read config
3. parse command-line options
4. if we are not in a git repository, die
This should have the same external behavior, but puts
configuration before command-line parsing.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-06-16 06:31:28 +08:00
|
|
|
/*
|
|
|
|
* We know this will die() with an error, so we could just
|
|
|
|
* die ourselves; but its error message will be more specific
|
|
|
|
* than what we could write here.
|
|
|
|
*/
|
|
|
|
setup_git_directory();
|
|
|
|
}
|
2008-07-25 18:41:21 +08:00
|
|
|
|
archive: don't let remote clients get unreachable commits
Usually git is careful not to allow clients to fetch
arbitrary objects from the database; for example, objects
received via upload-pack must be reachable from a ref.
Upload-archive breaks this by feeding the client's tree-ish
directly to get_sha1, which will accept arbitrary hex sha1s,
reflogs, etc.
This is not a problem if all of your objects are publicly
reachable anyway (or at least public to anybody who can run
upload-archive). Or if you are making the repo available by
dumb protocols like http or rsync (in which case the client
can read your whole object db directly).
But for sites which allow access only through smart
protocols, clients may be able to fetch trees from commits
that exist in the server's object database but are not
referenced (e.g., because history was rewound).
This patch tightens upload-archive's lookup to use dwim_ref
rather than get_sha1. This means a remote client can only
fetch the tip of a named ref, not an arbitrary sha1 or
reflog entry.
This also restricts some legitimate requests, too:
1. Reachable non-tip commits, like:
git archive --remote=$url v1.0~5
2. Sub-trees of reachable commits, like:
git archive --remote=$url v1.7.7:Documentation
Local requests continue to use get_sha1, and are not
restricted at all.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-11-18 07:04:22 +08:00
|
|
|
parse_treeish_arg(argv, &args, prefix, remote);
|
2008-07-25 18:41:21 +08:00
|
|
|
parse_pathspec_arg(argv + 1, &args);
|
|
|
|
|
2020-09-20 05:23:42 +08:00
|
|
|
rc = ar->write_archive(ar, &args);
|
|
|
|
|
|
|
|
string_list_clear_func(&args.extra_files, extra_file_info_clear);
|
2020-11-15 06:01:04 +08:00
|
|
|
free(args.refname);
|
2020-09-20 05:23:42 +08:00
|
|
|
|
|
|
|
return rc;
|
2008-07-25 18:41:21 +08:00
|
|
|
}
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-06-22 09:24:48 +08:00
|
|
|
|
2011-06-22 09:25:25 +08:00
|
|
|
/*
 * Return 1 if `filename` ends in ".<ext>" and has at least one character in
 * front of that dot; return 0 otherwise.
 *
 * Lengths are kept as size_t so that the subtraction can neither go negative
 * nor be truncated when narrowed to int.
 */
static int match_extension(const char *filename, const char *ext)
{
	size_t filename_len = strlen(filename);
	size_t ext_len = strlen(ext);
	size_t prefixlen;

	/*
	 * We need 1 character for the '.', and 1 character to ensure that the
	 * prefix is non-empty (i.e., we don't match .tar.gz with no actual
	 * filename).
	 */
	if (filename_len < 2 || filename_len - 2 < ext_len)
		return 0;

	prefixlen = filename_len - ext_len;
	if (filename[prefixlen - 1] != '.')
		return 0;
	return !strcmp(filename + prefixlen, ext);
}
|
|
|
|
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-06-22 09:24:48 +08:00
|
|
|
const char *archive_format_from_filename(const char *filename)
|
|
|
|
{
|
2011-06-22 09:25:25 +08:00
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < nr_archivers; i++)
|
|
|
|
if (match_extension(filename, archivers[i]->name))
|
|
|
|
return archivers[i]->name;
|
archive: move file extension format-guessing lower
The process for guessing an archive output format based on
the filename is something like this:
a. parse --output in cmd_archive; check the filename
against a static set of mapping heuristics (right now
it just matches ".zip" for zip files).
b. if found, stick a fake "--format=zip" at the beginning
of the arguments list (if the user did specify a
--format manually, the later option will override our
fake one)
c. if it's a remote call, ship the arguments to the remote
(including the fake), which will call write_archive on
their end
d. if it's local, ship the arguments to write_archive
locally
There are two problems:
1. The set of mappings is static and at too high a level.
The write_archive level is going to check config for
user-defined formats, some of which will specify
extensions. We need to delay lookup until those are
parsed, so we can match against them.
2. For a remote archive call, our set of mappings (or
formats) may not match the remote side's. This is OK in
practice right now, because all versions of git
understand "zip" and "tar". But as new formats are
added, there is going to be a mismatch between what the
client can do and what the remote server can do.
To fix (1), this patch refactors the location guessing to
happen at the write_archive level, instead of the
cmd_archive level. So instead of sticking a fake --format
field in the argv list, we actually pass a "name hint" down
the callchain; this hint is used at the appropriate time to
guess the format (if one hasn't been given already).
This patch leaves (2) unfixed. The name_hint is converted to
a "--format" option as before, and passed to the remote.
This means the local side's idea of how extensions map to
formats will take precedence.
Another option would be to pass the name hint to the remote
side and let the remote choose. This isn't a good idea for
two reasons:
1. There's no room in the protocol for passing that
information. We can pass a new argument, but older
versions of git on the server will choke on it.
2. Letting the remote side decide creates a silent
inconsistency in user experience. Consider the case
that the locally installed git knows about the "tar.gz"
format, but a remote server doesn't.
Running "git archive -o foo.tar.gz" will use the tar.gz
format. If we use --remote, and the local side chooses
the format, then we send "--format=tar.gz" to the
remote, which will complain about the unknown format.
But if we let the remote side choose the format, then
it will realize that it doesn't know about "tar.gz" and
output uncompressed tar without even issuing a warning.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2011-06-22 09:24:48 +08:00
|
|
|
return NULL;
|
|
|
|
}
|