2008-04-24 03:17:43 +08:00
|
|
|
#!/bin/sh
|
|
|
|
|
|
|
|
test_description='git cat-file'
|
|
|
|
|
2024-08-14 14:52:00 +08:00
|
|
|
TEST_PASSES_SANITIZE_LEAK=true
|
2008-04-24 03:17:43 +08:00
|
|
|
. ./test-lib.sh
|
|
|
|
|
2021-12-28 21:28:41 +08:00
|
|
|
# test_cmdmode_usage <command> [<args>...]
#
# Runs the given command through "test_expect_code 129", saving its
# standard error in the file "err", and then requires "err" to contain
# a line of the form "error: ... cannot be used together".
test_cmdmode_usage () {
|
|
|
|
test_expect_code 129 "$@" 2>err &&
|
2023-11-26 19:57:43 +08:00
|
|
|
grep "^error: .* cannot be used together" err
|
2021-12-28 21:28:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
# Register one test per switch combination listed below; each test
# passes "git cat-file" plus that combination to test_cmdmode_usage.
# $switches is expanded unquoted on purpose so that it splits into
# separate command-line options.
for switches in \
|
|
|
|
'-e -p' \
|
|
|
|
'-p -t' \
|
|
|
|
'-t -s' \
|
|
|
|
'-s --textconv' \
|
2021-12-28 21:28:46 +08:00
|
|
|
'--textconv --filters' \
|
|
|
|
'--batch-all-objects -e'
|
2021-12-28 21:28:41 +08:00
|
|
|
do
|
|
|
|
test_expect_success "usage: cmdmode $switches" '
|
|
|
|
test_cmdmode_usage git cat-file $switches
|
|
|
|
'
|
|
|
|
done
|
|
|
|
|
|
|
|
# test_incompatible_usage <command> [<args>...]
#
# Runs the given command through "test_expect_code 129", saving its
# standard error in the file "err", and then requires "err" to contain
# a "fatal:" or "error:" line mentioning "requires", "incompatible with"
# or "needs".
test_incompatible_usage () {
|
|
|
|
test_expect_code 129 "$@" 2>err &&
|
2021-12-28 21:28:47 +08:00
|
|
|
grep -E "^(fatal|error):.*(requires|incompatible with|needs)" err
|
2021-12-28 21:28:41 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
# For each of --batch and --batch-check, register a test that passes
# "git cat-file --path=foo <opt>" to test_incompatible_usage.
for opt in --batch --batch-check
|
|
|
|
do
|
|
|
|
test_expect_success "usage: incompatible options: --path with $opt" '
|
|
|
|
test_incompatible_usage git cat-file --path=foo $opt
|
|
|
|
'
|
|
|
|
done
|
|
|
|
|
2021-12-28 21:28:47 +08:00
|
|
|
# test_missing_usage <command> [<args>...]
#
# Runs the given command through "test_expect_code 129", saving its
# standard error in the file "err", and then requires "err" to contain
# a "fatal:" line that mentions "required".
test_missing_usage () {
|
|
|
|
test_expect_code 129 "$@" 2>err &&
|
|
|
|
grep -E "^fatal:.*required" err
|
|
|
|
}
|
|
|
|
|
2021-12-28 21:28:41 +08:00
|
|
|
# Space-separated option lists that the loops in this file iterate over
# (expanded unquoted so that each option becomes its own word).
short_modes="-e -p -t -s"
|
|
|
|
cw_modes="--textconv --filters"
|
|
|
|
|
|
|
|
# For each option in $cw_modes, register a test that passes
# "git cat-file <opt>" (no further arguments) to test_missing_usage.
for opt in $cw_modes
|
|
|
|
do
|
|
|
|
test_expect_success "usage: $opt requires another option" '
|
2021-12-28 21:28:47 +08:00
|
|
|
test_missing_usage git cat-file $opt
|
2021-12-28 21:28:41 +08:00
|
|
|
'
|
|
|
|
done
|
|
|
|
|
|
|
|
# For each option in $short_modes:
#   1. register a test that passes "git cat-file <opt>" alone to
#      test_missing_usage;
#   2. for each entry of the inner opt2 list, register a test that passes
#      "git cat-file <opt> <opt2>" to test_incompatible_usage.  The last
#      opt2 entry contains a space and is expanded unquoted, so it splits
#      into two arguments.
for opt in $short_modes
|
|
|
|
do
|
|
|
|
test_expect_success "usage: $opt requires another option" '
|
2021-12-28 21:28:47 +08:00
|
|
|
test_missing_usage git cat-file $opt
|
2021-12-28 21:28:41 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
for opt2 in --batch \
|
|
|
|
--batch-check \
|
2021-12-28 21:28:47 +08:00
|
|
|
--follow-symlinks \
|
|
|
|
"--path=foo HEAD:some-path.txt"
|
2021-12-28 21:28:41 +08:00
|
|
|
do
|
2021-12-28 21:28:47 +08:00
|
|
|
test_expect_success "usage: incompatible options: $opt and $opt2" '
|
2021-12-28 21:28:41 +08:00
|
|
|
test_incompatible_usage git cat-file $opt $opt2
|
|
|
|
'
|
|
|
|
done
|
|
|
|
done
|
|
|
|
|
2021-12-28 21:28:47 +08:00
|
|
|
# test_too_many_arguments <command> [<args>...]
#
# Runs the given command through "test_expect_code 129", saving its
# standard error in the file "err", and then requires "err" to contain
# the exact line "fatal: too many arguments".
test_too_many_arguments () {
|
|
|
|
test_expect_code 129 "$@" 2>err &&
|
|
|
|
grep -E "^fatal: too many arguments$" err
|
|
|
|
}
|
|
|
|
|
2021-12-28 21:28:41 +08:00
|
|
|
# For each option in $short_modes and $cw_modes:
#   1. register a test that passes "git cat-file <opt> one two three" to
#      test_too_many_arguments;
#   2. for each of --buffer and --follow-symlinks, register a test that
#      passes "git cat-file <opt> <opt2>" to test_incompatible_usage.
for opt in $short_modes $cw_modes
|
|
|
|
do
|
|
|
|
args="one two three"
|
|
|
|
test_expect_success "usage: too many arguments: $opt $args" '
|
2021-12-28 21:28:47 +08:00
|
|
|
test_too_many_arguments git cat-file $opt $args
|
2021-12-28 21:28:41 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
for opt2 in --buffer --follow-symlinks
|
|
|
|
do
|
|
|
|
test_expect_success "usage: incompatible arguments: $opt with batch option $opt2" '
|
2021-12-28 21:28:47 +08:00
|
|
|
test_incompatible_usage git cat-file $opt $opt2
|
2021-12-28 21:28:41 +08:00
|
|
|
'
|
|
|
|
done
|
|
|
|
done
|
|
|
|
|
|
|
|
for opt in --buffer \
|
|
|
|
--follow-symlinks \
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
--batch-all-objects \
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
-z \
|
|
|
|
-Z
|
2021-12-28 21:28:41 +08:00
|
|
|
do
|
2021-12-28 21:28:47 +08:00
|
|
|
test_expect_success "usage: bad option combination: $opt without batch mode" '
|
|
|
|
test_incompatible_usage git cat-file $opt &&
|
|
|
|
test_incompatible_usage git cat-file $opt commit HEAD
|
2021-12-28 21:28:41 +08:00
|
|
|
'
|
|
|
|
done
|
|
|
|
|
2008-04-24 03:17:43 +08:00
|
|
|
# echo_without_newline <arg>...
#
# Prints all arguments as a single string (the expansion of "$*") via
# printf '%s', i.e. without appending a trailing newline.
echo_without_newline () {
|
|
|
|
printf '%s' "$*"
|
|
|
|
}
|
|
|
|
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
# echo_without_newline_nul <arg>...
#
# Like echo_without_newline, but pipes the output through tr so that
# every newline character is replaced by a NUL byte.
echo_without_newline_nul () {
|
|
|
|
echo_without_newline "$@" | tr '\n' '\0'
|
|
|
|
}
|
|
|
|
|
2008-04-24 03:17:43 +08:00
|
|
|
# strlen <string>
#
# Prints the number of bytes in the first argument, as counted by
# "wc -c" on the string written without a trailing newline.  The sed
# step removes any leading spaces from the wc output.
strlen () {
|
|
|
|
echo_without_newline "$1" | wc -c | sed -e 's/^ *//'
|
|
|
|
}
|
|
|
|
|
|
|
|
run_tests () {
|
|
|
|
type=$1
|
2023-10-02 10:40:32 +08:00
|
|
|
oid=$2
|
2008-04-24 03:17:43 +08:00
|
|
|
size=$3
|
|
|
|
content=$4
|
|
|
|
pretty_content=$5
|
|
|
|
|
2023-10-02 10:40:32 +08:00
|
|
|
batch_output="$oid $type $size
|
2008-04-24 03:17:47 +08:00
|
|
|
$content"
|
|
|
|
|
2008-04-24 03:17:43 +08:00
|
|
|
test_expect_success "$type exists" '
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -e $oid
|
2008-04-24 03:17:43 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "Type of $type is correct" '
|
2013-07-10 19:36:43 +08:00
|
|
|
echo $type >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -t $oid >actual &&
|
2013-07-10 19:36:43 +08:00
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:43 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "Size of $type is correct" '
|
2013-07-10 19:36:43 +08:00
|
|
|
echo $size >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -s $oid >actual &&
|
2013-07-10 19:36:43 +08:00
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:43 +08:00
|
|
|
'
|
|
|
|
|
2015-05-03 22:30:02 +08:00
|
|
|
test_expect_success "Type of $type is correct using --allow-unknown-type" '
|
|
|
|
echo $type >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -t --allow-unknown-type $oid >actual &&
|
2015-05-03 22:30:02 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "Size of $type is correct using --allow-unknown-type" '
|
|
|
|
echo $size >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -s --allow-unknown-type $oid >actual &&
|
2015-05-03 22:30:02 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2008-04-24 03:17:43 +08:00
|
|
|
test -z "$content" ||
|
|
|
|
test_expect_success "Content of $type is correct" '
|
2023-06-06 13:19:29 +08:00
|
|
|
echo_without_newline "$content" >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file $type $oid >actual &&
|
2013-07-10 19:36:43 +08:00
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:43 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "Pretty content of $type is correct" '
|
2023-06-06 13:19:29 +08:00
|
|
|
echo_without_newline "$pretty_content" >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -p $oid >actual &&
|
2013-07-10 19:36:43 +08:00
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:43 +08:00
|
|
|
'
|
2008-04-24 03:17:46 +08:00
|
|
|
|
2008-04-24 03:17:47 +08:00
|
|
|
test -z "$content" ||
|
|
|
|
test_expect_success "--batch output of $type is correct" '
|
2023-06-06 13:19:29 +08:00
|
|
|
echo "$batch_output" >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
echo $oid | git cat-file --batch >actual &&
|
2013-07-10 19:36:43 +08:00
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:47 +08:00
|
|
|
'
|
|
|
|
|
2008-04-24 03:17:46 +08:00
|
|
|
test_expect_success "--batch-check output of $type is correct" '
|
2023-10-02 10:40:32 +08:00
|
|
|
echo "$oid $type $size" >expect &&
|
|
|
|
echo_without_newline $oid | git cat-file --batch-check >actual &&
|
2013-07-10 19:36:43 +08:00
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:46 +08:00
|
|
|
'
|
2013-07-10 19:45:47 +08:00
|
|
|
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
for opt in --buffer --no-buffer
|
|
|
|
do
|
|
|
|
test -z "$content" ||
|
|
|
|
test_expect_success "--batch-command $opt output of $type content is correct" '
|
2023-06-06 13:19:29 +08:00
|
|
|
echo "$batch_output" >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
test_write_lines "contents $oid" | git cat-file --batch-command $opt >actual &&
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "--batch-command $opt output of $type info is correct" '
|
2023-10-02 10:40:32 +08:00
|
|
|
echo "$oid $type $size" >expect &&
|
|
|
|
test_write_lines "info $oid" |
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
git cat-file --batch-command $opt >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
done
|
|
|
|
|
2013-07-10 19:45:47 +08:00
|
|
|
test_expect_success "custom --batch-check format" '
|
2023-10-02 10:40:32 +08:00
|
|
|
echo "$type $oid" >expect &&
|
|
|
|
echo $oid | git cat-file --batch-check="%(objecttype) %(objectname)" >actual &&
|
2013-07-10 19:45:47 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
cat-file: only split on whitespace when %(rest) is used
Commit c334b87b (cat-file: split --batch input lines on whitespace,
2013-07-11) taught `cat-file --batch-check` to split input lines on
the first whitespace, and stash everything after the first token
into the %(rest) output format element. It claimed:
Object names cannot contain spaces, so any input with
spaces would have resulted in a "missing" line.
But that is not correct. Refs, object sha1s, and various peeling
suffixes cannot contain spaces, but some object names can. In
particular:
1. Tree paths like "[<tree>]:path with whitespace"
2. Reflog specifications like "@{2 days ago}"
3. Commit searches like "rev^{/grep me}" or ":/grep me"
To remain backwards compatible, we cannot split on whitespace by
default, hence we will ship 1.8.4 with the commit reverted.
Resurrect its attempt but in a weaker form; only do the splitting
when "%(rest)" is used in the output format. Since that element did
not exist at all before c334b87, old scripts cannot be affected.
The existence of object names with spaces does mean that you
cannot reliably do:
echo ":path with space and other data" |
git cat-file --batch-check="%(objectname) %(rest)"
as it would split the path and feed only ":path" to get_sha1. But
that command is nonsensical. If you wanted to see "and other data"
in "%(rest)", git cannot possibly know where the filename ends and
the "rest" begins.
It might be more robust to have something like "-z" to separate the
input elements. But this patch is still a reasonable step before
having that. It makes the easy cases easy; people who do not care
about %(rest) do not have to consider it, and the %(rest) code
handles the spaces and newlines of "rev-list --objects" correctly.
Hard cases remain hard but possible (if you might get whitespace in
your input, you do not get to use %(rest) and must split and join
the output yourself using more flexible tools). And most
importantly, it does not preclude us from having different splitting
rules later if a "-z" (or similar) option is added. So we can make
the hard cases easier later, if we choose to.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 19:59:07 +08:00
|
|
|
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
test_expect_success "custom --batch-command format" '
|
2023-10-02 10:40:32 +08:00
|
|
|
echo "$type $oid" >expect &&
|
|
|
|
echo "info $oid" | git cat-file --batch-command="%(objecttype) %(objectname)" >actual &&
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
cat-file: only split on whitespace when %(rest) is used
Commit c334b87b (cat-file: split --batch input lines on whitespace,
2013-07-11) taught `cat-file --batch-check` to split input lines on
the first whitespace, and stash everything after the first token
into the %(rest) output format element. It claimed:
Object names cannot contain spaces, so any input with
spaces would have resulted in a "missing" line.
But that is not correct. Refs, object sha1s, and various peeling
suffixes cannot contain spaces, but some object names can. In
particular:
1. Tree paths like "[<tree>]:path with whitespace"
2. Reflog specifications like "@{2 days ago}"
3. Commit searches like "rev^{/grep me}" or ":/grep me"
To remain backwards compatible, we cannot split on whitespace by
default, hence we will ship 1.8.4 with the commit reverted.
Resurrect its attempt but in a weaker form; only do the splitting
when "%(rest)" is used in the output format. Since that element did
not exist at all before c334b87, old scripts cannot be affected.
The existence of object names with spaces does mean that you
cannot reliably do:
echo ":path with space and other data" |
git cat-file --batch-check="%(objectname) %(rest)"
as it would split the path and feed only ":path" to get_sha1. But
that command is nonsensical. If you wanted to see "and other data"
in "%(rest)", git cannot possibly know where the filename ends and
the "rest" begins.
It might be more robust to have something like "-z" to separate the
input elements. But this patch is still a reasonable step before
having that. It makes the easy cases easy; people who do not care
about %(rest) do not have to consider it, and the %(rest) code
handles the spaces and newlines of "rev-list --objects" correctly.
Hard cases remain hard but possible (if you might get whitespace in
your input, you do not get to use %(rest) and must split and join
the output yourself using more flexible tools). And most
importantly, it does not preclude us from having different splitting
rules later if a "-z" (or similar) option is added. So we can make
the hard cases easier later, if we choose to.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 19:59:07 +08:00
|
|
|
test_expect_success '--batch-check with %(rest)' '
|
|
|
|
echo "$type this is some extra content" >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
echo "$oid this is some extra content" |
|
cat-file: only split on whitespace when %(rest) is used
Commit c334b87b (cat-file: split --batch input lines on whitespace,
2013-07-11) taught `cat-file --batch-check` to split input lines on
the first whitespace, and stash everything after the first token
into the %(rest) output format element. It claimed:
Object names cannot contain spaces, so any input with
spaces would have resulted in a "missing" line.
But that is not correct. Refs, object sha1s, and various peeling
suffixes cannot contain spaces, but some object names can. In
particular:
1. Tree paths like "[<tree>]:path with whitespace"
2. Reflog specifications like "@{2 days ago}"
3. Commit searches like "rev^{/grep me}" or ":/grep me"
To remain backwards compatible, we cannot split on whitespace by
default, hence we will ship 1.8.4 with the commit reverted.
Resurrect its attempt but in a weaker form; only do the splitting
when "%(rest)" is used in the output format. Since that element did
not exist at all before c334b87, old scripts cannot be affected.
The existence of object names with spaces does mean that you
cannot reliably do:
echo ":path with space and other data" |
git cat-file --batch-check="%(objectname) %(rest)"
as it would split the path and feed only ":path" to get_sha1. But
that command is nonsensical. If you wanted to see "and other data"
in "%(rest)", git cannot possibly know where the filename ends and
the "rest" begins.
It might be more robust to have something like "-z" to separate the
input elements. But this patch is still a reasonable step before
having that. It makes the easy cases easy; people who do not care
about %(rest) do not have to consider it, and the %(rest) code
handles the spaces and newlines of "rev-list --objects" correctly.
Hard cases remain hard but possible (if you might get whitespace in
your input, you do not get to use %(rest) and must split and join
the output yourself using more flexible tools). And most
importantly, it does not preclude us from having different splitting
rules later if a "-z" (or similar) option is added. So we can make
the hard cases easier later, if we choose to.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 19:59:07 +08:00
|
|
|
git cat-file --batch-check="%(objecttype) %(rest)" >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
cat-file: handle --batch format with missing type/size
Commit 98e2092 taught cat-file to stream blobs with --batch,
which requires that we look up the object type before
loading it into memory. As a result, we now print the
object header from information in sha1_object_info, and the
actual contents from the read_sha1_file. We double-check
that the information we printed in the header matches the
content we are about to show.
Later, commit 93d2a60 allowed custom header lines for
--batch, and commit 5b08640 made type lookups optional. As a
result, specifying a header line without the type or size
means that we will not look up those items at all.
This causes our double-checking to erroneously die with an
error; we think the type or size has changed, when in fact
it was simply left at "0".
For the size, we can fix this by only doing the consistency
double-check when we have retrieved the size via
sha1_object_info. In the case that we have not retrieved the
value, that means we also did not print it, so there is
nothing for us to check that we are consistent with.
We could do the same for the type. However, besides our
consistency check, we also care about the type in deciding
whether to stream or not. So instead of handling the case
where we do not know the type, this patch instead makes sure
that we always trigger a type lookup when we are printing,
so that even a format without the type will stream as we
would in the normal case.
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-12 07:15:50 +08:00
|
|
|
|
|
|
|
test -z "$content" ||
|
|
|
|
test_expect_success "--batch without type ($type)" '
|
|
|
|
{
|
|
|
|
echo "$size" &&
|
2023-06-06 13:19:29 +08:00
|
|
|
echo "$content"
|
cat-file: handle --batch format with missing type/size
Commit 98e2092 taught cat-file to stream blobs with --batch,
which requires that we look up the object type before
loading it into memory. As a result, we now print the
object header from information in sha1_object_info, and the
actual contents from the read_sha1_file. We double-check
that the information we printed in the header matches the
content we are about to show.
Later, commit 93d2a60 allowed custom header lines for
--batch, and commit 5b08640 made type lookups optional. As a
result, specifying a header line without the type or size
means that we will not look up those items at all.
This causes our double-checking to erroneously die with an
error; we think the type or size has changed, when in fact
it was simply left at "0".
For the size, we can fix this by only doing the consistency
double-check when we have retrieved the size via
sha1_object_info. In the case that we have not retrieved the
value, that means we also did not print it, so there is
nothing for us to check that we are consistent with.
We could do the same for the type. However, besides our
consistency check, we also care about the type in deciding
whether to stream or not. So instead of handling the case
where we do not know the type, this patch instead makes sure
that we always trigger a type lookup when we are printing,
so that even a format without the type will stream as we
would in the normal case.
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-12 07:15:50 +08:00
|
|
|
} >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
echo $oid | git cat-file --batch="%(objectsize)" >actual &&
|
cat-file: handle --batch format with missing type/size
Commit 98e2092 taught cat-file to stream blobs with --batch,
which requires that we look up the object type before
loading it into memory. As a result, we now print the
object header from information in sha1_object_info, and the
actual contents from the read_sha1_file. We double-check
that the information we printed in the header matches the
content we are about to show.
Later, commit 93d2a60 allowed custom header lines for
--batch, and commit 5b08640 made type lookups optional. As a
result, specifying a header line without the type or size
means that we will not look up those items at all.
This causes our double-checking to erroneously die with an
error; we think the type or size has changed, when in fact
it was simply left at "0".
For the size, we can fix this by only doing the consistency
double-check when we have retrieved the size via
sha1_object_info. In the case that we have not retrieved the
value, that means we also did not print it, so there is
nothing for us to check that we are consistent with.
We could do the same for the type. However, besides our
consistency check, we also care about the type in deciding
whether to stream or not. So instead of handling the case
where we do not know the type, this patch instead makes sure
that we always trigger a type lookup when we are printing,
so that even a format without the type will stream as we
would in the normal case.
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-12 07:15:50 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test -z "$content" ||
|
|
|
|
test_expect_success "--batch without size ($type)" '
|
|
|
|
{
|
|
|
|
echo "$type" &&
|
2023-06-06 13:19:29 +08:00
|
|
|
echo "$content"
|
cat-file: handle --batch format with missing type/size
Commit 98e2092 taught cat-file to stream blobs with --batch,
which requires that we look up the object type before
loading it into memory. As a result, we now print the
object header from information in sha1_object_info, and the
actual contents from the read_sha1_file. We double-check
that the information we printed in the header matches the
content we are about to show.
Later, commit 93d2a60 allowed custom header lines for
--batch, and commit 5b08640 made type lookups optional. As a
result, specifying a header line without the type or size
means that we will not look up those items at all.
This causes our double-checking to erroneously die with an
error; we think the type or size has changed, when in fact
it was simply left at "0".
For the size, we can fix this by only doing the consistency
double-check when we have retrieved the size via
sha1_object_info. In the case that we have not retrieved the
value, that means we also did not print it, so there is
nothing for us to check that we are consistent with.
We could do the same for the type. However, besides our
consistency check, we also care about the type in deciding
whether to stream or not. So instead of handling the case
where we do not know the type, this patch instead makes sure
that we always trigger a type lookup when we are printing,
so that even a format without the type will stream as we
would in the normal case.
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-12 07:15:50 +08:00
|
|
|
} >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
echo $oid | git cat-file --batch="%(objecttype)" >actual &&
|
cat-file: handle --batch format with missing type/size
Commit 98e2092 taught cat-file to stream blobs with --batch,
which requires that we look up the object type before
loading it into memory. As a result, we now print the
object header from information in sha1_object_info, and the
actual contents from the read_sha1_file. We double-check
that the information we printed in the header matches the
content we are about to show.
Later, commit 93d2a60 allowed custom header lines for
--batch, and commit 5b08640 made type lookups optional. As a
result, specifying a header line without the type or size
means that we will not look up those items at all.
This causes our double-checking to erroneously die with an
error; we think the type or size has changed, when in fact
it was simply left at "0".
For the size, we can fix this by only doing the consistency
double-check when we have retrieved the size via
sha1_object_info. In the case that we have not retrieved the
value, that means we also did not print it, so there is
nothing for us to check that we are consistent with.
We could do the same for the type. However, besides our
consistency check, we also care about the type in deciding
whether to stream or not. So instead of handling the case
where we do not know the type, this patch instead makes sure
that we always trigger a type lookup when we are printing,
so that even a format without the type will stream as we
would in the normal case.
Reviewed-by: Jonathan Nieder <jrnieder@gmail.com>
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-12-12 07:15:50 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
2008-04-24 03:17:43 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
# Fixture blob used by the blob tests: its literal content, the size
# reported for it by the strlen helper, and the object ID that
# "git hash-object --stdin" prints for that content.
hello_content="Hello World"
hello_size=$(strlen "$hello_content")
hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin)

# Set repository format version 1 with the object format and compat object
# format taken from $test_hash_algo and $test_compat_hash_algo, then write
# the fixture content to the file "hello" and add it to the index.
test_expect_success "setup" '
	git config core.repositoryformatversion 1 &&
	git config extensions.objectformat $test_hash_algo &&
	git config extensions.compatobjectformat $test_compat_hash_algo &&
	echo_without_newline "$hello_content" >hello &&
	git update-index --add hello
'
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
# run_blob_tests <oid>
#
# Run the blob checks for the "Hello World" fixture blob named by <oid>:
# the generic run_tests suite, followed by two checks of the "info" command
# under "git cat-file --batch-command --buffer".
#
# Reads the globals $hello_size and $hello_content, and sets the global $oid
# so that the single-quoted test bodies can expand it when they are run.
run_blob_tests () {
	oid=$1

	# Quoted so that an empty value keeps its own argument slot instead
	# of shifting the size and content into the wrong positions.
	run_tests 'blob' "$oid" "$hello_size" "$hello_content" "$hello_content"

	# An explicit "flush" must make the queued "info" reply appear on
	# stdout.  GIT_TEST_CAT_FILE_NO_FLUSH_ON_EXIT=1 presumably disables
	# the flush at process exit, so output can only come from "flush".
	test_expect_success '--batch-command --buffer with flush for blob info' '
		echo "$oid blob $hello_size" >expect &&
		test_write_lines "info $oid" "flush" |
		GIT_TEST_CAT_FILE_NO_FLUSH_ON_EXIT=1 \
		git cat-file --batch-command --buffer >actual &&
		test_cmp expect actual
	'

	# Without "flush" nothing may be written.  Redirect with ">" so that
	# "output" is truncated on every run: this function is invoked more
	# than once, and a non-truncating "touch" plus ">>" would let stale
	# content from an earlier run fail the emptiness check.
	test_expect_success '--batch-command --buffer without flush for blob info' '
		test_write_lines "info $oid" |
		GIT_TEST_CAT_FILE_NO_FLUSH_ON_EXIT=1 \
		git cat-file --batch-command --buffer >output &&
		test_must_be_empty output
	'
}
|
|
|
|
|
|
|
|
# Run the blob tests twice for the same fixture blob: first under $hello_oid,
# then under the object ID that "git rev-parse --output-object-format"
# prints for it in $test_compat_hash_algo.
hello_compat_oid=$(
	git rev-parse --output-object-format=$test_compat_hash_algo $hello_oid
)
run_blob_tests $hello_oid
run_blob_tests $hello_compat_oid
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
|
cat-file: only split on whitespace when %(rest) is used
Commit c334b87b (cat-file: split --batch input lines on whitespace,
2013-07-11) taught `cat-file --batch-check` to split input lines on
the first whitespace, and stash everything after the first token
into the %(rest) output format element. It claimed:
Object names cannot contain spaces, so any input with
spaces would have resulted in a "missing" line.
But that is not correct. Refs, object sha1s, and various peeling
suffixes cannot contain spaces, but some object names can. In
particular:
1. Tree paths like "[<tree>]:path with whitespace"
2. Reflog specifications like "@{2 days ago}"
3. Commit searches like "rev^{/grep me}" or ":/grep me"
To remain backwards compatible, we cannot split on whitespace by
default, hence we will ship 1.8.4 with the commit reverted.
Resurrect its attempt but in a weaker form; only do the splitting
when "%(rest)" is used in the output format. Since that element did
not exist at all before c334b87, old scripts cannot be affected.
The existence of object names with spaces does mean that you
cannot reliably do:
echo ":path with space and other data" |
git cat-file --batch-check="%(objectname) %(rest)"
as it would split the path and feed only ":path" to get_sha1. But
that command is nonsensical. If you wanted to see "and other data"
in "%(rest)", git cannot possibly know where the filename ends and
the "rest" begins.
It might be more robust to have something like "-z" to separate the
input elements. But this patch is still a reasonable step before
having that. It makes the easy cases easy; people who do not care
about %(rest) do not have to consider it, and the %(rest) code
handles the spaces and newlines of "rev-list --objects" correctly.
Hard cases remain hard but possible (if you might get whitespace in
your input, you do not get to use %(rest) and must split and join
the output yourself using more flexible tools). And most
importantly, it does not preclude us from having different splitting
rules later if a "-z" (or similar) option is added. So we can make
the hard cases easier later, if we choose to.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 19:59:07 +08:00
|
|
|
# Stage the hello blob in the index under the path "white space" and feed
# ":white space" to --batch-check with the default format (no %(rest)).
# The expected output is the blob's "<oid> blob <size>" line, i.e. the whole
# input line, including the space, must be treated as the object name.
# The index entry is removed again when the test finishes.
test_expect_success '--batch-check without %(rest) considers whole line' '
|
2023-10-02 10:40:32 +08:00
|
|
|
echo "$hello_oid blob $hello_size" >expect &&
|
|
|
|
git update-index --add --cacheinfo 100644 $hello_oid "white space" &&
|
cat-file: only split on whitespace when %(rest) is used
Commit c334b87b (cat-file: split --batch input lines on whitespace,
2013-07-11) taught `cat-file --batch-check` to split input lines on
the first whitespace, and stash everything after the first token
into the %(rest) output format element. It claimed:
Object names cannot contain spaces, so any input with
spaces would have resulted in a "missing" line.
But that is not correct. Refs, object sha1s, and various peeling
suffixes cannot contain spaces, but some object names can. In
particular:
1. Tree paths like "[<tree>]:path with whitespace"
2. Reflog specifications like "@{2 days ago}"
3. Commit searches like "rev^{/grep me}" or ":/grep me"
To remain backwards compatible, we cannot split on whitespace by
default, hence we will ship 1.8.4 with the commit reverted.
Resurrect its attempt but in a weaker form; only do the splitting
when "%(rest)" is used in the output format. Since that element did
not exist at all before c334b87, old scripts cannot be affected.
The existence of object names with spaces does mean that you
cannot reliably do:
echo ":path with space and other data" |
git cat-file --batch-check="%(objectname) %(rest)"
as it would split the path and feed only ":path" to get_sha1. But
that command is nonsensical. If you wanted to see "and other data"
in "%(rest)", git cannot possibly know where the filename ends and
the "rest" begins.
It might be more robust to have something like "-z" to separate the
input elements. But this patch is still a reasonable step before
having that. It makes the easy cases easy; people who do not care
about %(rest) do not have to consider it, and the %(rest) code
handles the spaces and newlines of "rev-list --objects" correctly.
Hard cases remain hard but possible (if you might get whitespace in
your input, you do not get to use %(rest) and must split and join
the output yourself using more flexible tools). And most
importantly, it does not preclude us from having different splitting
rules later if a "-z" (or similar) option is added. So we can make
the hard cases easier later, if we choose to.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-08-02 19:59:07 +08:00
|
|
|
test_when_finished "git update-index --remove \"white space\"" &&
|
|
|
|
echo ":white space" | git cat-file --batch-check >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2023-10-02 10:40:32 +08:00
|
|
|
# Tree fixture: write the current index as a tree, look up its name in the
# compat hash algorithm, and compute the expected sizes and pretty-printed
# content for both names before handing them to run_tests.
# NOTE(review): the "+ 13" presumably covers the non-hash bytes of the single
# "100644 hello" entry (mode, space, name, NUL) - confirm against the tree
# object format.
tree_oid=$(git write-tree)
|
2023-10-02 10:40:33 +08:00
|
|
|
tree_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $tree_oid)
|
2018-09-13 13:17:37 +08:00
|
|
|
tree_size=$(($(test_oid rawsz) + 13))
|
2023-10-02 10:40:33 +08:00
|
|
|
tree_compat_size=$(($(test_oid --hash=compat rawsz) + 13))
|
2023-10-02 10:40:32 +08:00
|
|
|
tree_pretty_content="100644 blob $hello_oid hello${LF}"
|
2023-10-02 10:40:33 +08:00
|
|
|
tree_compat_pretty_content="100644 blob $hello_compat_oid hello${LF}"
|
2008-04-24 03:17:43 +08:00
|
|
|
|
2023-10-02 10:40:32 +08:00
|
|
|
run_tests 'tree' $tree_oid $tree_size "" "$tree_pretty_content"
|
2023-10-02 10:40:33 +08:00
|
|
|
run_tests 'tree' $tree_compat_oid $tree_compat_size "" "$tree_compat_pretty_content"
|
2008-04-24 03:17:43 +08:00
|
|
|
|
2013-04-12 06:36:10 +08:00
|
|
|
# Commit fixture: create a commit of $tree_oid whose message is
# "$commit_message" without a trailing newline, look up its name in the
# compat hash algorithm, and build the expected raw content (tree, author,
# committer, message) for both names. The same text is passed to run_tests
# as both of its content arguments.
# NOTE(review): the "+ 137" is presumably the length of everything in the
# commit except the hex tree id, given the test identity and dates - confirm.
commit_message="Initial commit"
|
2023-10-02 10:40:32 +08:00
|
|
|
commit_oid=$(echo_without_newline "$commit_message" | git commit-tree $tree_oid)
|
2023-10-02 10:40:33 +08:00
|
|
|
commit_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $commit_oid)
|
2018-09-13 13:17:37 +08:00
|
|
|
commit_size=$(($(test_oid hexsz) + 137))
|
2023-10-02 10:40:33 +08:00
|
|
|
commit_compat_size=$(($(test_oid --hash=compat hexsz) + 137))
|
2023-10-02 10:40:32 +08:00
|
|
|
commit_content="tree $tree_oid
|
2023-06-06 13:19:29 +08:00
|
|
|
author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> $GIT_AUTHOR_DATE
|
|
|
|
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
|
2008-04-24 03:17:43 +08:00
|
|
|
|
|
|
|
|
$commit_message"
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
commit_compat_content="tree $tree_compat_oid
|
2023-06-06 13:19:29 +08:00
|
|
|
author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> $GIT_AUTHOR_DATE
|
|
|
|
committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
|
2008-04-24 03:17:43 +08:00
|
|
|
|
|
|
|
|
$commit_message"
|
|
|
|
|
2023-10-02 10:40:32 +08:00
|
|
|
run_tests 'commit' $commit_oid $commit_size "$commit_content" "$commit_content"
|
2023-10-02 10:40:33 +08:00
|
|
|
run_tests 'commit' $commit_compat_oid $commit_compat_size "$commit_compat_content" "$commit_compat_content"
|
2008-04-24 03:17:43 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
# Tag fixture: assemble the text of a tag object named "hellotag" that points
# at the hello blob, once with $hello_oid and once with $hello_compat_oid in
# the "object" line. The tagger line is completed with the timestamp
# "0 +0000" and followed by $tag_description. The tag is written with
# hash-object from the text without a trailing newline; the expected sizes
# are the string lengths of the two texts. Both variants go through run_tests.
tag_header_without_oid="type blob
|
2008-04-24 03:17:43 +08:00
|
|
|
tag hellotag
|
|
|
|
tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>"
|
2023-10-02 10:40:33 +08:00
|
|
|
tag_header_without_timestamp="object $hello_oid
|
|
|
|
$tag_header_without_oid"
|
|
|
|
tag_compat_header_without_timestamp="object $hello_compat_oid
|
|
|
|
$tag_header_without_oid"
|
2008-04-24 03:17:43 +08:00
|
|
|
tag_description="This is a tag"
|
2023-01-19 04:35:52 +08:00
|
|
|
tag_content="$tag_header_without_timestamp 0 +0000
|
2008-04-24 03:17:43 +08:00
|
|
|
|
|
|
|
|
$tag_description"
|
2023-10-02 10:40:33 +08:00
|
|
|
tag_compat_content="$tag_compat_header_without_timestamp 0 +0000
|
2008-04-24 03:17:43 +08:00
|
|
|
|
|
|
|
|
$tag_description"
|
|
|
|
|
2023-10-02 10:40:32 +08:00
|
|
|
tag_oid=$(echo_without_newline "$tag_content" | git hash-object -t tag --stdin -w)
|
2008-04-24 03:17:43 +08:00
|
|
|
tag_size=$(strlen "$tag_content")
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
tag_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $tag_oid)
|
|
|
|
tag_compat_size=$(strlen "$tag_compat_content")
|
|
|
|
|
2023-10-02 10:40:32 +08:00
|
|
|
run_tests 'tag' $tag_oid $tag_size "$tag_content" "$tag_content"
|
2023-10-02 10:40:33 +08:00
|
|
|
run_tests 'tag' $tag_compat_oid $tag_compat_size "$tag_compat_content" "$tag_compat_content"
|
2008-04-24 03:17:43 +08:00
|
|
|
|
2023-06-06 13:19:33 +08:00
|
|
|
# Asking for type "blob" while naming the tag object must print the content
# of the blob the tag points at, i.e. exactly $hello_content.
test_expect_success "Reach a blob from a tag pointing to it" '
|
|
|
|
echo_without_newline "$hello_content" >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file blob $tag_oid >actual &&
|
2023-06-06 13:19:33 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
2008-04-24 03:17:43 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
# For both names of the hello blob and for each of --batch, --batch-check and
# --batch-command, verify that cat-file fails when the batch option is
# combined with:
#   - one of -t, -s, -e, -p (tried in both argument orders),
#   - an explicit <type> argument "blob" (tried in both argument orders),
#   - an object name given on the command line.
for oid in $hello_oid $hello_compat_oid
|
2008-04-24 03:17:46 +08:00
|
|
|
do
|
2023-10-02 10:40:33 +08:00
|
|
|
for batch in batch batch-check batch-command
|
2008-04-24 03:17:47 +08:00
|
|
|
do
|
2023-10-02 10:40:33 +08:00
|
|
|
for opt in t s e p
|
|
|
|
do
|
2008-04-24 03:17:47 +08:00
|
|
|
test_expect_success "Passing -$opt with --$batch fails" '
|
2023-10-02 10:40:33 +08:00
|
|
|
test_must_fail git cat-file --$batch -$opt $oid
|
2008-04-24 03:17:47 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "Passing --$batch with -$opt fails" '
|
2023-10-02 10:40:33 +08:00
|
|
|
test_must_fail git cat-file -$opt --$batch $oid
|
2008-04-24 03:17:47 +08:00
|
|
|
'
|
2023-10-02 10:40:33 +08:00
|
|
|
done
|
2008-04-24 03:17:47 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success "Passing <type> with --$batch fails" '
|
|
|
|
test_must_fail git cat-file --$batch blob $oid
|
|
|
|
'
|
2008-04-24 03:17:46 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success "Passing --$batch with <type> fails" '
|
|
|
|
test_must_fail git cat-file blob --$batch $oid
|
|
|
|
'
|
2008-04-24 03:17:46 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success "Passing oid with --$batch fails" '
|
|
|
|
test_must_fail git cat-file --$batch $oid
|
|
|
|
'
|
|
|
|
done
|
2008-04-24 03:17:47 +08:00
|
|
|
done
|
2008-04-24 03:17:46 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
# For both names of the hello blob, verify that cat-file fails when
# --follow-symlinks is combined with any of -t, -s, -e or -p.
for oid in $hello_oid $hello_compat_oid
|
2015-05-21 01:03:40 +08:00
|
|
|
do
|
2023-10-02 10:40:33 +08:00
|
|
|
for opt in t s e p
|
|
|
|
do
|
|
|
|
test_expect_success "Passing -$opt with --follow-symlinks fails" '
|
|
|
|
test_must_fail git cat-file --follow-symlinks -$opt $oid
|
2015-05-21 01:03:40 +08:00
|
|
|
'
|
2023-10-02 10:40:33 +08:00
|
|
|
done
|
2015-05-21 01:03:40 +08:00
|
|
|
done
|
|
|
|
|
2008-06-09 08:02:21 +08:00
|
|
|
# Two names that resolve to nothing must each yield a "<name> missing" line.
# The input is written with printf and has no newline after the last name,
# so an unterminated final line must be processed as well.
test_expect_success "--batch-check for a non-existent named object" '
|
2023-06-06 13:19:33 +08:00
|
|
|
cat >expect <<-EOF &&
|
|
|
|
foobar42 missing
|
|
|
|
foobar84 missing
|
|
|
|
EOF
|
|
|
|
|
|
|
|
printf "foobar42\nfoobar84" >in &&
|
|
|
|
git cat-file --batch-check <in >actual &&
|
|
|
|
test_cmp expect actual
|
2008-06-09 08:02:21 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
# Same check as for unknown names, but with two 40-hex-digit object ids that
# are expected not to exist: each must be echoed back followed by "missing".
# The input again has no newline after the last line.
test_expect_success "--batch-check for a non-existent hash" '
|
2023-06-06 13:19:33 +08:00
|
|
|
cat >expect <<-EOF &&
|
|
|
|
0000000000000000000000000000000000000042 missing
|
|
|
|
0000000000000000000000000000000000000084 missing
|
|
|
|
EOF
|
|
|
|
|
|
|
|
printf "0000000000000000000000000000000000000042\n0000000000000000000000000000000000000084" >in &&
|
|
|
|
git cat-file --batch-check <in >actual &&
|
|
|
|
test_cmp expect actual
|
2008-06-09 08:02:21 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
# Feed --batch the tag's object id followed by an all-zero 40-digit id (no
# newline after the last line). Expected output: the tag's
# "<oid> tag <size>" header and its content, then a "missing" line for the
# all-zero id.
test_expect_success "--batch for an existent and a non-existent hash" '
|
2023-06-06 13:19:33 +08:00
|
|
|
cat >expect <<-EOF &&
|
2023-10-02 10:40:32 +08:00
|
|
|
$tag_oid tag $tag_size
|
2023-06-06 13:19:33 +08:00
|
|
|
$tag_content
|
|
|
|
0000000000000000000000000000000000000000 missing
|
|
|
|
EOF
|
|
|
|
|
2023-10-02 10:40:32 +08:00
|
|
|
printf "$tag_oid\n0000000000000000000000000000000000000000" >in &&
|
2023-06-06 13:19:33 +08:00
|
|
|
git cat-file --batch <in >actual &&
|
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:46 +08:00
|
|
|
'
|
|
|
|
|
2016-08-09 16:53:38 +08:00
|
|
|
# An input consisting of a single empty line (written by a bare "echo") must
# be answered with a "missing" report for the empty object name.
test_expect_success "--batch-check for an empty line" '
|
2023-06-06 13:19:33 +08:00
|
|
|
cat >expect <<-EOF &&
|
|
|
|
missing
|
|
|
|
EOF
|
|
|
|
|
|
|
|
echo >in &&
|
|
|
|
git cat-file --batch-check <in >actual &&
|
|
|
|
test_cmp expect actual
|
2008-04-24 03:17:46 +08:00
|
|
|
'
|
|
|
|
|
2013-11-07 02:00:57 +08:00
|
|
|
# Even when --batch-check is given an empty format string, an object that
# cannot be found ($ZERO_OID here) must still be reported as
# "$ZERO_OID missing".
test_expect_success 'empty --batch-check notices missing object' '
|
2018-05-13 10:24:13 +08:00
|
|
|
echo "$ZERO_OID missing" >expect &&
|
|
|
|
echo "$ZERO_OID" | git cat-file --batch-check="" >actual &&
|
2013-11-07 02:00:57 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
batch_tests () {
|
|
|
|
boid=$1
|
|
|
|
loid=$2
|
|
|
|
lsize=$3
|
|
|
|
coid=$4
|
|
|
|
csize=$5
|
|
|
|
ccontent=$6
|
|
|
|
toid=$7
|
|
|
|
tsize=$8
|
|
|
|
tcontent=$9
|
|
|
|
|
|
|
|
batch_input="$boid
|
|
|
|
$coid
|
|
|
|
$toid
|
2008-04-24 03:17:47 +08:00
|
|
|
deadbeef
|
|
|
|
|
|
|
|
"
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
printf "%s\0" \
|
|
|
|
"$boid blob $hello_size" \
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
"$hello_content" \
|
2023-10-02 10:40:33 +08:00
|
|
|
"$coid commit $csize" \
|
|
|
|
"$ccontent" \
|
|
|
|
"$toid tag $tsize" \
|
|
|
|
"$tcontent" \
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
"deadbeef missing" \
|
|
|
|
" missing" >batch_output
|
2008-04-24 03:17:47 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success '--batch with multiple oids gives correct format' '
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
tr "\0" "\n" <batch_output >expect &&
|
2023-06-06 13:19:33 +08:00
|
|
|
echo_without_newline "$batch_input" >in &&
|
|
|
|
git cat-file --batch <in >actual &&
|
2023-06-06 13:19:29 +08:00
|
|
|
test_cmp expect actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
2008-04-24 03:17:47 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success '--batch, -z with multiple oids gives correct format' '
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
echo_without_newline_nul "$batch_input" >in &&
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
tr "\0" "\n" <batch_output >expect &&
|
2023-06-06 13:19:29 +08:00
|
|
|
git cat-file --batch -z <in >actual &&
|
|
|
|
test_cmp expect actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success '--batch, -Z with multiple oids gives correct format' '
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
echo_without_newline_nul "$batch_input" >in &&
|
|
|
|
git cat-file --batch -Z <in >actual &&
|
|
|
|
test_cmp batch_output actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
batch_check_input="$boid
|
|
|
|
$loid
|
|
|
|
$coid
|
|
|
|
$toid
|
2008-04-24 03:17:46 +08:00
|
|
|
deadbeef
|
|
|
|
|
|
|
|
"
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
printf "%s\0" \
|
|
|
|
"$boid blob $hello_size" \
|
|
|
|
"$loid tree $lsize" \
|
|
|
|
"$coid commit $csize" \
|
|
|
|
"$toid tag $tsize" \
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
"deadbeef missing" \
|
|
|
|
" missing" >batch_check_output
|
2008-04-24 03:17:46 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success "--batch-check with multiple oids gives correct format" '
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
tr "\0" "\n" <batch_check_output >expect &&
|
2023-06-06 13:19:33 +08:00
|
|
|
echo_without_newline "$batch_check_input" >in &&
|
|
|
|
git cat-file --batch-check <in >actual &&
|
|
|
|
test_cmp expect actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
2008-04-24 03:17:46 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success "--batch-check, -z with multiple oids gives correct format" '
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
tr "\0" "\n" <batch_check_output >expect &&
|
2023-06-06 13:19:33 +08:00
|
|
|
echo_without_newline_nul "$batch_check_input" >in &&
|
|
|
|
git cat-file --batch-check -z <in >actual &&
|
|
|
|
test_cmp expect actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success "--batch-check, -Z with multiple oids gives correct format" '
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
echo_without_newline_nul "$batch_check_input" >in &&
|
|
|
|
git cat-file --batch-check -Z <in >actual &&
|
|
|
|
test_cmp batch_check_output actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
batch_command_multiple_info="info $boid
|
|
|
|
info $loid
|
|
|
|
info $coid
|
|
|
|
info $toid
|
2022-07-23 07:29:02 +08:00
|
|
|
info deadbeef"
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success '--batch-command with multiple info calls gives correct format' '
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
cat >expect <<-EOF &&
|
2023-10-02 10:40:33 +08:00
|
|
|
$boid blob $hello_size
|
|
|
|
$loid tree $lsize
|
|
|
|
$coid commit $csize
|
|
|
|
$toid tag $tsize
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
deadbeef missing
|
|
|
|
EOF
|
|
|
|
|
2022-07-23 07:29:02 +08:00
|
|
|
echo "$batch_command_multiple_info" >in &&
|
|
|
|
git cat-file --batch-command --buffer <in >actual &&
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
test_cmp expect actual &&
|
|
|
|
|
|
|
|
echo "$batch_command_multiple_info" | tr "\n" "\0" >in &&
|
|
|
|
git cat-file --batch-command --buffer -z <in >actual &&
|
|
|
|
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
test_cmp expect actual &&
|
|
|
|
|
|
|
|
echo "$batch_command_multiple_info" | tr "\n" "\0" >in &&
|
|
|
|
tr "\n" "\0" <expect >expect_nul &&
|
|
|
|
git cat-file --batch-command --buffer -Z <in >actual &&
|
|
|
|
|
|
|
|
test_cmp expect_nul actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
# Command script for `git cat-file --batch-command --buffer`: one "contents"
# request each for the blob ($boid), commit ($coid) and tag ($toid) objects,
# then one for "deadbeef", which the test using this variable expects to be
# reported as "deadbeef missing". The trailing "flush" is needed because
# commands are queued under --buffer until a flush is issued.
batch_command_multiple_contents="contents $boid
|
|
|
|
contents $coid
|
|
|
|
contents $toid
|
2022-07-23 07:29:02 +08:00
|
|
|
contents deadbeef
|
|
|
|
flush"
|
|
|
|
|
2023-10-02 10:40:33 +08:00
|
|
|
test_expect_success '--batch-command with multiple command calls gives correct format' '
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
printf "%s\0" \
|
2023-10-02 10:40:33 +08:00
|
|
|
"$boid blob $hello_size" \
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
"$hello_content" \
|
2023-10-02 10:40:33 +08:00
|
|
|
"$coid commit $csize" \
|
|
|
|
"$ccontent" \
|
|
|
|
"$toid tag $tsize" \
|
|
|
|
"$tcontent" \
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
"deadbeef missing" >expect_nul &&
|
|
|
|
tr "\0" "\n" <expect_nul >expect &&
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
|
2022-07-23 07:29:02 +08:00
|
|
|
echo "$batch_command_multiple_contents" >in &&
|
2023-06-06 13:19:29 +08:00
|
|
|
git cat-file --batch-command --buffer <in >actual &&
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
test_cmp expect actual &&
|
|
|
|
|
|
|
|
echo "$batch_command_multiple_contents" | tr "\n" "\0" >in &&
|
2023-06-06 13:19:29 +08:00
|
|
|
git cat-file --batch-command --buffer -z <in >actual &&
|
builtin/cat-file.c: support NUL-delimited input with `-z`
When callers are using `cat-file` via one of the stdin-driven `--batch`
modes, all input is newline-delimited. This presents a problem when
callers wish to ask about, e.g. tree-entries that have a newline
character present in their filename.
To support this niche scenario, introduce a new `-z` mode to the
`--batch`, `--batch-check`, and `--batch-command` suite of options that
instructs `cat-file` to treat its input as NUL-delimited, allowing the
individual commands themselves to have newlines present.
The refactoring here is slightly unfortunate, since we turn loops like:
while (strbuf_getline(&buf, stdin) != EOF)
into:
while (1) {
int ret;
if (opt->nul_terminated)
ret = strbuf_getline_nul(&input, stdin);
else
ret = strbuf_getline(&input, stdin);
if (ret == EOF)
break;
}
It's tempting to think that we could use `strbuf_getwholeline()` and
specify either `\n` or `\0` as the terminating character. But for input
on platforms that include a CR character preceding the LF, this
wouldn't quite be the same, since `strbuf_getline(...)` will trim any
trailing CR, while `strbuf_getwholeline(&buf, stdin, '\n')` will not.
In the future, we could clean this up further by introducing a variant
of `strbuf_getwholeline()` that addresses the aforementioned gap, but
that approach felt too heavy-handed for this pair of uses.
Some tests are added in t1006 to ensure that `cat-file` produces the
same output in `--batch`, `--batch-check`, and `--batch-command` modes
with and without the new `-z` option.
Signed-off-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-07-23 07:29:05 +08:00
|
|
|
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` was only introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
test_cmp expect actual &&
|
|
|
|
|
|
|
|
echo "$batch_command_multiple_contents" | tr "\n" "\0" >in &&
|
|
|
|
git cat-file --batch-command --buffer -Z <in >actual &&
|
|
|
|
|
|
|
|
test_cmp expect_nul actual
|
2023-10-02 10:40:33 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
batch_tests $hello_oid $tree_oid $tree_size $commit_oid $commit_size "$commit_content" $tag_oid $tag_size "$tag_content"
|
|
|
|
batch_tests $hello_compat_oid $tree_compat_oid $tree_compat_size $commit_compat_oid $commit_compat_size "$commit_compat_content" $tag_compat_oid $tag_compat_size "$tag_compat_content"
|
|
|
|
|
|
|
|
|
|
|
|
test_expect_success FUNNYNAMES 'setup with newline in input' '
|
|
|
|
touch -- "newline${LF}embedded" &&
|
|
|
|
git add -- "newline${LF}embedded" &&
|
|
|
|
git commit -m "file with newline embedded" &&
|
|
|
|
test_tick &&
|
|
|
|
|
|
|
|
printf "HEAD:newline${LF}embedded" >in
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success FUNNYNAMES '--batch-check, -z with newline in input' '
|
|
|
|
git cat-file --batch-check -z <in >actual &&
|
|
|
|
echo "$(git rev-parse "HEAD:newline${LF}embedded") blob 0" >expect &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success FUNNYNAMES '--batch-check, -Z with newline in input' '
|
|
|
|
git cat-file --batch-check -Z <in >actual &&
|
|
|
|
printf "%s\0" "$(git rev-parse "HEAD:newline${LF}embedded") blob 0" >expect &&
|
|
|
|
test_cmp expect actual
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
'
|
|
|
|
|
2013-12-21 22:25:22 +08:00
|
|
|
test_expect_success 'setup blobs which are likely to delta' '
|
2018-03-24 15:44:42 +08:00
|
|
|
test-tool genrandom foo 10240 >foo &&
|
tests: fix broken &&-chains in `{...}` groups
The top-level &&-chain checker built into t/test-lib.sh causes tests to
magically exit with code 117 if the &&-chain is broken. However, it has
the shortcoming that the magic does not work within `{...}` groups,
`(...)` subshells, `$(...)` substitutions, or within bodies of compound
statements, such as `if`, `for`, `while`, `case`, etc. `chainlint.sed`
partly fills in the gap by catching broken &&-chains in `(...)`
subshells, but bugs can still lurk behind broken &&-chains in the other
cases.
Fix broken &&-chains in `{...}` groups in order to reduce the number of
possible lurking bugs.
Signed-off-by: Eric Sunshine <sunshine@sunshineco.com>
Reviewed-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-12-09 13:11:08 +08:00
|
|
|
{ cat foo && echo plus; } >foo-plus &&
|
2013-12-21 22:25:22 +08:00
|
|
|
git add foo foo-plus &&
|
|
|
|
git commit -m foo &&
|
|
|
|
cat >blobs <<-\EOF
|
|
|
|
HEAD:foo
|
|
|
|
HEAD:foo-plus
|
|
|
|
EOF
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'confirm that neither loose blob is a delta' '
|
2015-03-20 18:07:15 +08:00
|
|
|
cat >expect <<-EOF &&
|
2018-05-13 10:24:13 +08:00
|
|
|
$ZERO_OID
|
|
|
|
$ZERO_OID
|
2013-12-21 22:25:22 +08:00
|
|
|
EOF
|
|
|
|
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
# To avoid relying too much on the current delta heuristics,
|
|
|
|
# we will check only that one of the two objects is a delta
|
|
|
|
# against the other, but not the order. We can do so by just
|
|
|
|
# asking for the base of both, and checking whether either
|
2023-10-02 10:40:32 +08:00
|
|
|
# oid appears in the output.
|
2013-12-21 22:25:22 +08:00
|
|
|
test_expect_success '%(deltabase) reports packed delta bases' '
|
|
|
|
git repack -ad &&
|
|
|
|
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
|
|
|
{
|
|
|
|
grep "$(git rev-parse HEAD:foo)" actual ||
|
|
|
|
grep "$(git rev-parse HEAD:foo-plus)" actual
|
|
|
|
}
|
|
|
|
'
|
|
|
|
|
2021-10-01 17:16:41 +08:00
|
|
|
test_expect_success 'setup bogus data' '
|
|
|
|
bogus_short_type="bogus" &&
|
|
|
|
bogus_short_content="bogus" &&
|
|
|
|
bogus_short_size=$(strlen "$bogus_short_content") &&
|
2023-10-02 10:40:32 +08:00
|
|
|
bogus_short_oid=$(echo_without_newline "$bogus_short_content" | git hash-object -t $bogus_short_type --literally -w --stdin) &&
|
2021-10-01 17:16:41 +08:00
|
|
|
|
|
|
|
bogus_long_type="abcdefghijklmnopqrstuvwxyz1234679" &&
|
|
|
|
bogus_long_content="bogus" &&
|
|
|
|
bogus_long_size=$(strlen "$bogus_long_content") &&
|
2023-10-02 10:40:32 +08:00
|
|
|
bogus_long_oid=$(echo_without_newline "$bogus_long_content" | git hash-object -t $bogus_long_type --literally -w --stdin)
|
2021-10-01 17:16:41 +08:00
|
|
|
'
|
2015-05-03 22:30:02 +08:00
|
|
|
|
2021-10-01 17:16:42 +08:00
|
|
|
for arg1 in '' --allow-unknown-type
|
|
|
|
do
|
|
|
|
for arg2 in -s -t -p
|
|
|
|
do
|
|
|
|
if test "$arg1" = "--allow-unknown-type" && test "$arg2" = "-p"
|
|
|
|
then
|
|
|
|
continue
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
test_expect_success "cat-file $arg1 $arg2 error on bogus short OID" '
|
|
|
|
cat >expect <<-\EOF &&
|
|
|
|
fatal: invalid object type
|
|
|
|
EOF
|
|
|
|
|
|
|
|
if test "$arg1" = "--allow-unknown-type"
|
|
|
|
then
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file $arg1 $arg2 $bogus_short_oid
|
2021-10-01 17:16:42 +08:00
|
|
|
else
|
2023-10-02 10:40:32 +08:00
|
|
|
test_must_fail git cat-file $arg1 $arg2 $bogus_short_oid >out 2>actual &&
|
2021-10-01 17:16:42 +08:00
|
|
|
test_must_be_empty out &&
|
|
|
|
test_cmp expect actual
|
|
|
|
fi
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "cat-file $arg1 $arg2 error on bogus full OID" '
|
|
|
|
if test "$arg2" = "-p"
|
|
|
|
then
|
|
|
|
cat >expect <<-EOF
|
2023-10-02 10:40:32 +08:00
|
|
|
error: header for $bogus_long_oid too long, exceeds 32 bytes
|
|
|
|
fatal: Not a valid object name $bogus_long_oid
|
2021-10-01 17:16:42 +08:00
|
|
|
EOF
|
|
|
|
else
|
|
|
|
cat >expect <<-EOF
|
2023-10-02 10:40:32 +08:00
|
|
|
error: header for $bogus_long_oid too long, exceeds 32 bytes
|
2021-10-01 17:16:42 +08:00
|
|
|
fatal: git cat-file: could not get object info
|
|
|
|
EOF
|
|
|
|
fi &&
|
|
|
|
|
|
|
|
if test "$arg1" = "--allow-unknown-type"
|
|
|
|
then
|
2023-10-02 10:40:32 +08:00
|
|
|
# This is the "full OID" variant: query the long-typed bogus object, as the else branch does.
git cat-file $arg1 $arg2 $bogus_long_oid
|
2021-10-01 17:16:42 +08:00
|
|
|
else
|
2023-10-02 10:40:32 +08:00
|
|
|
test_must_fail git cat-file $arg1 $arg2 $bogus_long_oid >out 2>actual &&
|
2021-10-01 17:16:42 +08:00
|
|
|
test_must_be_empty out &&
|
|
|
|
test_cmp expect actual
|
|
|
|
fi
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "cat-file $arg1 $arg2 error on missing short OID" '
|
|
|
|
cat >expect.err <<-EOF &&
|
|
|
|
fatal: Not a valid object name $(test_oid deadbeef_short)
|
|
|
|
EOF
|
|
|
|
test_must_fail git cat-file $arg1 $arg2 $(test_oid deadbeef_short) >out 2>err.actual &&
|
2023-03-18 23:46:41 +08:00
|
|
|
test_must_be_empty out &&
|
|
|
|
test_cmp expect.err err.actual
|
2021-10-01 17:16:42 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "cat-file $arg1 $arg2 error on missing full OID" '
|
|
|
|
if test "$arg2" = "-p"
|
|
|
|
then
|
|
|
|
cat >expect.err <<-EOF
|
|
|
|
fatal: Not a valid object name $(test_oid deadbeef)
|
|
|
|
EOF
|
|
|
|
else
|
|
|
|
cat >expect.err <<-\EOF
|
|
|
|
fatal: git cat-file: could not get object info
|
|
|
|
EOF
|
|
|
|
fi &&
|
|
|
|
test_must_fail git cat-file $arg1 $arg2 $(test_oid deadbeef) >out 2>err.actual &&
|
|
|
|
test_must_be_empty out &&
|
|
|
|
test_cmp expect.err err.actual
|
|
|
|
'
|
|
|
|
done
|
|
|
|
done
|
|
|
|
|
2021-10-01 17:16:44 +08:00
|
|
|
test_expect_success '-e is OK with a broken object without --allow-unknown-type' '
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -e $bogus_short_oid
|
2021-10-01 17:16:44 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success '-e can not be combined with --allow-unknown-type' '
|
2023-10-02 10:40:32 +08:00
|
|
|
test_expect_code 128 git cat-file -e --allow-unknown-type $bogus_short_oid
|
2021-10-01 17:16:44 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success '-p cannot print a broken object even with --allow-unknown-type' '
|
2023-10-02 10:40:32 +08:00
|
|
|
test_must_fail git cat-file -p $bogus_short_oid &&
|
|
|
|
test_expect_code 128 git cat-file -p --allow-unknown-type $bogus_short_oid
|
2021-10-01 17:16:44 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success '<type> <hash> does not work with objects of broken types' '
|
|
|
|
cat >err.expect <<-\EOF &&
|
|
|
|
fatal: invalid object type "bogus"
|
|
|
|
EOF
|
2023-10-02 10:40:32 +08:00
|
|
|
test_must_fail git cat-file $bogus_short_type $bogus_short_oid 2>err.actual &&
|
2021-10-01 17:16:44 +08:00
|
|
|
test_cmp err.expect err.actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'broken types combined with --batch and --batch-check' '
|
2023-10-02 10:40:32 +08:00
|
|
|
echo $bogus_short_oid >bogus-oid &&
|
2021-10-01 17:16:44 +08:00
|
|
|
|
|
|
|
cat >err.expect <<-\EOF &&
|
|
|
|
fatal: invalid object type
|
|
|
|
EOF
|
|
|
|
|
|
|
|
test_must_fail git cat-file --batch <bogus-oid 2>err.actual &&
|
|
|
|
test_cmp err.expect err.actual &&
|
|
|
|
|
|
|
|
test_must_fail git cat-file --batch-check <bogus-oid 2>err.actual &&
|
|
|
|
test_cmp err.expect err.actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'the --batch and --batch-check options do not combine with --allow-unknown-type' '
|
|
|
|
test_expect_code 128 git cat-file --batch --allow-unknown-type <bogus-oid &&
|
|
|
|
test_expect_code 128 git cat-file --batch-check --allow-unknown-type <bogus-oid
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'the --allow-unknown-type option does not consider replacement refs' '
|
|
|
|
cat >expect <<-EOF &&
|
|
|
|
$bogus_short_type
|
|
|
|
EOF
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -t --allow-unknown-type $bogus_short_oid >actual &&
|
2021-10-01 17:16:44 +08:00
|
|
|
test_cmp expect actual &&
|
|
|
|
|
|
|
|
# Create it manually, as "git replace" will die on bogus
|
|
|
|
# types.
|
|
|
|
head=$(git rev-parse --verify HEAD) &&
|
2023-10-02 10:40:32 +08:00
|
|
|
test_when_finished "test-tool ref-store main delete-refs 0 msg refs/replace/$bogus_short_oid" &&
|
|
|
|
test-tool ref-store main update-ref msg "refs/replace/$bogus_short_oid" $head $ZERO_OID REF_SKIP_OID_VERIFICATION &&
|
2021-10-01 17:16:44 +08:00
|
|
|
|
|
|
|
cat >expect <<-EOF &&
|
|
|
|
commit
|
|
|
|
EOF
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -t --allow-unknown-type $bogus_short_oid >actual &&
|
2021-10-01 17:16:44 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
2015-05-03 22:30:02 +08:00
|
|
|
|
|
|
|
test_expect_success "Type of broken object is correct" '
|
2021-10-01 17:16:41 +08:00
|
|
|
echo $bogus_short_type >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -t --allow-unknown-type $bogus_short_oid >actual &&
|
2015-05-03 22:30:02 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "Size of broken object is correct" '
|
2021-10-01 17:16:41 +08:00
|
|
|
echo $bogus_short_size >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -s --allow-unknown-type $bogus_short_oid >actual &&
|
2015-05-03 22:30:02 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
2021-10-06 04:30:36 +08:00
|
|
|
|
|
|
|
test_expect_success 'clean up broken object' '
|
2023-10-02 10:40:32 +08:00
|
|
|
rm .git/objects/$(test_oid_to_path $bogus_short_oid)
|
2021-10-06 04:30:36 +08:00
|
|
|
'
|
|
|
|
|
2015-05-03 22:30:02 +08:00
|
|
|
test_expect_success "Type of broken object is correct when type is large" '
|
2021-10-01 17:16:41 +08:00
|
|
|
echo $bogus_long_type >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -t --allow-unknown-type $bogus_long_oid >actual &&
|
2015-05-03 22:30:02 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success "Size of large broken object is correct when type is large" '
|
2021-10-01 17:16:41 +08:00
|
|
|
echo $bogus_long_size >expect &&
|
2023-10-02 10:40:32 +08:00
|
|
|
git cat-file -s --allow-unknown-type $bogus_long_oid >actual &&
|
2015-05-03 22:30:02 +08:00
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
2021-10-06 04:30:36 +08:00
|
|
|
test_expect_success 'clean up broken object' '
|
2023-10-02 10:40:32 +08:00
|
|
|
rm .git/objects/$(test_oid_to_path $bogus_long_oid)
|
2021-10-26 07:06:56 +08:00
|
|
|
'
|
|
|
|
|
2021-10-01 17:16:43 +08:00
|
|
|
test_expect_success 'cat-file -t and -s on corrupt loose object' '
|
|
|
|
git init --bare corrupt-loose.git &&
|
|
|
|
(
|
|
|
|
cd corrupt-loose.git &&
|
|
|
|
|
|
|
|
# Setup and create the empty blob and its path
|
|
|
|
empty_path=$(git rev-parse --git-path objects/$(test_oid_to_path "$EMPTY_BLOB")) &&
|
2022-05-13 06:32:18 +08:00
|
|
|
empty_blob=$(git hash-object -w --stdin </dev/null) &&
|
2021-10-01 17:16:43 +08:00
|
|
|
|
|
|
|
# Create another blob and its path
|
|
|
|
echo other >other.blob &&
|
|
|
|
other_blob=$(git hash-object -w --stdin <other.blob) &&
|
|
|
|
other_path=$(git rev-parse --git-path objects/$(test_oid_to_path "$other_blob")) &&
|
|
|
|
|
|
|
|
# Before the swap the size is 0
|
|
|
|
cat >out.expect <<-EOF &&
|
|
|
|
0
|
|
|
|
EOF
|
|
|
|
git cat-file -s "$EMPTY_BLOB" >out.actual 2>err.actual &&
|
|
|
|
test_must_be_empty err.actual &&
|
|
|
|
test_cmp out.expect out.actual &&
|
|
|
|
|
|
|
|
# Swap the two to corrupt the repository
|
|
|
|
mv -f "$other_path" "$empty_path" &&
|
|
|
|
test_must_fail git fsck 2>err.fsck &&
|
fsck: report invalid object type-path combinations
Improve the error that's emitted in cases where we find a loose object
we parse, but which isn't at the location we expect it to be.
Before this change we'd prefix the error with a not-a-OID derived from
the path at which the object was found, due to an emergent behavior in
how we'd end up with an "OID" in these codepaths.
Now we'll instead say what object we hashed, and what path it was
found at. Before this patch series e.g.:
$ git hash-object --stdin -w -t blob </dev/null
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
$ mv objects/e6/ objects/e7
Would emit ("[...]" used to abbreviate the OIDs):
git fsck
error: hash mismatch for ./objects/e7/9d[...] (expected e79d[...])
error: e79d[...]: object corrupt or missing: ./objects/e7/9d[...]
Now we'll instead emit:
error: e69d[...]: hash-path mismatch, found at: ./objects/e7/9d[...]
Furthermore, we'll do the right thing when the object type and its
location are bad. I.e. this case:
$ git hash-object --stdin -w -t garbage --literally </dev/null
8315a83d2acc4c174aed59430f9a9c4ed926440f
$ mv objects/83 objects/84
As noted in earlier commits, we'd simply die early in those cases,
until preceding commits fixed the hard die on invalid object type:
$ git fsck
fatal: invalid object type
Now we'll instead emit sensible error messages:
$ git fsck
error: 8315[...]: hash-path mismatch, found at: ./objects/84/15[...]
error: 8315[...]: object is of unknown type 'garbage': ./objects/84/15[...]
In both fsck.c and object-file.c we're using null_oid as a sentinel
value for checking whether we got far enough to be certain that the
issue was indeed this OID mismatch.
We need to add the "object corrupt or missing" special-case to deal
with cases where read_loose_object() will return an error before
completing check_object_signature(), e.g. if we have an error in
unpack_loose_rest() because we find garbage after the valid gzip
content:
$ git hash-object --stdin -w -t blob </dev/null
e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
$ chmod 755 objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
$ echo garbage >>objects/e6/9de29bb2d1d6434b8b29ae775ad8c2e48c5391
$ git fsck
error: garbage at end of loose object 'e69d[...]'
error: unable to unpack contents of ./objects/e6/9d[...]
error: e69d[...]: object corrupt or missing: ./objects/e6/9d[...]
There is currently some weird messaging in the edge case when the two
are combined, i.e. because we're not explicitly passing along an error
state about this specific scenario from check_stream_oid() via
read_loose_object() we'll end up printing the null OID if an object is
of an unknown type *and* it can't be unpacked by zlib, e.g.:
$ git hash-object --stdin -w -t garbage --literally </dev/null
8315a83d2acc4c174aed59430f9a9c4ed926440f
$ chmod 755 objects/83/15a83d2acc4c174aed59430f9a9c4ed926440f
$ echo garbage >>objects/83/15a83d2acc4c174aed59430f9a9c4ed926440f
$ /usr/bin/git fsck
fatal: invalid object type
$ ~/g/git/git fsck
error: garbage at end of loose object '8315a83d2acc4c174aed59430f9a9c4ed926440f'
error: unable to unpack contents of ./objects/83/15a83d2acc4c174aed59430f9a9c4ed926440f
error: 8315a83d2acc4c174aed59430f9a9c4ed926440f: object corrupt or missing: ./objects/83/15a83d2acc4c174aed59430f9a9c4ed926440f
error: 0000000000000000000000000000000000000000: object is of unknown type 'garbage': ./objects/83/15a83d2acc4c174aed59430f9a9c4ed926440f
[...]
I think it's OK to leave that for future improvements, which would
involve enum-ifying more error state as we've done with "enum
unpack_loose_header_result" in preceding commits. In these
increasingly more obscure cases the worst that can happen is that
we'll get slightly nonsensical or inapplicable error messages.
There are other such potential edge cases, all of which might produce
some confusing messaging, but still be handled correctly as far as
passing along errors goes. E.g. if check_object_signature() returns
and oideq(real_oid, null_oid()) is true, which could happen if it
returns -1 due to the read_istream() call having failed.
Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-01 17:16:53 +08:00
|
|
|
grep "hash-path mismatch" err.fsck &&
|
2021-10-01 17:16:43 +08:00
|
|
|
|
|
|
|
# confirm that cat-file is reading the new swapped-in
|
|
|
|
# blob...
|
|
|
|
cat >out.expect <<-EOF &&
|
|
|
|
blob
|
|
|
|
EOF
|
|
|
|
git cat-file -t "$EMPTY_BLOB" >out.actual 2>err.actual &&
|
|
|
|
test_must_be_empty err.actual &&
|
|
|
|
test_cmp out.expect out.actual &&
|
|
|
|
|
|
|
|
# ... since it has a different size now.
|
|
|
|
cat >out.expect <<-EOF &&
|
|
|
|
6
|
|
|
|
EOF
|
|
|
|
git cat-file -s "$EMPTY_BLOB" >out.actual 2>err.actual &&
|
|
|
|
test_must_be_empty err.actual &&
|
|
|
|
test_cmp out.expect out.actual &&
|
|
|
|
|
|
|
|
# So far "cat-file" has been happy to spew the found
|
|
|
|
# content out as-is. Try to make it zlib-invalid.
|
|
|
|
mv -f other.blob "$empty_path" &&
|
|
|
|
test_must_fail git fsck 2>err.fsck &&
|
2022-05-13 06:32:18 +08:00
|
|
|
cat >expect <<-EOF &&
|
|
|
|
error: inflate: data stream error (incorrect header check)
|
|
|
|
error: unable to unpack header of ./$empty_path
|
|
|
|
error: $empty_blob: object corrupt or missing: ./$empty_path
|
|
|
|
EOF
|
|
|
|
grep "^error: " err.fsck >actual &&
|
|
|
|
test_cmp expect actual
|
2021-10-01 17:16:43 +08:00
|
|
|
)
|
2021-10-06 04:30:36 +08:00
|
|
|
'
|
|
|
|
|
2015-05-21 01:03:40 +08:00
|
|
|
# Tests for git cat-file --follow-symlinks
|
|
|
|
test_expect_success 'prep for symlink tests' '
|
|
|
|
echo_without_newline "$hello_content" >morx &&
|
|
|
|
test_ln_s_add morx same-dir-link &&
|
|
|
|
test_ln_s_add dir link-to-dir &&
|
|
|
|
test_ln_s_add ../fleem out-of-repo-link &&
|
|
|
|
test_ln_s_add .. out-of-repo-link-dir &&
|
|
|
|
test_ln_s_add same-dir-link link-to-link &&
|
|
|
|
test_ln_s_add nope broken-same-dir-link &&
|
|
|
|
mkdir dir &&
|
|
|
|
test_ln_s_add ../morx dir/parent-dir-link &&
|
|
|
|
test_ln_s_add .. dir/link-dir &&
|
|
|
|
test_ln_s_add ../../escape dir/out-of-repo-link &&
|
|
|
|
test_ln_s_add ../.. dir/out-of-repo-link-dir &&
|
|
|
|
test_ln_s_add nope dir/broken-link-in-dir &&
|
|
|
|
mkdir dir/subdir &&
|
|
|
|
test_ln_s_add ../../morx dir/subdir/grandparent-dir-link &&
|
|
|
|
test_ln_s_add ../../../great-escape dir/subdir/out-of-repo-link &&
|
|
|
|
test_ln_s_add ../../.. dir/subdir/out-of-repo-link-dir &&
|
|
|
|
test_ln_s_add ../../../ dir/subdir/out-of-repo-link-dir-trailing &&
|
|
|
|
test_ln_s_add ../parent-dir-link dir/subdir/parent-dir-link-to-link &&
|
|
|
|
echo_without_newline "$hello_content" >dir/subdir/ind2 &&
|
|
|
|
echo_without_newline "$hello_content" >dir/ind1 &&
|
|
|
|
test_ln_s_add dir dirlink &&
|
|
|
|
test_ln_s_add dir/subdir subdirlink &&
|
|
|
|
test_ln_s_add subdir/ind2 dir/link-to-child &&
|
|
|
|
test_ln_s_add dir/link-to-child link-to-down-link &&
|
|
|
|
test_ln_s_add dir/.. up-down &&
|
|
|
|
test_ln_s_add dir/../ up-down-trailing &&
|
|
|
|
test_ln_s_add dir/../morx up-down-file &&
|
|
|
|
test_ln_s_add dir/../../morx up-up-down-file &&
|
|
|
|
test_ln_s_add subdirlink/../../morx up-two-down-file &&
|
|
|
|
test_ln_s_add loop1 loop2 &&
|
|
|
|
test_ln_s_add loop2 loop1 &&
|
|
|
|
git add morx dir/subdir/ind2 dir/ind1 &&
|
|
|
|
git commit -am "test" &&
|
2023-10-02 10:40:32 +08:00
|
|
|
echo $hello_oid blob $hello_size >found
|
2015-05-21 01:03:40 +08:00
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for non-links' '
|
|
|
|
echo HEAD:morx | git cat-file --batch-check --follow-symlinks >actual &&
|
|
|
|
test_cmp found actual &&
|
|
|
|
echo HEAD:nope missing >expect &&
|
|
|
|
echo HEAD:nope | git cat-file --batch-check --follow-symlinks >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for in-repo, same-dir links' '
|
|
|
|
echo HEAD:same-dir-link | git cat-file --batch-check --follow-symlinks >actual &&
|
|
|
|
test_cmp found actual
|
|
|
|
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for in-repo, links to dirs' '
|
|
|
|
echo HEAD:link-to-dir/ind1 | git cat-file --batch-check --follow-symlinks >actual &&
|
|
|
|
test_cmp found actual
|
|
|
|
'
|
|
|
|
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for broken in-repo, same-dir links' '
|
|
|
|
echo dangling 25 >expect &&
|
|
|
|
echo HEAD:broken-same-dir-link >>expect &&
|
|
|
|
echo HEAD:broken-same-dir-link | git cat-file --batch-check --follow-symlinks >actual &&
|
|
|
|
test_cmp expect actual
|
|
|
|
'
|
|
|
|
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\n\commit000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` was only introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no use case that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks -Z works for broken in-repo, same-dir links' '
	# With -Z both the query and the reply are NUL-terminated records.
	printf "%s\0" "dangling 25" "HEAD:broken-same-dir-link" >expect &&
	printf "%s\0" "HEAD:broken-same-dir-link" >in &&
	git cat-file --batch-check --follow-symlinks -Z <in >actual &&
	test_cmp expect actual
'
|
|
|
|
|
2015-05-21 01:03:40 +08:00
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for same-dir links-to-links' '
	# "found" is the reference output prepared outside this test.
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:link-to-link
	EOF
	test_cmp found actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for parent-dir links' '
	# The link itself resolves to the object recorded in "found".
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:dir/parent-dir-link
	EOF
	test_cmp found actual &&

	# Descending below the resolved link is reported as "notdir",
	# followed by the query echoed on its own line.
	printf "%s\n" "notdir 29" "HEAD:dir/parent-dir-link/nope" >expect &&
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:dir/parent-dir-link/nope
	EOF
	test_cmp expect actual
'
|
|
|
|
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\ncommit\000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no usecase that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks -Z works for parent-dir links' '
	# Sanity check in the default newline-delimited mode first.
	echo HEAD:dir/parent-dir-link |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp found actual &&

	# Same failing lookup as the non -Z test, but with NUL-terminated
	# input and output records.
	printf "%s\0" "notdir 29" "HEAD:dir/parent-dir-link/nope" >expect &&
	printf "%s\0" "HEAD:dir/parent-dir-link/nope" >in &&
	git cat-file --batch-check --follow-symlinks -Z <in >actual &&
	test_cmp expect actual
'
|
|
|
|
|
2015-05-21 01:03:40 +08:00
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for .. links' '
	# A missing entry behind the directory link is "dangling".
	cat >expect <<-\EOF &&
	dangling 22
	HEAD:dir/link-dir/nope
	EOF
	echo HEAD:dir/link-dir/nope |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual &&

	# An existing entry behind the same link resolves normally.
	echo HEAD:dir/link-dir/morx |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp found actual &&

	# A broken link stored inside a directory is "dangling" as well.
	cat >expect <<-\EOF &&
	dangling 27
	HEAD:dir/broken-link-in-dir
	EOF
	echo HEAD:dir/broken-link-in-dir |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for ../.. links' '
	# Looking below the resolved link target yields "notdir".
	cat >expect <<-\EOF &&
	notdir 41
	HEAD:dir/subdir/grandparent-dir-link/nope
	EOF
	echo HEAD:dir/subdir/grandparent-dir-link/nope |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual &&

	# Both the direct link and the link-to-link resolve to "found".
	echo HEAD:dir/subdir/grandparent-dir-link |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp found actual &&
	echo HEAD:dir/subdir/parent-dir-link-to-link |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp found actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir/ links' '
	# Missing entry behind a link to a directory.
	cat >expect <<-\EOF &&
	dangling 17
	HEAD:dirlink/morx
	EOF
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:dirlink/morx
	EOF
	test_cmp expect actual &&

	# Existing entry behind the same link; it is expected to be the
	# blob described by $hello_oid and $hello_size (set outside this test).
	echo $hello_oid blob $hello_size >expect &&
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:dirlink/ind1
	EOF
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir/subdir links' '
	# Missing entry behind a link to a nested directory.
	printf "%s\n" "dangling 20" "HEAD:subdirlink/morx" >expect &&
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:subdirlink/morx
	EOF
	test_cmp expect actual &&

	# Existing entry behind the same link.
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:subdirlink/ind2
	EOF
	test_cmp found actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for dir ->subdir links' '
	# Looking below the resolved link target yields "notdir".
	printf "%s\n" "notdir 27" "HEAD:dir/link-to-child/morx" >expect &&
	echo HEAD:dir/link-to-child/morx |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual &&

	# The link itself, and a top-level link pointing down into the
	# tree, both resolve to "found".
	echo HEAD:dir/link-to-child |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp found actual &&
	echo HEAD:link-to-down-link |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp found actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks' '
	# Links leaving the tree are reported as "symlink <len>" followed
	# by the remaining path on its own line.
	cat >expect <<-\EOF &&
	symlink 8
	../fleem
	EOF
	echo HEAD:out-of-repo-link |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual &&

	cat >expect <<-\EOF &&
	symlink 2
	..
	EOF
	echo HEAD:out-of-repo-link-dir |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks in dirs' '
	# Same as the top-level case, but the links live one level down.
	cat >expect <<-\EOF &&
	symlink 9
	../escape
	EOF
	echo HEAD:dir/out-of-repo-link |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual &&

	cat >expect <<-\EOF &&
	symlink 2
	..
	EOF
	echo HEAD:dir/out-of-repo-link-dir |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for out-of-repo symlinks in subdirs' '
	# Links two levels down that still escape the tree.
	cat >expect <<-\EOF &&
	symlink 15
	../great-escape
	EOF
	echo HEAD:dir/subdir/out-of-repo-link |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual &&

	cat >expect <<-\EOF &&
	symlink 2
	..
	EOF
	echo HEAD:dir/subdir/out-of-repo-link-dir |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual &&

	# The trailing slash is kept in the reported path.
	cat >expect <<-\EOF &&
	symlink 3
	../
	EOF
	echo HEAD:dir/subdir/out-of-repo-link-dir-trailing |
	git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks works for symlinks with internal ..' '
	# up-down and up-down-trailing must resolve to the same object
	# that a plain lookup of "HEAD:" reports.
	echo HEAD: | git cat-file --batch-check >expect &&
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:up-down
	EOF
	test_cmp expect actual &&
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:up-down-trailing
	EOF
	test_cmp expect actual &&

	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:up-down-file
	EOF
	test_cmp found actual &&

	# One ".." too many leaves the tree and is reported as a symlink.
	printf "%s\n" "symlink 7" "../morx" >expect &&
	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:up-up-down-file
	EOF
	test_cmp expect actual &&

	git cat-file --batch-check --follow-symlinks >actual <<-\EOF &&
	HEAD:up-two-down-file
	EOF
	test_cmp found actual
'
|
|
|
|
|
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks breaks loops' '
	# A symlink cycle must be detected and reported as "loop <len>"
	# followed by the query, rather than being followed forever.
	echo loop 10 >expect &&
	echo HEAD:loop1 >>expect &&
	echo HEAD:loop1 | git cat-file --batch-check --follow-symlinks >actual &&
	test_cmp expect actual
'
|
|
|
|
|
cat-file: add option '-Z' that delimits input and output with NUL
In db9d67f2e9 (builtin/cat-file.c: support NUL-delimited input with
`-z`, 2022-07-22), we have introduced a new mode to read the input via
NUL-delimited records instead of newline-delimited records. This allows
the user to query for revisions that have newlines in their path
component. While unusual, such queries are perfectly valid and thus it
is clear that we should be able to support them properly.
Unfortunately, the commit only changed the input to be NUL-delimited,
but didn't change the output at the same time. While this is fine for
queries that are processed successfully, it is less so for queries that
aren't. In the case of missing commits for example the result can become
entirely unparsable:
```
$ printf "7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10\n1234567890\n\ncommit\000" |
git cat-file --batch -z
7ce4f05bae8120d9fa258e854a8669f6ea9cb7b1 blob 10
1234567890
commit missing
```
This is of course a crafted query that is intentionally gaming the
deficiency, but more benign queries that contain newlines would have
similar problems.
Ideally, we should have also changed the output to be NUL-delimited when
`-z` is specified to avoid this problem. As the input is NUL-delimited,
it is clear that the output in this case cannot ever contain NUL
characters by itself. Furthermore, Git does not allow NUL characters in
revisions anyway, further stressing the point that using NUL-delimited
output is safe. The only exception is of course the object data itself,
but as git-cat-file(1) prints the size of the object data clients should
read until that specified size has been consumed.
But even though `-z` has only been introduced a few releases ago in Git
v2.38.0, changing the output format retroactively to also NUL-delimit
output would be a backwards incompatible change. And while one could
make the argument that the output is inherently broken already, we need
to assume that there are existing users out there that use it just fine
given that revisions containing newlines are quite exotic.
Instead, introduce a new option `-Z` that switches to NUL-delimited
input and output. While this new option could arguably only switch the
output format to be NUL-delimited, the consequence would be that users
have to always specify both `-z` and `-Z` when the input may contain
newlines. On the other hand, if the user knows that there never will be
newlines in the input, they don't have to use either of those options.
There is thus no usecase that would warrant treating input and output
format separately, which is why we instead opt to "do the right thing"
and have `-Z` mean to NUL-terminate both formats.
The old `-z` option is marked as deprecated with a hint that its output
may become unparsable. It is thus hidden both from the synopsis as well
as the command's help output.
Co-authored-by: Toon Claes <toon@iotcl.com>
Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2023-06-06 13:19:45 +08:00
|
|
|
test_expect_success 'git cat-file --batch-check --follow-symlinks -Z breaks loops' '
	# Same cycle detection as the newline-delimited test, but with
	# NUL-terminated input and output records.
	printf "loop 10\0HEAD:loop1\0" >expect &&
	printf "HEAD:loop1\0" >in &&
	git cat-file --batch-check --follow-symlinks -Z <in >actual &&
	test_cmp expect actual
'
|
|
|
|
|
2015-05-21 01:03:40 +08:00
|
|
|
test_expect_success 'git cat-file --batch --follow-symlinks returns correct sha and mode' '
	# For this path, --follow-symlinks must not change the --batch
	# output in any way.
	echo HEAD:morx | git cat-file --batch >expect &&
	echo HEAD:morx | git cat-file --batch --follow-symlinks >actual &&
	test_cmp expect actual
'
|
2015-06-02 03:45:16 +08:00
|
|
|
|
2015-06-22 18:45:59 +08:00
|
|
|
test_expect_success 'cat-file --batch-all-objects shows all objects' '
	# Build two fresh repositories so the complete object set is known.
	# The set mixes packed and loose objects, referenced and
	# unreferenced ones, duplicates, and objects that are reachable
	# only through alternates.
	git init all-one &&
	(
		cd all-one &&
		echo content >file &&
		git add file &&
		git commit -qm base &&
		git rev-parse HEAD HEAD^{tree} HEAD:file &&
		git repack -ad &&
		echo not-cloned | git hash-object -w --stdin
	) >expect.unsorted &&

	# all-two borrows the objects of all-one and adds a loose,
	# unreferenced blob of its own.
	git clone -s all-one all-two &&
	(
		cd all-two &&
		echo local-unref | git hash-object -w --stdin
	) >>expect.unsorted &&

	# Pack HEAD:file locally as well, so that object exists twice.
	git -C all-two rev-parse HEAD:file |
	git -C all-two pack-objects .git/objects/pack/pack &&

	sort expect.unsorted >expect &&
	git -C all-two cat-file --batch-all-objects \
		--batch-check="%(objectname)" >actual &&
	test_cmp expect actual
'
|
|
|
|
|
cat-file: support "unordered" output for --batch-all-objects
If you're going to access the contents of every object in a
packfile, it's generally much more efficient to do so in
pack order, rather than in hash order. That increases the
locality of access within the packfile, which in turn is
friendlier to the delta base cache, since the packfile puts
related deltas next to each other. By contrast, hash order
is effectively random, since the sha1 has no discernible
relationship to the content.
This patch introduces an "--unordered" option to cat-file
which iterates over packs in pack-order under the hood. You
can see the results when dumping all of the file content:
$ time ./git cat-file --batch-all-objects --buffer --batch | wc -c
6883195596
real 0m44.491s
user 0m42.902s
sys 0m5.230s
$ time ./git cat-file --unordered \
--batch-all-objects --buffer --batch | wc -c
6883195596
real 0m6.075s
user 0m4.774s
sys 0m3.548s
Same output, different order, way faster. The same speed-up
applies even if you end up accessing the object content in a
different process, like:
git cat-file --batch-all-objects --buffer --batch-check |
grep blob |
git cat-file --batch='%(objectname) %(rest)' |
wc -c
Adding "--unordered" to the first command drops the runtime
in git.git from 24s to 3.5s.
Side note: there are actually further speedups available
for doing it all in-process now. Since we are outputting
the object content during the actual pack iteration, we
know where to find the object and could skip the extra
lookup done by oid_object_info(). This patch stops short
of that optimization since the underlying API isn't ready
for us to make those sorts of direct requests.
So if --unordered is so much better, why not make it the
default? Two reasons:
1. We've promised in the documentation that --batch-all-objects
outputs in hash order. Since cat-file is plumbing,
people may be relying on that default, and we can't
change it.
2. It's actually _slower_ for some cases. We have to
compute the pack revindex to walk in pack order. And
our de-duplication step uses an oidset, rather than a
sort-and-dedup, which can end up being more expensive.
If we're just accessing the type and size of each
object, for example, like:
git cat-file --batch-all-objects --buffer --batch-check
my best-of-five warm cache timings go from 900ms to
1100ms using --unordered. Though it's possible in a
cold-cache or under memory pressure that we could do
better, since we'd have better locality within the
packfile.
And one final question: why is it "--unordered" and not
"--pack-order"? The answer is again two-fold:
1. "pack order" isn't a well-defined thing across the
whole set of objects. We're hitting loose objects, as
well as objects in multiple packs, and the only
ordering we're promising is _within_ a single pack. The
rest is apparently random.
2. The point here is optimization. So we don't want to
promise any particular ordering, but only to say that
we will choose an ordering which is likely to be
efficient for accessing the object content. That leaves
the door open for further changes in the future without
having to add another compatibility option.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-08-11 07:24:57 +08:00
|
|
|
# With --unordered the objects come out in an unspecified order, and that
# is the only user-visible difference. All we can verify is that the set of
# objects matches the ordered case, which at least exercises the code path.
test_expect_success 'cat-file --unordered works' '
	git -C all-two cat-file --batch-all-objects --unordered \
		--batch-check="%(objectname)" >actual.unsorted &&
	sort actual.unsorted >actual &&
	test_cmp expect actual
'
|
|
|
|
|
2021-06-04 00:29:25 +08:00
|
|
|
test_expect_success 'set up object list for --batch-all-objects tests' '
	# One object name per line; later tests feed this list on stdin.
	git -C all-two cat-file --batch-all-objects \
		--batch-check="%(objectname)" >objects
'
|
|
|
|
|
|
|
|
test_expect_success 'cat-file --batch="%(objectname)" with --batch-all-objects will work' '
	# Feeding the object list on stdin and enumerating with
	# --batch-all-objects must give identical output. The output
	# embeds raw object contents, so compare it as binary data.
	git -C all-two cat-file --batch="%(objectname)" <objects >expect &&
	git -C all-two cat-file --batch-all-objects --batch="%(objectname)" >actual &&
	test_cmp_bin expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'cat-file --batch="%(rest)" with --batch-all-objects will work' '
	# Feeding the object list on stdin and enumerating with
	# --batch-all-objects must give identical output. The output
	# embeds raw object contents, so compare it as binary data.
	git -C all-two cat-file --batch="%(rest)" <objects >expect &&
	git -C all-two cat-file --batch-all-objects --batch="%(rest)" >actual &&
	test_cmp_bin expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'cat-file --batch="batman" with --batch-all-objects will work' '
	# A format without any placeholder must behave the same in both
	# modes. The output embeds raw object contents, so compare it as
	# binary data.
	git -C all-two cat-file --batch="batman" <objects >expect &&
	git -C all-two cat-file --batch-all-objects --batch="batman" >actual &&
	test_cmp_bin expect actual
'
|
|
|
|
|
2023-12-21 17:47:22 +08:00
|
|
|
test_expect_success 'cat-file %(objectsize:disk) with --batch-all-objects' '
	# The repository holds loose as well as packed objects, so the
	# expected "<oid> <disk size>" list is computed for both kinds.
	{
		# Loose objects: the disk size is the size of the object
		# file; the oid is the fan-out directory plus the file name.
		find .git/objects/?? -type f |
		awk -F/ "{ print \$0, \$3\$4 }" |
		while read path oid
		do
			size=$(test_file_size "$path") &&
			echo "$oid $size" ||
			return 1
		done &&

		# Packed objects: walk each index from the highest offset
		# down. An entry ends where the previously seen one starts;
		# the first one ends $rawsz bytes before the end of the pack
		# (presumably the trailing checksum).
		rawsz=$(test_oid rawsz) &&
		find .git/objects/pack -name "*.idx" |
		while read idx
		do
			git show-index <"$idx" >idx.raw &&
			sort -nr <idx.raw >idx.sorted &&
			packsz=$(test_file_size "${idx%.idx}.pack") &&
			end=$((packsz - rawsz)) &&
			while read start oid rest
			do
				size=$((end - start)) &&
				end=$start &&
				echo "$oid $size" ||
				return 1
			done <idx.sorted ||
			return 1
		done
	} >expect.raw &&
	sort <expect.raw >expect &&
	git cat-file --batch-all-objects \
		--batch-check="%(objectname) %(objectsize:disk)" >actual &&
	test_cmp expect actual
'
|
|
|
|
|
cat-file: disable refs/replace with --batch-all-objects
When we're enumerating all objects in the object database, it doesn't
make sense to respect refs/replace. The point of this option is to
enumerate all of the objects in the database at a low level. By
definition we'd already show the replacement object's contents (under
its real oid), and showing those contents under another oid is almost
certainly working against what the user is trying to do.
Note that you could make the same argument for something like:
git show-index <foo.idx |
awk '{print $2}' |
git cat-file --batch
but there we can't know in cat-file exactly what the user intended,
because we don't know the source of the input. They could be trying to
do low-level debugging, or they could be doing something more high-level
(e.g., imagine a porcelain built around cat-file for its object
accesses). So in those cases, we'll have to rely on the user specifying
"git --no-replace-objects" to tell us what to do.
One _could_ make an argument that "cat-file --batch" is sufficiently
low-level plumbing that it should not respect replace-objects at all
(and the caller should do any replacement if they want it). But we have
been doing so for some time. The history is a little tangled:
- looking back as far as v1.6.6, we would not respect replace refs for
--batch-check, but would for --batch (because the former used
sha1_object_info(), and the replace mechanism only affected actual
object reads)
- this discrepancy was made even weirder by 98e2092b50 (cat-file:
teach --batch to stream blob objects, 2013-07-10), where we always
output the header using the --batch-check code, and then printed the
object separately. This could lead to "cat-file --batch" dying (when
it notices the size or type changed for a non-blob object) or even
producing bogus output (in streaming mode, we didn't notice that we
wrote the wrong number of bytes).
- that persisted until 1f7117ef7a (sha1_file: perform object
replacement in sha1_object_info_extended(), 2013-12-11), which then
respected replace refs for both forms.
So it has worked reliably this way for over 7 years, and we should make
sure it continues to do so. That could also be an argument that
--batch-all-objects should not change behavior (which this patch is
doing), but I really consider the current behavior to be an unintended
bug. It's a side effect of how the code is implemented (feeding the oids
back into oid_object_info() rather than looking at what we found while
reading the loose and packed object storage).
The implementation is straight-forward: we just disable the global
read_replace_refs flag when we're in --batch-all-objects mode. It would
perhaps be a little cleaner to change the flag we pass to
oid_object_info_extended(), but that's not enough. We also read objects
via read_object_file() and stream_blob_to_fd(). The former could switch
to its _extended() form, but the streaming code has no mechanism for
disabling replace refs. Setting the global flag works, and as a bonus,
it's impossible to have any "oops, we're sometimes replacing the object
and sometimes not" bugs in the output (like the ones caused by
98e2092b50 above).
The tests here cover the regular-input and --batch-all-objects cases,
for both --batch-check and --batch. There is a test in t6050 that covers
the regular-input case with --batch already, but this new one goes much
further in actually verifying the output (plus covering --batch-check
explicitly). This is perhaps a little overkill and the tests would be
simpler just covering --batch-check, but I wanted to make sure we're
checking that --batch output is consistent between the header and the
content. The global-flag technique used here makes that easy to get
right, but this is future-proofing us against regressions.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-06 04:36:07 +08:00
|
|
|
test_expect_success 'set up replacement object' '
	# Record the original commit and its size before it gets replaced.
	orig=$(git rev-parse HEAD) &&
	orig_size=$(git cat-file -s $orig) &&
	git cat-file commit $orig >orig &&

	# The fake commit is the original content plus one extra line.
	cat orig >fake &&
	echo extra >>fake &&
	fake=$(git hash-object -t commit -w fake) &&
	fake_size=$(git cat-file -s $fake) &&

	git replace $orig $fake
'
|
|
|
|
|
|
|
|
test_expect_success 'cat-file --batch respects replace objects' '
	# The header keeps the original oid but carries the size and
	# content of the replacement, followed by the usual trailing LF.
	{
		echo "$orig commit $fake_size" &&
		cat fake &&
		echo
	} >expect &&
	echo "$orig" | git cat-file --batch >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'cat-file --batch-check respects replace objects' '
	# Original oid, but the size of the replacement object.
	echo "$orig commit $fake_size" >expect &&
	echo "$orig" | git cat-file --batch-check >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
# Print the "cat-file --batch" entry (header line, object content and the
# trailing newline) of the object whose oid is given as "$1". The batch
# output is read from stdin. The content has to be consumed by byte count
# taken from the header, which is why this is done in perl.
extract_batch_output () {
	perl -ne '
		BEGIN { $oid = shift }
		next unless /^$oid \S+ (\d+)$/;
		print;
		read STDIN, my $content, $1;
		print $content, "\n";
	' "$@"
}
|
|
|
|
|
|
|
|
test_expect_success 'cat-file --batch-all-objects --batch ignores replace' '
	# When enumerating every object, the entry for $orig must show
	# the original size and content, not the replacement.
	{
		echo "$orig commit $orig_size" &&
		cat orig &&
		echo
	} >expect &&
	git cat-file --batch-all-objects --batch >actual.raw &&
	extract_batch_output $orig <actual.raw >actual &&
	test_cmp expect actual
'
|
|
|
|
|
|
|
|
test_expect_success 'cat-file --batch-all-objects --batch-check ignores replace' '
	# The line for $orig must report the original size.
	echo "$orig commit $orig_size" >expect &&
	git cat-file --batch-all-objects --batch-check >actual.raw &&
	grep ^$orig actual.raw >actual &&
	test_cmp expect actual
'
|
cat-file: add --batch-command mode
Add a new flag --batch-command that accepts commands and arguments
from stdin, similar to git-update-ref --stdin.
At GitLab, we use a pair of long running cat-file processes when
accessing object content. One for iterating over object metadata with
--batch-check, and the other to grab object contents with --batch.
However, if we had --batch-command, we wouldn't need to keep both
processes around, and instead just have one --batch-command process
where we can flip between getting object info, and getting object
contents. Since we have a pair of cat-file processes per repository,
this means we can get rid of roughly half of long lived git cat-file
processes. Given there are many repositories being accessed at any given
time, this can lead to huge savings.
git cat-file --batch-command
will enter an interactive command mode whereby the user can enter in
commands and their arguments that get queued in memory:
<command1> [arg1] [arg2] LF
<command2> [arg1] [arg2] LF
When --buffer mode is used, commands will be queued in memory until a
flush command is issued that executes them:
flush LF
The reason for a flush command is that when a consumer process (A)
talks to a git cat-file process (B) and interactively writes to and
reads from it in --buffer mode, (A) needs to be able to control when
the buffer is flushed to stdout.
Currently, from (A)'s perspective, the only way is to either
1. kill (B)'s process
2. send an invalid object to stdin.
1. is not ideal from a performance perspective as it will require
spawning a new cat-file process each time, and 2. is hacky and not a
good long term solution.
With this mechanism of queueing up commands and letting (A) issue a
flush command, process (A) can control when the buffer is flushed and
can guarantee it will receive all of the output when in --buffer mode.
--batch-command also will not allow (B) to flush to stdout until a flush
is received.
This patch adds the basic structure for adding commands, which can be
extended in the future to add more commands. It also adds the following
two commands (on top of the flush command):
contents <object> LF
info <object> LF
The contents command takes an <object> argument and prints out the object
contents.
The info command takes an <object> argument and prints out the object
metadata.
These can be used in the following way with --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
flush LF
info <object> LF
flush LF
When used without --buffer:
info <object> LF
contents <object> LF
contents <object> LF
info <object> LF
info <object> LF
Helped-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: John Cai <johncai86@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2022-02-19 02:23:14 +08:00
|
|
|
test_expect_success 'batch-command empty command' '
	# A blank input line is not a valid command and must be fatal.
	printf "\n" >cmd &&
	test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
	grep "^fatal:.*empty command in input.*" err
'
|
|
|
|
|
|
|
|
test_expect_success 'batch-command whitespace before command' '
	# Leading whitespace in front of the command name is rejected.
	printf "%s\n" " info deadbeef" >cmd &&
	test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
	grep "^fatal:.*whitespace before command.*" err
'
|
|
|
|
|
|
|
|
test_expect_success 'batch-command unknown command' '
	# Anything that is not a known command name must be fatal.
	printf "%s\n" unknown_command >cmd &&
	test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
	grep "^fatal:.*unknown command.*" err
'
|
|
|
|
|
|
|
|
test_expect_success 'batch-command missing arguments' '
	# "info" without an object argument must be fatal.
	printf "%s\n" info >cmd &&
	test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
	grep "^fatal:.*info requires arguments.*" err
'
|
|
|
|
|
|
|
|
test_expect_success 'batch-command flush with arguments' '
	# Even in --buffer mode, "flush" accepts no arguments.
	printf "%s\n" "flush arg" >cmd &&
	test_expect_code 128 git cat-file --batch-command --buffer <cmd 2>err &&
	grep "^fatal:.*flush takes no arguments.*" err
'
|
|
|
|
|
|
|
|
test_expect_success 'batch-command flush without --buffer' '
	# "flush" is only meaningful together with --buffer.
	printf "%s\n" flush >cmd &&
	test_expect_code 128 git cat-file --batch-command <cmd 2>err &&
	grep "^fatal:.*flush is only for --buffer mode.*" err
'
|
cat-file: disable refs/replace with --batch-all-objects
When we're enumerating all objects in the object database, it doesn't
make sense to respect refs/replace. The point of this option is to
enumerate all of the objects in the database at a low level. By
definition we'd already show the replacement object's contents (under
its real oid), and showing those contents under another oid is almost
certainly working against what the user is trying to do.
Note that you could make the same argument for something like:
git show-index <foo.idx |
awk '{print $2}' |
git cat-file --batch
but there we can't know in cat-file exactly what the user intended,
because we don't know the source of the input. They could be trying to
do low-level debugging, or they could be doing something more high-level
(e.g., imagine a porcelain built around cat-file for its object
accesses). So in those cases, we'll have to rely on the user specifying
"git --no-replace-objects" to tell us what to do.
One _could_ make an argument that "cat-file --batch" is sufficiently
low-level plumbing that it should not respect replace-objects at all
(and the caller should do any replacement if they want it). But we have
been doing so for some time. The history is a little tangled:
- looking back as far as v1.6.6, we would not respect replace refs for
--batch-check, but would for --batch (because the former used
sha1_object_info(), and the replace mechanism only affected actual
object reads)
- this discrepancy was made even weirder by 98e2092b50 (cat-file:
teach --batch to stream blob objects, 2013-07-10), where we always
output the header using the --batch-check code, and then printed the
object separately. This could lead to "cat-file --batch" dying (when
it notices the size or type changed for a non-blob object) or even
producing bogus output (in streaming mode, we didn't notice that we
wrote the wrong number of bytes).
- that persisted until 1f7117ef7a (sha1_file: perform object
replacement in sha1_object_info_extended(), 2013-12-11), which then
respected replace refs for both forms.
So it has worked reliably this way for over 7 years, and we should make
sure it continues to do so. That could also be an argument that
--batch-all-objects should not change behavior (which this patch is
doing), but I really consider the current behavior to be an unintended
bug. It's a side effect of how the code is implemented (feeding the oids
back into oid_object_info() rather than looking at what we found while
reading the loose and packed object storage).
The implementation is straight-forward: we just disable the global
read_replace_refs flag when we're in --batch-all-objects mode. It would
perhaps be a little cleaner to change the flag we pass to
oid_object_info_extended(), but that's not enough. We also read objects
via read_object_file() and stream_blob_to_fd(). The former could switch
to its _extended() form, but the streaming code has no mechanism for
disabling replace refs. Setting the global flag works, and as a bonus,
it's impossible to have any "oops, we're sometimes replacing the object
and sometimes not" bugs in the output (like the ones caused by
98e2092b50 above).
The tests here cover the regular-input and --batch-all-objects cases,
for both --batch-check and --batch. There is a test in t6050 that covers
the regular-input case with --batch already, but this new one goes much
further in actually verifying the output (plus covering --batch-check
explicitly). This is perhaps a little overkill and the tests would be
simpler just covering --batch-check, but I wanted to make sure we're
checking that --batch output is consistent between the header and the
content. The global-flag technique used here makes that easy to get
right, but this is future-proofing us against regressions.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2021-10-06 04:36:07 +08:00
|
|
|
|
2024-06-19 05:30:41 +08:00
|
|
|
script='
	# Usage: perl -e "$script" -- <cat-file option> <oid> <expected line> [<prefix>...]
	#
	# Start "git cat-file <option>", send one request and check that the
	# reply line arrives while the request pipe is still open.
	use warnings;
	use strict;
	use IPC::Open2;

	my ($opt, $oid, $expect, @pfx) = @ARGV;
	my @cmd = ("git", "cat-file", $opt);
	my $pid = open2(my $out, my $in, @cmd) or die "open2: @cmd";

	# The request is the optional prefix followed by the oid and LF.
	print $in @pfx, $oid, "\n" or die "print $!";

	# Wait at most 30 seconds for the reply to become readable, so a
	# reply that is never flushed fails the test instead of hanging.
	my $rvec = "";
	vec($rvec, fileno($out), 1) = 1;
	select($rvec, undef, undef, 30) or die "no response to `@pfx $oid` from @cmd";

	# The reply must be exactly the expected text plus a trailing LF.
	my $info = <$out>;
	chop($info) eq "\n" or die "no LF";
	$info eq $expect or die "`$info` != `$expect`";

	# Closing the pipes lets the child exit; it must exit cleanly.
	close $in or die "close in $!";
	close $out or die "close out $!";
	waitpid $pid, 0;
	$? == 0 or die "\$?=$?";
'
|
|
|
|
|
|
|
|
# Reply line expected by the two unbuffered-output tests below.
expect="${hello_oid} blob ${hello_size}"
|
|
|
|
|
|
|
|
test_expect_success PERL '--batch-check is unbuffered by default' '
	# Arguments after "--": cat-file option, oid to send, expected reply.
	perl -e "$script" -- \
		--batch-check $hello_oid "$expect"
'
|
|
|
|
|
|
|
|
test_expect_success PERL '--batch-command info is unbuffered by default' '
	# The trailing "info " argument is sent in front of the oid, which
	# turns the request into an "info <oid>" command.
	perl -e "$script" -- \
		--batch-command $hello_oid "$expect" "info "
'
|
|
|
|
|
2008-04-24 03:17:43 +08:00
|
|
|
test_done
|