mirror of
https://github.com/git/git.git
synced 2025-01-22 15:33:59 +08:00
Merge branch 'jk/oi-delta-base'
Teach "cat-file --batch" to show the delta-base object name for a packed object that is represented as a delta.

* jk/oi-delta-base:
  cat-file: provide %(deltabase) batch format
  sha1_object_info_extended: provide delta base sha1s
This commit is contained in:
commit
b2132068c6
@ -109,6 +109,11 @@ newline. The available atoms are:
|
||||
The size, in bytes, that the object takes up on disk. See the
|
||||
note about on-disk sizes in the `CAVEATS` section below.
|
||||
|
||||
`deltabase`::
|
||||
If the object is stored as a delta on-disk, this expands to the
|
||||
40-hex sha1 of the delta base object. Otherwise, expands to the
|
||||
null sha1 (40 zeroes). See `CAVEATS` below.
|
||||
|
||||
`rest`::
|
||||
If this atom is used in the output string, input lines are split
|
||||
at the first whitespace boundary. All characters before that
|
||||
@ -152,10 +157,11 @@ should be taken in drawing conclusions about which refs or objects are
|
||||
responsible for disk usage. The size of a packed non-delta object may be
|
||||
much larger than the size of objects which delta against it, but the
|
||||
choice of which object is the base and which is the delta is arbitrary
|
||||
and is subject to change during a repack. Note also that multiple copies
|
||||
of an object may be present in the object database; in this case, it is
|
||||
undefined which copy's size will be reported.
|
||||
and is subject to change during a repack.
|
||||
|
||||
Note also that multiple copies of an object may be present in the object
|
||||
database; in this case, it is undefined which copy's size or delta base
|
||||
will be reported.
|
||||
|
||||
GIT
|
||||
---
|
||||
|
@ -118,6 +118,7 @@ struct expand_data {
|
||||
unsigned long size;
|
||||
unsigned long disk_size;
|
||||
const char *rest;
|
||||
unsigned char delta_base_sha1[20];
|
||||
|
||||
/*
|
||||
* If mark_query is true, we do not expand anything, but rather
|
||||
@ -174,6 +175,11 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
|
||||
data->split_on_whitespace = 1;
|
||||
else if (data->rest)
|
||||
strbuf_addstr(sb, data->rest);
|
||||
} else if (is_atom("deltabase", atom, len)) {
|
||||
if (data->mark_query)
|
||||
data->info.delta_base_sha1 = data->delta_base_sha1;
|
||||
else
|
||||
strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1));
|
||||
} else
|
||||
die("unknown format element: %.*s", len, atom);
|
||||
}
|
||||
|
1
cache.h
1
cache.h
@ -1080,6 +1080,7 @@ struct object_info {
|
||||
enum object_type *typep;
|
||||
unsigned long *sizep;
|
||||
unsigned long *disk_sizep;
|
||||
unsigned char *delta_base_sha1;
|
||||
|
||||
/* Response */
|
||||
enum {
|
||||
|
53
sha1_file.c
53
sha1_file.c
@ -1690,6 +1690,38 @@ static off_t get_delta_base(struct packed_git *p,
|
||||
return base_offset;
|
||||
}
|
||||
|
||||
/*
|
||||
* Like get_delta_base above, but we return the sha1 instead of the pack
|
||||
* offset. This means it is cheaper for REF deltas (we do not have to do
|
||||
* the final object lookup), but more expensive for OFS deltas (we
|
||||
* have to load the revidx to convert the offset back into a sha1).
|
||||
*/
|
||||
static const unsigned char *get_delta_base_sha1(struct packed_git *p,
|
||||
struct pack_window **w_curs,
|
||||
off_t curpos,
|
||||
enum object_type type,
|
||||
off_t delta_obj_offset)
|
||||
{
|
||||
if (type == OBJ_REF_DELTA) {
|
||||
unsigned char *base = use_pack(p, w_curs, curpos, NULL);
|
||||
return base;
|
||||
} else if (type == OBJ_OFS_DELTA) {
|
||||
struct revindex_entry *revidx;
|
||||
off_t base_offset = get_delta_base(p, w_curs, &curpos,
|
||||
type, delta_obj_offset);
|
||||
|
||||
if (!base_offset)
|
||||
return NULL;
|
||||
|
||||
revidx = find_pack_revindex(p, base_offset);
|
||||
if (!revidx)
|
||||
return NULL;
|
||||
|
||||
return nth_packed_object_sha1(p, revidx->nr);
|
||||
} else
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int unpack_object_header(struct packed_git *p,
|
||||
struct pack_window **w_curs,
|
||||
off_t *curpos,
|
||||
@ -1847,6 +1879,22 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
|
||||
}
|
||||
}
|
||||
|
||||
if (oi->delta_base_sha1) {
|
||||
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
|
||||
const unsigned char *base;
|
||||
|
||||
base = get_delta_base_sha1(p, &w_curs, curpos,
|
||||
type, obj_offset);
|
||||
if (!base) {
|
||||
type = OBJ_BAD;
|
||||
goto out;
|
||||
}
|
||||
|
||||
hashcpy(oi->delta_base_sha1, base);
|
||||
} else
|
||||
hashclr(oi->delta_base_sha1);
|
||||
}
|
||||
|
||||
out:
|
||||
unuse_pack(&w_curs);
|
||||
return type;
|
||||
@ -2430,6 +2478,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
|
||||
git_zstream stream;
|
||||
char hdr[32];
|
||||
|
||||
if (oi->delta_base_sha1)
|
||||
hashclr(oi->delta_base_sha1);
|
||||
|
||||
/*
|
||||
* If we don't care about type or size, then we don't
|
||||
* need to look inside the object at all. Note that we
|
||||
@ -2481,6 +2532,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
|
||||
*(oi->sizep) = co->size;
|
||||
if (oi->disk_sizep)
|
||||
*(oi->disk_sizep) = 0;
|
||||
if (oi->delta_base_sha1)
|
||||
hashclr(oi->delta_base_sha1);
|
||||
oi->whence = OI_CACHED;
|
||||
return 0;
|
||||
}
|
||||
|
@ -262,4 +262,38 @@ test_expect_success "--batch-check with multiple sha1s gives correct format" '
|
||||
"$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
|
||||
'
|
||||
|
||||
test_expect_success 'setup blobs which are likely to delta' '
|
||||
test-genrandom foo 10240 >foo &&
|
||||
{ cat foo; echo plus; } >foo-plus &&
|
||||
git add foo foo-plus &&
|
||||
git commit -m foo &&
|
||||
cat >blobs <<-\EOF
|
||||
HEAD:foo
|
||||
HEAD:foo-plus
|
||||
EOF
|
||||
'
|
||||
|
||||
test_expect_success 'confirm that neither loose blob is a delta' '
|
||||
cat >expect <<-EOF
|
||||
$_z40
|
||||
$_z40
|
||||
EOF
|
||||
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
||||
test_cmp expect actual
|
||||
'
|
||||
|
||||
# To avoid relying too much on the current delta heuristics,
|
||||
# we will check only that one of the two objects is a delta
|
||||
# against the other, but not the order. We can do so by just
|
||||
# asking for the base of both, and checking whether either
|
||||
# sha1 appears in the output.
|
||||
test_expect_success '%(deltabase) reports packed delta bases' '
|
||||
git repack -ad &&
|
||||
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
|
||||
{
|
||||
grep "$(git rev-parse HEAD:foo)" actual ||
|
||||
grep "$(git rev-parse HEAD:foo-plus)" actual
|
||||
}
|
||||
'
|
||||
|
||||
test_done
|
||||
|
Loading…
Reference in New Issue
Block a user