Merge branch 'jk/oi-delta-base'

Teach "cat-file --batch" to show the delta-base object name for a
packed object that is represented as a delta.

* jk/oi-delta-base:
  cat-file: provide %(deltabase) batch format
  sha1_object_info_extended: provide delta base sha1s
This commit is contained in:
Junio C Hamano 2014-01-10 10:33:11 -08:00
commit b2132068c6
5 changed files with 103 additions and 3 deletions

View File

@ -109,6 +109,11 @@ newline. The available atoms are:
The size, in bytes, that the object takes up on disk. See the
note about on-disk sizes in the `CAVEATS` section below.
`deltabase`::
If the object is stored as a delta on-disk, this expands to the
40-hex sha1 of the delta base object. Otherwise, it expands to the
null sha1 (40 zeroes). See `CAVEATS` below.
`rest`::
If this atom is used in the output string, input lines are split
at the first whitespace boundary. All characters before that
@ -152,10 +157,11 @@ should be taken in drawing conclusions about which refs or objects are
responsible for disk usage. The size of a packed non-delta object may be
much larger than the size of objects which delta against it, but the
choice of which object is the base and which is the delta is arbitrary
and is subject to change during a repack. Note also that multiple copies
of an object may be present in the object database; in this case, it is
undefined which copy's size will be reported.
and is subject to change during a repack.
Note also that multiple copies of an object may be present in the object
database; in this case, it is undefined which copy's size or delta base
will be reported.
GIT
---

View File

@ -118,6 +118,7 @@ struct expand_data {
unsigned long size;
unsigned long disk_size;
const char *rest;
unsigned char delta_base_sha1[20];
/*
* If mark_query is true, we do not expand anything, but rather
@ -174,6 +175,11 @@ static void expand_atom(struct strbuf *sb, const char *atom, int len,
data->split_on_whitespace = 1;
else if (data->rest)
strbuf_addstr(sb, data->rest);
} else if (is_atom("deltabase", atom, len)) {
if (data->mark_query)
data->info.delta_base_sha1 = data->delta_base_sha1;
else
strbuf_addstr(sb, sha1_to_hex(data->delta_base_sha1));
} else
die("unknown format element: %.*s", len, atom);
}

View File

@ -1080,6 +1080,7 @@ struct object_info {
enum object_type *typep;
unsigned long *sizep;
unsigned long *disk_sizep;
unsigned char *delta_base_sha1;
/* Response */
enum {

View File

@ -1690,6 +1690,38 @@ static off_t get_delta_base(struct packed_git *p,
return base_offset;
}
/*
 * Counterpart of get_delta_base above that reports the base object by
 * sha1 rather than by its offset inside the pack.
 *
 * Cost trade-off relative to get_delta_base: REF deltas are cheaper
 * here (the final object lookup is not needed), while OFS deltas are
 * more expensive (the revidx has to be loaded to turn the offset back
 * into a sha1).
 *
 * Returns NULL if `type` is neither delta type, if get_delta_base
 * yields a zero offset, or if no revindex entry exists for that
 * offset.
 */
static const unsigned char *get_delta_base_sha1(struct packed_git *p,
						struct pack_window **w_curs,
						off_t curpos,
						enum object_type type,
						off_t delta_obj_offset)
{
	struct revindex_entry *entry;
	off_t base_pos;

	switch (type) {
	case OBJ_REF_DELTA:
		/* hand back the pack bytes at curpos as the base sha1 */
		return use_pack(p, w_curs, curpos, NULL);

	case OBJ_OFS_DELTA:
		base_pos = get_delta_base(p, w_curs, &curpos,
					  type, delta_obj_offset);
		if (!base_pos)
			return NULL;

		/* map the base's pack offset back to its index position */
		entry = find_pack_revindex(p, base_pos);
		if (!entry)
			return NULL;

		return nth_packed_object_sha1(p, entry->nr);

	default:
		return NULL;
	}
}
int unpack_object_header(struct packed_git *p,
struct pack_window **w_curs,
off_t *curpos,
@ -1847,6 +1879,22 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset,
}
}
if (oi->delta_base_sha1) {
if (type == OBJ_OFS_DELTA || type == OBJ_REF_DELTA) {
const unsigned char *base;
base = get_delta_base_sha1(p, &w_curs, curpos,
type, obj_offset);
if (!base) {
type = OBJ_BAD;
goto out;
}
hashcpy(oi->delta_base_sha1, base);
} else
hashclr(oi->delta_base_sha1);
}
out:
unuse_pack(&w_curs);
return type;
@ -2430,6 +2478,9 @@ static int sha1_loose_object_info(const unsigned char *sha1,
git_zstream stream;
char hdr[32];
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
/*
* If we don't care about type or size, then we don't
* need to look inside the object at all. Note that we
@ -2481,6 +2532,8 @@ int sha1_object_info_extended(const unsigned char *sha1, struct object_info *oi,
*(oi->sizep) = co->size;
if (oi->disk_sizep)
*(oi->disk_sizep) = 0;
if (oi->delta_base_sha1)
hashclr(oi->delta_base_sha1);
oi->whence = OI_CACHED;
return 0;
}

View File

@ -262,4 +262,38 @@ test_expect_success "--batch-check with multiple sha1s gives correct format" '
"$(echo_without_newline "$batch_check_input" | git cat-file --batch-check)"
'
# Create and commit two files: foo, and foo-plus, which is the content
# of foo followed by one extra line ("plus"). Their object names, spelled
# as HEAD:<path>, are written to the file "blobs" so they can be fed to
# cat-file on stdin.
test_expect_success 'setup blobs which are likely to delta' '
test-genrandom foo 10240 >foo &&
{ cat foo; echo plus; } >foo-plus &&
git add foo foo-plus &&
git commit -m foo &&
cat >blobs <<-\EOF
HEAD:foo
HEAD:foo-plus
EOF
'
# With no repack having been run yet, %(deltabase) is expected to print
# $_z40 for each of the two blobs. ($_z40 is defined outside this view;
# presumably it is the 40-zero null sha1 -- confirm against the test
# library.)
test_expect_success 'confirm that neither loose blob is a delta' '
cat >expect <<-EOF
$_z40
$_z40
EOF
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
test_cmp expect actual
'
# To avoid relying too much on the current delta heuristics,
# we will check only that one of the two objects is a delta
# against the other, but not the order. We can do so by just
# asking for the base of both, and checking whether either
# sha1 appears in the output.
#
# The braced group is the last command of the test body, so the test
# succeeds as soon as either grep finds its sha1 in "actual".
test_expect_success '%(deltabase) reports packed delta bases' '
git repack -ad &&
git cat-file --batch-check="%(deltabase)" <blobs >actual &&
{
grep "$(git rev-parse HEAD:foo)" actual ||
grep "$(git rev-parse HEAD:foo-plus)" actual
}
'
test_done