diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c index 59ae64d83f..67eefa2932 100644 --- a/builtin-pack-objects.c +++ b/builtin-pack-objects.c @@ -286,6 +286,7 @@ static unsigned long write_object(struct sha1file *f, */ if (!to_reuse) { + no_reuse: if (!usable_delta) { buf = read_sha1_file(entry->idx.sha1, &type, &size); if (!buf) @@ -367,46 +368,60 @@ static unsigned long write_object(struct sha1file *f, struct revindex_entry *revidx; off_t offset; - if (entry->delta) { + if (entry->delta) type = (allow_ofs_delta && entry->delta->idx.offset) ? OBJ_OFS_DELTA : OBJ_REF_DELTA; - reused_delta++; - } hdrlen = encode_header(type, entry->size, header); + offset = entry->in_pack_offset; revidx = find_pack_revindex(p, offset); datalen = revidx[1].offset - offset; if (!pack_to_stdout && p->index_version > 1 && - check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) - die("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); + check_pack_crc(p, &w_curs, offset, datalen, revidx->nr)) { + error("bad packed object CRC for %s", sha1_to_hex(entry->idx.sha1)); + unuse_pack(&w_curs); + goto no_reuse; + } + offset += entry->in_pack_header_size; datalen -= entry->in_pack_header_size; + if (!pack_to_stdout && p->index_version == 1 && + check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) { + error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); + unuse_pack(&w_curs); + goto no_reuse; + } + if (type == OBJ_OFS_DELTA) { off_t ofs = entry->idx.offset - entry->delta->idx.offset; unsigned pos = sizeof(dheader) - 1; dheader[pos] = ofs & 127; while (ofs >>= 7) dheader[--pos] = 128 | (--ofs & 127); - if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) + if (limit && hdrlen + sizeof(dheader) - pos + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); sha1write(f, dheader + pos, sizeof(dheader) - pos); hdrlen += sizeof(dheader) - pos; + reused_delta++; } else if (type == OBJ_REF_DELTA) { - if (limit && hdrlen + 20 + datalen + 20 >= limit) + if (limit && hdrlen + 20 + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); sha1write(f, entry->delta->idx.sha1, 20); hdrlen += 20; + reused_delta++; } else { - if (limit && hdrlen + datalen + 20 >= limit) + if (limit && hdrlen + datalen + 20 >= limit) { + unuse_pack(&w_curs); return 0; + } sha1write(f, header, hdrlen); } - - if (!pack_to_stdout && p->index_version == 1 && - check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) - die("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1)); copy_pack_data(f, p, &w_curs, offset, datalen); unuse_pack(&w_curs); reused++; @@ -1016,9 +1031,11 @@ static void check_object(struct object_entry *entry) * We want in_pack_type even if we do not reuse delta * since non-delta representations could still be reused. */ - used = unpack_object_header_gently(buf, avail, + used = unpack_object_header_buffer(buf, avail, &entry->in_pack_type, &entry->size); + if (used == 0) + goto give_up; /* * Determine if this is a delta and if so whether we can @@ -1030,6 +1047,8 @@ static void check_object(struct object_entry *entry) /* Not a delta hence we've already got all we need. */ entry->type = entry->in_pack_type; entry->in_pack_header_size = used; + if (entry->type < OBJ_COMMIT || entry->type > OBJ_BLOB) + goto give_up; unuse_pack(&w_curs); return; case OBJ_REF_DELTA: @@ -1046,19 +1065,25 @@ static void check_object(struct object_entry *entry) ofs = c & 127; while (c & 128) { ofs += 1; - if (!ofs || MSB(ofs, 7)) - die("delta base offset overflow in pack for %s", - sha1_to_hex(entry->idx.sha1)); + if (!ofs || MSB(ofs, 7)) { + error("delta base offset overflow in pack for %s", + sha1_to_hex(entry->idx.sha1)); + goto give_up; + } c = buf[used_0++]; ofs = (ofs << 7) + (c & 127); } - if (ofs >= entry->in_pack_offset) - die("delta base offset out of bound for %s", - sha1_to_hex(entry->idx.sha1)); ofs = entry->in_pack_offset - ofs; + if (ofs <= 0 || ofs >= entry->in_pack_offset) { + error("delta base offset out of bound for %s", + sha1_to_hex(entry->idx.sha1)); + goto give_up; + } if (reuse_delta && !entry->preferred_base) { struct revindex_entry *revidx; revidx = find_pack_revindex(p, ofs); + if (!revidx) + goto give_up; base_ref = nth_packed_object_sha1(p, revidx->nr); } entry->in_pack_header_size = used + used_0; @@ -1078,6 +1103,7 @@ static void check_object(struct object_entry *entry) */ entry->type = entry->in_pack_type; entry->delta = base_entry; + entry->delta_size = entry->size; entry->delta_sibling = base_entry->delta_child; base_entry->delta_child = entry; unuse_pack(&w_curs); @@ -1092,6 +1118,8 @@ static void check_object(struct object_entry *entry) */ entry->size = get_size_from_delta(p, &w_curs, entry->in_pack_offset + entry->in_pack_header_size); + if (entry->size == 0) + goto give_up; unuse_pack(&w_curs); return; } @@ -1101,6 +1129,7 @@ static void check_object(struct object_entry *entry) * with sha1_object_info() to find about the object type * at this point... */ + give_up: unuse_pack(&w_curs); } @@ -1712,6 +1741,16 @@ static void prepare_pack(int window, int depth) get_object_details(); + /* + * If we're locally repacking then we need to be doubly careful + * from now on in order to make sure no stealth corruption gets + * propagated to the new pack. Clients receiving streamed packs + * should validate everything they get anyway so no need to incur + * the additional cost here in that case. + */ + if (!pack_to_stdout) + do_check_packed_object_crc = 1; + if (!nr_objects || !window || !depth) return; diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index 9f4bdd3296..47ed610677 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -370,6 +370,8 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size, base_offset = (base_offset << 7) + (c & 127); } base_offset = obj_list[nr].offset - base_offset; + if (base_offset <= 0 || base_offset >= obj_list[nr].offset) + die("offset value out of bound for delta base object"); delta_data = get_data(delta_size); if (dry_run || !delta_data) { diff --git a/cache.h b/cache.h index c776f2f5ab..3b5f0c4c00 100644 --- a/cache.h +++ b/cache.h @@ -574,6 +574,9 @@ extern int force_object_loose(const unsigned char *sha1, time_t mtime); /* just like read_sha1_file(), but non fatal in presence of bad objects */ extern void *read_object(const unsigned char *sha1, enum object_type *type, unsigned long *size); +/* global flag to enable extra checks when accessing packed objects */ +extern int do_check_packed_object_crc; + extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); extern int move_temp_to_file(const char *tmpfile, const char *filename); @@ -762,7 +765,7 @@ extern const unsigned char *nth_packed_object_sha1(struct packed_git *, uint32_t extern off_t nth_packed_object_offset(const struct packed_git *, uint32_t); extern off_t find_pack_entry_one(const unsigned char *, struct packed_git *); extern void *unpack_entry(struct packed_git *, off_t, enum object_type *, unsigned long *); -extern unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); +extern unsigned long unpack_object_header_buffer(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep); extern unsigned long get_size_from_delta(struct packed_git *, struct pack_window **, off_t); extern const char *packed_object_info_detail(struct packed_git *, off_t, unsigned long *, unsigned long *, unsigned int *, unsigned char *); extern int matches_pack_name(struct packed_git *p, const char *name); diff --git a/index-pack.c b/index-pack.c index fe75332a9c..60ed41a993 100644 --- a/index-pack.c +++ b/index-pack.c @@ -338,7 +338,7 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_ base_offset = (base_offset << 7) + (c & 127); } delta_base->offset = obj->idx.offset - base_offset; - if (delta_base->offset >= obj->idx.offset) + if (delta_base->offset <= 0 || delta_base->offset >= obj->idx.offset) bad_object(obj->idx.offset, "delta base offset is out of bound"); break; case OBJ_COMMIT: diff --git a/pack-revindex.c b/pack-revindex.c index 6096b6224a..1de53c8934 100644 --- a/pack-revindex.c +++ b/pack-revindex.c @@ -140,7 +140,8 @@ struct revindex_entry *find_pack_revindex(struct packed_git *p, off_t ofs) else lo = mi + 1; } while (lo < hi); - die("internal error: pack revindex corrupt"); + error("bad offset for revindex"); + return NULL; } void discard_revindex(void) diff --git a/sha1_file.c b/sha1_file.c index 491220572b..0fa65baa59 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1122,7 +1122,8 @@ static int legacy_loose_object(unsigned char *map) return 0; } -unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned long len, enum object_type *type, unsigned long *sizep) +unsigned long unpack_object_header_buffer(const unsigned char *buf, + unsigned long len, enum object_type *type, unsigned long *sizep) { unsigned shift; unsigned char c; @@ -1134,10 +1135,10 @@ unsigned long unpack_object_header_gently(const unsigned char *buf, unsigned lon size = c & 15; shift = 4; while (c & 0x80) { - if (len <= used) - return 0; - if (sizeof(long) * 8 <= shift) + if (len <= used || sizeof(long) * 8 <= shift) { + error("bad object header"); return 0; + } c = buf[used++]; size += (c & 0x7f) << shift; shift += 7; @@ -1176,7 +1177,7 @@ static int unpack_sha1_header(z_stream *stream, unsigned char *map, unsigned lon * really worth it and we don't write it any longer. But we * can still read it. */ - used = unpack_object_header_gently(map, mapsize, &type, &size); + used = unpack_object_header_buffer(map, mapsize, &type, &size); if (!used || !valid_loose_object_type[type]) return -1; map += used; @@ -1325,8 +1326,10 @@ unsigned long get_size_from_delta(struct packed_git *p, } while ((st == Z_OK || st == Z_BUF_ERROR) && stream.total_out < sizeof(delta_head)); inflateEnd(&stream); - if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) - die("delta data unpack-initial failed"); + if ((st != Z_STREAM_END) && stream.total_out != sizeof(delta_head)) { + error("delta data unpack-initial failed"); + return 0; + } /* Examine the initial part of the delta to figure out * the result size. @@ -1367,7 +1370,7 @@ static off_t get_delta_base(struct packed_git *p, base_offset = (base_offset << 7) + (c & 127); } base_offset = delta_obj_offset - base_offset; - if (base_offset >= delta_obj_offset) + if (base_offset <= 0 || base_offset >= delta_obj_offset) return 0; /* out of bound */ *curpos += used; } else if (type == OBJ_REF_DELTA) { @@ -1393,15 +1396,32 @@ static int packed_delta_info(struct packed_git *p, off_t base_offset; base_offset = get_delta_base(p, w_curs, &curpos, type, obj_offset); + if (!base_offset) + return OBJ_BAD; type = packed_object_info(p, base_offset, NULL); + if (type <= OBJ_NONE) { + struct revindex_entry *revidx; + const unsigned char *base_sha1; + revidx = find_pack_revindex(p, base_offset); + if (!revidx) + return OBJ_BAD; + base_sha1 = nth_packed_object_sha1(p, revidx->nr); + mark_bad_packed_object(p, base_sha1); + type = sha1_object_info(base_sha1, NULL); + if (type <= OBJ_NONE) + return OBJ_BAD; + } /* We choose to only get the type of the base object and * ignore potentially corrupt pack file that expects the delta * based on a base with a wrong size. This saves tons of * inflate() calls. */ - if (sizep) + if (sizep) { *sizep = get_size_from_delta(p, w_curs, curpos); + if (*sizep == 0) + type = OBJ_BAD; + } return type; } @@ -1423,10 +1443,11 @@ static int unpack_object_header(struct packed_git *p, * insane, so we know won't exceed what we have been given. */ base = use_pack(p, w_curs, *curpos, &left); - used = unpack_object_header_gently(base, left, &type, sizep); - if (!used) - die("object offset outside of pack file"); - *curpos += used; + used = unpack_object_header_buffer(base, left, &type, sizep); + if (!used) { + type = OBJ_BAD; + } else + *curpos += used; return type; } @@ -1510,8 +1531,9 @@ static int packed_object_info(struct packed_git *p, off_t obj_offset, *sizep = size; break; default: - die("pack %s contains unknown object type %d", - p->pack_name, type); + error("unknown object type %i at offset %"PRIuMAX" in %s", + type, (uintmax_t)obj_offset, p->pack_name); + type = OBJ_BAD; } unuse_pack(&w_curs); return type; @@ -1675,9 +1697,12 @@ static void *unpack_delta_entry(struct packed_git *p, * This is costly but should happen only in the presence * of a corrupted pack, and is better than failing outright. */ - struct revindex_entry *revidx = find_pack_revindex(p, base_offset); - const unsigned char *base_sha1 = - nth_packed_object_sha1(p, revidx->nr); + struct revindex_entry *revidx; + const unsigned char *base_sha1; + revidx = find_pack_revindex(p, base_offset); + if (!revidx) + return NULL; + base_sha1 = nth_packed_object_sha1(p, revidx->nr); error("failed to read delta base object %s" " at offset %"PRIuMAX" from %s", sha1_to_hex(base_sha1), (uintmax_t)base_offset, @@ -1706,6 +1731,8 @@ static void *unpack_delta_entry(struct packed_git *p, return result; } +int do_check_packed_object_crc; + void *unpack_entry(struct packed_git *p, off_t obj_offset, enum object_type *type, unsigned long *sizep) { @@ -1713,6 +1740,19 @@ void *unpack_entry(struct packed_git *p, off_t obj_offset, off_t curpos = obj_offset; void *data; + if (do_check_packed_object_crc && p->index_version > 1) { + struct revindex_entry *revidx = find_pack_revindex(p, obj_offset); + unsigned long len = revidx[1].offset - obj_offset; + if (check_pack_crc(p, &w_curs, obj_offset, len, revidx->nr)) { + const unsigned char *sha1 = + nth_packed_object_sha1(p, revidx->nr); + error("bad packed object CRC for %s", + sha1_to_hex(sha1)); + mark_bad_packed_object(p, sha1); + return NULL; + } + } + *type = unpack_object_header(p, &w_curs, &curpos, sizep); switch (*type) { case OBJ_OFS_DELTA: @@ -1966,7 +2006,14 @@ int sha1_object_info(const unsigned char *sha1, unsigned long *sizep) if (!find_pack_entry(sha1, &e, NULL)) return status; } - return packed_object_info(e.p, e.offset, sizep); + + status = packed_object_info(e.p, e.offset, sizep); + if (status < 0) { + mark_bad_packed_object(e.p, sha1); + status = sha1_object_info(sha1, sizep); + } + + return status; } static void *read_packed_sha1(const unsigned char *sha1, diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index b0b0fdaca5..884e24253a 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -196,7 +196,8 @@ test_expect_success \ test_expect_success \ '[index v2] 5) pack-objects refuses to reuse corrupted data' \ - 'test_must_fail git pack-objects test-5 /dev/null && test_must_fail git cat-file blob $blob_2 > /dev/null && test_must_fail git cat-file blob $blob_3 > /dev/null' @@ -119,7 +125,7 @@ test_expect_success \ 'create corruption in header of first delta' \ 'create_new_pack && git prune-packed && - do_corrupt_object $blob_2 0 && + do_corrupt_object $blob_2 0 < /dev/zero && git cat-file blob $blob_1 > /dev/null && test_must_fail git cat-file blob $blob_2 > /dev/null && test_must_fail git cat-file blob $blob_3 > /dev/null' @@ -133,6 +139,15 @@ test_expect_success \ git cat-file blob $blob_2 > /dev/null && git cat-file blob $blob_3 > /dev/null' +test_expect_success \ + '... and then a repack "clears" the corruption' \ + 'do_repack && + git prune-packed && + git verify-pack ${pack}.pack && + git cat-file blob $blob_1 > /dev/null && + git cat-file blob $blob_2 > /dev/null && + git cat-file blob $blob_3 > /dev/null' + test_expect_success \ 'create corruption in data of first delta' \ 'create_new_pack && @@ -152,11 +167,20 @@ test_expect_success \ git cat-file blob $blob_2 > /dev/null && git cat-file blob $blob_3 > /dev/null' +test_expect_success \ + '... and then a repack "clears" the corruption' \ + 'do_repack && + git prune-packed && + git verify-pack ${pack}.pack && + git cat-file blob $blob_1 > /dev/null && + git cat-file blob $blob_2 > /dev/null && + git cat-file blob $blob_3 > /dev/null' + test_expect_success \ 'corruption in delta base reference of first delta (OBJ_REF_DELTA)' \ 'create_new_pack && git prune-packed && - do_corrupt_object $blob_2 2 && + do_corrupt_object $blob_2 2 < /dev/zero && git cat-file blob $blob_1 > /dev/null && test_must_fail git cat-file blob $blob_2 > /dev/null && test_must_fail git cat-file blob $blob_3 > /dev/null' @@ -171,17 +195,75 @@ test_expect_success \ git cat-file blob $blob_3 > /dev/null' test_expect_success \ - 'corruption in delta base reference of first delta (OBJ_OFS_DELTA)' \ + '... and then a repack "clears" the corruption' \ + 'do_repack && + git prune-packed && + git verify-pack ${pack}.pack && + git cat-file blob $blob_1 > /dev/null && + git cat-file blob $blob_2 > /dev/null && + git cat-file blob $blob_3 > /dev/null' + +test_expect_success \ + 'corruption #0 in delta base reference of first delta (OBJ_OFS_DELTA)' \ 'create_new_pack --delta-base-offset && git prune-packed && - do_corrupt_object $blob_2 2 && + do_corrupt_object $blob_2 2 < /dev/zero && git cat-file blob $blob_1 > /dev/null && test_must_fail git cat-file blob $blob_2 > /dev/null && test_must_fail git cat-file blob $blob_3 > /dev/null' test_expect_success \ - '... and a redundant pack allows for full recovery too' \ + '... but having a loose copy allows for full recovery' \ 'mv ${pack}.idx tmp && + git hash-object -t blob -w file_2 && + mv tmp ${pack}.idx && + git cat-file blob $blob_1 > /dev/null && + git cat-file blob $blob_2 > /dev/null && + git cat-file blob $blob_3 > /dev/null' + +test_expect_success \ + '... and then a repack "clears" the corruption' \ + 'do_repack --delta-base-offset && + git prune-packed && + git verify-pack ${pack}.pack && + git cat-file blob $blob_1 > /dev/null && + git cat-file blob $blob_2 > /dev/null && + git cat-file blob $blob_3 > /dev/null' + +test_expect_success \ + 'corruption #1 in delta base reference of first delta (OBJ_OFS_DELTA)' \ + 'create_new_pack --delta-base-offset && + git prune-packed && + printf "\001" | do_corrupt_object $blob_2 2 && + git cat-file blob $blob_1 > /dev/null && + test_must_fail git cat-file blob $blob_2 > /dev/null && + test_must_fail git cat-file blob $blob_3 > /dev/null' + +test_expect_success \ + '... but having a loose copy allows for full recovery' \ + 'mv ${pack}.idx tmp && + git hash-object -t blob -w file_2 && + mv tmp ${pack}.idx && + git cat-file blob $blob_1 > /dev/null && + git cat-file blob $blob_2 > /dev/null && + git cat-file blob $blob_3 > /dev/null' + +test_expect_success \ + '... and then a repack "clears" the corruption' \ + 'do_repack --delta-base-offset && + git prune-packed && + git verify-pack ${pack}.pack && + git cat-file blob $blob_1 > /dev/null && + git cat-file blob $blob_2 > /dev/null && + git cat-file blob $blob_3 > /dev/null' + +test_expect_success \ + '... and a redundant pack allows for full recovery too' \ + 'do_corrupt_object $blob_2 2 < /dev/zero && + git cat-file blob $blob_1 > /dev/null && + test_must_fail git cat-file blob $blob_2 > /dev/null && + test_must_fail git cat-file blob $blob_3 > /dev/null && + mv ${pack}.idx tmp && git hash-object -t blob -w file_1 && git hash-object -t blob -w file_2 && printf "$blob_1\n$blob_2\n" | git pack-objects .git/objects/pack/pack &&