git/pack-check.c

165 lines
4.2 KiB
C
Raw Normal View History

#include "cache.h"
#include "pack.h"
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 15:34:08 +08:00
static int verify_packfile(struct packed_git *p,
struct pack_window **w_curs)
{
off_t index_size = p->index_size;
const unsigned char *index_base = p->index_data;
SHA_CTX ctx;
unsigned char sha1[20];
off_t offset = 0, pack_sig = p->pack_size - 20;
uint32_t nr_objects, i;
int err;
/* Note that the pack header checks are actually performed by
* use_pack when it first opens the pack file. If anything
* goes wrong during those checks then the call will die out
* immediately.
*/
SHA1_Init(&ctx);
while (offset < pack_sig) {
unsigned int remaining;
unsigned char *in = use_pack(p, w_curs, offset, &remaining);
offset += remaining;
if (offset > pack_sig)
remaining -= (unsigned int)(offset - pack_sig);
SHA1_Update(&ctx, in, remaining);
}
SHA1_Final(sha1, &ctx);
if (hashcmp(sha1, use_pack(p, w_curs, pack_sig, NULL)))
return error("Packfile %s SHA1 mismatch with itself",
p->pack_name);
if (hashcmp(sha1, index_base + index_size - 40))
return error("Packfile %s SHA1 mismatch with idx",
p->pack_name);
unuse_pack(w_curs);
/* Make sure everything reachable from idx is valid. Since we
* have verified that nr_objects matches between idx and pack,
* we do not do scan-streaming check on the pack file.
*/
nr_objects = p->num_objects;
for (i = 0, err = 0; i < nr_objects; i++) {
const unsigned char *sha1;
void *data;
enum object_type type;
unsigned long size;
off_t offset;
sha1 = nth_packed_object_sha1(p, i);
if (!sha1)
die("internal error pack-check nth-packed-object");
offset = find_pack_entry_one(sha1, p);
if (!offset)
die("internal error pack-check find-pack-entry-one");
data = unpack_entry(p, offset, &type, &size);
if (!data) {
err = error("cannot unpack %s from %s",
sha1_to_hex(sha1), p->pack_name);
continue;
}
if (check_sha1_signature(sha1, data, size, typename(type))) {
err = error("packed %s from %s is corrupt",
sha1_to_hex(sha1), p->pack_name);
free(data);
continue;
}
free(data);
}
return err;
}
#define MAX_CHAIN 40
static void show_pack_info(struct packed_git *p)
{
uint32_t nr_objects, i, chain_histogram[MAX_CHAIN];
nr_objects = p->num_objects;
memset(chain_histogram, 0, sizeof(chain_histogram));
for (i = 0; i < nr_objects; i++) {
const unsigned char *sha1;
unsigned char base_sha1[20];
const char *type;
unsigned long size;
unsigned long store_size;
off_t offset;
unsigned int delta_chain_length;
sha1 = nth_packed_object_sha1(p, i);
if (!sha1)
die("internal error pack-check nth-packed-object");
offset = find_pack_entry_one(sha1, p);
if (!offset)
die("internal error pack-check find-pack-entry-one");
type = packed_object_info_detail(p, offset, &size, &store_size,
&delta_chain_length,
base_sha1);
printf("%s ", sha1_to_hex(sha1));
if (!delta_chain_length)
printf("%-6s %lu %"PRIuMAX"\n",
type, size, (uintmax_t)offset);
else {
printf("%-6s %lu %"PRIuMAX" %u %s\n",
type, size, (uintmax_t)offset,
delta_chain_length, sha1_to_hex(base_sha1));
if (delta_chain_length < MAX_CHAIN)
chain_histogram[delta_chain_length]++;
else
chain_histogram[0]++;
}
}
for (i = 0; i < MAX_CHAIN; i++) {
if (!chain_histogram[i])
continue;
printf("chain length %s %d: %d object%s\n",
i ? "=" : ">=",
i ? i : MAX_CHAIN,
chain_histogram[i],
1 < chain_histogram[i] ? "s" : "");
}
}
int verify_pack(struct packed_git *p, int verbose)
{
off_t index_size = p->index_size;
const unsigned char *index_base = p->index_data;
SHA_CTX ctx;
unsigned char sha1[20];
int ret;
ret = 0;
/* Verify SHA1 sum of the index file */
SHA1_Init(&ctx);
SHA1_Update(&ctx, index_base, (unsigned int)(index_size - 20));
SHA1_Final(sha1, &ctx);
if (hashcmp(sha1, index_base + index_size - 20))
ret = error("Packfile index for %s SHA1 mismatch",
p->pack_name);
if (!ret) {
/* Verify pack file */
Replace use_packed_git with window cursors. Part of the implementation concept of the sliding mmap window for pack access is to permit multiple windows per pack to be mapped independently. Since the inuse_cnt is associated with the mmap and not with the file, this value is in struct pack_window and needs to be incremented/decremented for each pack_window accessed by any code. To faciliate that implementation we need to replace all uses of use_packed_git() and unuse_packed_git() with a different API that follows struct pack_window objects rather than struct packed_git. The way this works is when we need to start accessing a pack for the first time we should setup a new window 'cursor' by declaring a local and setting it to NULL: struct pack_windows *w_curs = NULL; To obtain the memory region which contains a specific section of the pack file we invoke use_pack(), supplying the address of our current window cursor: unsigned int len; unsigned char *addr = use_pack(p, &w_curs, offset, &len); the returned address `addr` will be the first byte at `offset` within the pack file. The optional variable len will also be updated with the number of bytes remaining following the address. Multiple calls to use_pack() with the same window cursor will update the window cursor, moving it from one window to another when necessary. In this way each window cursor variable maintains only one struct pack_window inuse at a time. Finally before exiting the scope which originally declared the window cursor we must invoke unuse_pack() to unuse the current window (which may be different from the one that was first obtained from use_pack): unuse_pack(&w_curs); This implementation is still not complete with regards to multiple windows, as only one window per pack file is supported right now. Signed-off-by: Shawn O. Pearce <spearce@spearce.org> Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-12-23 15:34:08 +08:00
struct pack_window *w_curs = NULL;
ret = verify_packfile(p, &w_curs);
unuse_pack(&w_curs);
}
if (verbose) {
if (ret)
printf("%s: bad\n", p->pack_name);
else {
show_pack_info(p);
printf("%s: ok\n", p->pack_name);
}
}
return ret;
}