git/builtin-cat-file.c
Björn Steinbrink 5b8a94b1db git cat-file: Fix memory leak in batch mode
When run in batch mode, git cat-file never frees the memory for the blob
contents it is printing. This quickly adds up and causes git-svn to be
hardly usable for imports of large svn repos, because it uses cat-file in
batch mode and cat-file's memory usage easily reaches several hundred MB
without any good reason.

Signed-off-by: Björn Steinbrink <B.Steinbrink@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-06-28 19:50:56 -07:00

259 lines
5.6 KiB
C

/*
* GIT - The information manager from hell
*
* Copyright (C) Linus Torvalds, 2005
*/
#include "cache.h"
#include "exec_cmd.h"
#include "tag.h"
#include "tree.h"
#include "builtin.h"
#include "parse-options.h"
/*
 * Batch modes selected by --batch / --batch-check and passed to
 * batch_objects() / batch_one_object() as print_contents:
 *   BATCH       - print the object info line followed by its contents
 *   BATCH_CHECK - print the object info line only
 */
#define BATCH 1
#define BATCH_CHECK 2
/*
 * Pretty-print a tag object to stdout (file descriptor 1).
 *
 * The buffer is copied out unchanged except for the "tagger " header
 * line, whose numeric timestamp and timezone are replaced by the string
 * returned from show_date().
 *
 * sha1 - object name of the tag (currently unused)
 * buf  - raw tag object contents
 * size - number of bytes in buf
 *
 * Anything that cannot be parsed (no tagger line at all, a tagger line
 * without a terminating newline, or a tagger line without "> <digits>")
 * is written out verbatim instead of being dropped or misparsed.
 */
static void pprint_tag(const unsigned char *sha1, const char *buf, unsigned long size)
{
	/* the parser in tag.c is useless here. */
	const char *endp = buf + size;
	const char *cp = buf;
	/* Everything in [buf, rest) has already been written out. */
	const char *rest = buf;

	while (cp < endp) {
		char c = *cp++;
		if (c != '\n')
			continue;
		if (7 <= endp - cp && !memcmp("tagger ", cp, 7)) {
			const char *tagger = cp;
			const char *eol;
			const char *sp;
			char *ep;
			unsigned long date;
			long tz;

			/* Found the tagger line.  Copy out the contents
			 * of the buffer so far.
			 */
			write_or_die(1, buf, tagger - buf);
			rest = tagger;

			eol = memchr(tagger, '\n', endp - tagger);
			if (!eol)
				/* unterminated tagger line: emit it as is */
				break;
			cp = eol + 1;

			/*
			 * tagger to cp is a line that has ident and time.
			 * The ident ends at '>' and the timestamp starts
			 * at the first digit after it.
			 */
			sp = tagger;
			while (sp < cp && *sp != '>')
				sp++;
			while (sp < cp && !('0' <= *sp && *sp <= '9'))
				sp++;
			if (sp == cp)
				/* give up; the line is emitted verbatim below */
				break;

			write_or_die(1, tagger, sp - tagger);
			/* sp is at a digit, so this stops before eol */
			date = strtoul(sp, &ep, 10);
			/*
			 * strtol() skips any whitespace, newlines
			 * included, so step over blanks by hand to keep
			 * the timezone parse inside this line.
			 */
			tz = 0;
			while (ep < eol && (*ep == ' ' || *ep == '\t'))
				ep++;
			if (ep < eol)
				tz = strtol(ep, NULL, 10);
			sp = show_date(date, tz, 0);
			write_or_die(1, sp, strlen(sp));
			write_or_die(1, "\n", 1);
			rest = cp;
			break;
		}
		if (cp < endp && *cp == '\n')
			/* end of header */
			break;
	}

	/*
	 * Copy out whatever has not been written yet as is: the remainder
	 * after a reformatted tagger line, an unparsable tagger line and
	 * what follows it, or the entire object when there was no tagger
	 * line at all.
	 */
	if (rest < endp)
		write_or_die(1, rest, endp - rest);
}
/*
 * Handle the non-batch forms of cat-file for a single object.
 *
 * opt      - 't' (print type), 's' (print size), 'e' (existence check),
 *            'p' (pretty-print), or 0 to print the raw contents of the
 *            object as type exp_type
 * exp_type - expected object type; only used when opt is 0
 * obj_name - object name as given by the user
 *
 * Returns 0 on success.  For 'e' the result is non-zero when
 * has_sha1_file() reports the object as absent; for a tree under 'p'
 * the result of cmd_ls_tree() is returned.  All other failures are
 * reported through die().
 */
static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
{
	unsigned char sha1[20];
	enum object_type type;
	void *buf;
	unsigned long size;

	if (get_sha1(obj_name, sha1))
		die("Not a valid object name %s", obj_name);

	buf = NULL;
	switch (opt) {
	case 't':
		type = sha1_object_info(sha1, NULL);
		if (type > 0) {
			printf("%s\n", typename(type));
			return 0;
		}
		/* fall out to the "bad file" report below */
		break;

	case 's':
		type = sha1_object_info(sha1, &size);
		if (type > 0) {
			printf("%lu\n", size);
			return 0;
		}
		/* fall out to the "bad file" report below */
		break;

	case 'e':
		return !has_sha1_file(sha1);

	case 'p':
		type = sha1_object_info(sha1, NULL);
		if (type < 0)
			die("Not a valid object name %s", obj_name);

		/* custom pretty-print here */
		if (type == OBJ_TREE) {
			const char *ls_args[3] = {"ls-tree", obj_name, NULL};
			return cmd_ls_tree(2, ls_args, NULL);
		}

		buf = read_sha1_file(sha1, &type, &size);
		if (!buf)
			die("Cannot read object %s", obj_name);
		if (type == OBJ_TAG) {
			pprint_tag(sha1, buf, size);
			free(buf);
			return 0;
		}

		/* otherwise just spit out the data */
		break;

	case 0:
		buf = read_object_with_reference(sha1, exp_type, &size, NULL);
		break;

	default:
		/*
		 * exp_type is only set when opt is 0, so it must not be
		 * handed to %s here; report the offending option instead.
		 */
		die("git-cat-file: unknown option: %c", opt);
	}

	if (!buf)
		die("git-cat-file %s: bad file", obj_name);

	write_or_die(1, buf, size);
	free(buf);
	return 0;
}
/*
 * Process one line of batch input.
 *
 * obj_name       - object name read from stdin
 * print_contents - BATCH to print the info line and the object contents,
 *                  anything else (BATCH_CHECK) to print the info line only
 *
 * Prints "<name> missing" when the name cannot be resolved or the object
 * cannot be looked up, otherwise "<sha1> <type> <size>", followed in
 * BATCH mode by the contents and a newline.
 *
 * Returns 1 when obj_name is NULL, 0 in every other case.
 */
static int batch_one_object(const char *obj_name, int print_contents)
{
	unsigned char sha1[20];
	enum object_type type = 0;
	unsigned long size;
	/*
	 * Start from NULL instead of the "contents = contents" idiom:
	 * reading an uninitialized automatic variable is undefined.
	 */
	void *contents = NULL;

	if (!obj_name)
		return 1;

	if (get_sha1(obj_name, sha1)) {
		printf("%s missing\n", obj_name);
		fflush(stdout);
		return 0;
	}

	if (print_contents == BATCH)
		contents = read_sha1_file(sha1, &type, &size);
	else
		type = sha1_object_info(sha1, &size);

	/* type keeps its initial 0 when read_sha1_file() did not set it */
	if (type <= 0) {
		printf("%s missing\n", obj_name);
		fflush(stdout);
		return 0;
	}

	printf("%s %s %lu\n", sha1_to_hex(sha1), typename(type), size);
	/*
	 * Flush before the contents go out: write_or_die() is handed
	 * descriptor 1 directly rather than the stdout stream.
	 */
	fflush(stdout);

	if (print_contents == BATCH) {
		write_or_die(1, contents, size);
		printf("\n");
		fflush(stdout);
		free(contents);
	}

	return 0;
}
static int batch_objects(int print_contents)
{
struct strbuf buf;
strbuf_init(&buf, 0);
while (strbuf_getline(&buf, stdin, '\n') != EOF) {
int error = batch_one_object(buf.buf, print_contents);
if (error)
return error;
}
return 0;
}
/*
 * Usage lines for the command, NULL-terminated.  cmd_cat_file() passes
 * this array to parse_options() and usage_with_options().
 */
static const char * const cat_file_usage[] = {
"git-cat-file [-t|-s|-e|-p|<type>] <sha1>",
"git-cat-file [--batch|--batch-check] < <list_of_sha1s>",
NULL
};
/*
 * Entry point of the cat-file builtin.
 *
 * Accepted forms:
 *   -t | -s | -e | -p <object>   query or pretty-print a single object
 *   <type> <object>              print the raw contents as <type>
 *   --batch | --batch-check      process object names read from stdin
 *
 * Any other combination ends in usage_with_options(), which is expected
 * not to return.  Otherwise the result of batch_objects() or
 * cat_one_file() is returned.
 */
int cmd_cat_file(int argc, const char **argv, const char *prefix)
{
	int mode = 0;
	int batch_mode = 0;
	const char *exp_type = NULL;
	const char *obj_name = NULL;

	const struct option options[] = {
		OPT_GROUP("<type> can be one of: blob, tree, commit, tag"),
		OPT_SET_INT('t', NULL, &mode, "show object type", 't'),
		OPT_SET_INT('s', NULL, &mode, "show object size", 's'),
		OPT_SET_INT('e', NULL, &mode,
			    "exit with zero when there's no error", 'e'),
		OPT_SET_INT('p', NULL, &mode, "pretty-print object's content", 'p'),
		OPT_SET_INT(0, "batch", &batch_mode,
			    "show info and content of objects feeded on stdin", BATCH),
		OPT_SET_INT(0, "batch-check", &batch_mode,
			    "show info about objects feeded on stdin",
			    BATCH_CHECK),
		OPT_END()
	};

	git_config(git_default_config, NULL);

	/* Only one or two arguments after the command name are valid. */
	if (argc != 2 && argc != 3)
		usage_with_options(cat_file_usage, options);

	/* From here on argc counts what is left after option parsing. */
	argc = parse_options(argc, argv, options, cat_file_usage, 0);

	if (batch_mode) {
		/* Batch modes take neither a single-object option nor operands. */
		if (mode || argc)
			usage_with_options(cat_file_usage, options);
		return batch_objects(batch_mode);
	}

	if (mode) {
		/* -t/-s/-e/-p: exactly one object name must remain. */
		if (argc != 1)
			usage_with_options(cat_file_usage, options);
		obj_name = argv[0];
	} else {
		/* No option given: expect "<type> <object>". */
		if (argc != 2)
			usage_with_options(cat_file_usage, options);
		exp_type = argv[0];
		obj_name = argv[1];
	}

	return cat_one_file(mode, exp_type, obj_name);
}