git/builtin-for-each-ref.c
Nicolas Pitre 21666f1aae convert object type handling from a string to a number
We currently have two parallel notations for dealing with object types
in the code: a string and a numerical value.  One of them is obviously
redundant, and the most used one requires more stack space and a bunch
of strcmp() all over the place.

This is an initial step for the removal of the version using a char array
found in object reading code paths.  The patch is unfortunately large but
there is no sane way to split it in smaller parts without breaking the
system.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2007-02-27 01:34:21 -08:00

906 lines
21 KiB
C

#include "cache.h"
#include "refs.h"
#include "object.h"
#include "tag.h"
#include "commit.h"
#include "tree.h"
#include "blob.h"
#include "quote.h"
/*
 * Quoting styles.  cmd_for_each_ref() picks one from the command line
 * and print_value() uses it to decide how a field value is written.
 */
#define QUOTE_NONE 0 /* default when no quoting option is given */
#define QUOTE_SHELL 1 /* -s, --shell */
#define QUOTE_PERL 2 /* -p, --perl */
#define QUOTE_PYTHON 3 /* --python */
#define QUOTE_TCL 4 /* --tcl */
/*
 * How two values of an atom are compared when sorting: FIELD_STR
 * compares the string form with strcmp(); the other kinds compare the
 * numeric "ul" member (see cmp_ref_sort()).
 */
typedef enum { FIELD_STR, FIELD_ULONG, FIELD_TIME } cmp_type;
/* One extracted field value of a ref. */
struct atom_value {
const char *s; /* string form, the one that gets printed */
unsigned long ul; /* used for sorting when not FIELD_STR */
};
/* One sort key; keys are chained and tried in order by compare_refs(). */
struct ref_sort {
struct ref_sort *next; /* next key, or NULL at the end of the list */
int atom; /* index into used_atom array */
unsigned reverse : 1; /* negate the comparison result for this key */
};
/* A ref collected by grab_single_ref(). */
struct refinfo {
char *refname; /* copy of the ref name */
unsigned char objectname[20]; /* object name the ref points at */
struct atom_value *value; /* NULL until get_value() populates it; indexed by atom number */
};
/*
 * Field names accepted by parse_atom().  Entries that give no cmp_type
 * are zero-initialized, which is FIELD_STR (the first enumerator).
 */
static struct {
const char *name;
cmp_type cmp_type;
} valid_atom[] = {
{ "refname" },
{ "objecttype" },
{ "objectsize", FIELD_ULONG },
{ "objectname" },
{ "tree" },
{ "parent" }, /* NEEDSWORK: how to address 2nd and later parents? */
{ "numparent", FIELD_ULONG },
{ "object" },
{ "type" },
{ "tag" },
{ "author" },
{ "authorname" },
{ "authoremail" },
{ "authordate", FIELD_TIME },
{ "committer" },
{ "committername" },
{ "committeremail" },
{ "committerdate", FIELD_TIME },
{ "tagger" },
{ "taggername" },
{ "taggeremail" },
{ "taggerdate", FIELD_TIME },
{ "creator" },
{ "creatordate", FIELD_TIME },
{ "subject" },
{ "body" },
{ "contents" },
};
/*
* An atom is a valid field atom listed above, possibly prefixed with
* a "*" to denote deref_tag().
*
* We parse given format string and sort specifiers, and make a list
* of properties that we need to extract out of objects. refinfo
* structure will hold an array of values extracted that can be
* indexed with the "atom number", which is an index into this
* array.
*/
/* Names of the atoms in use, stored with any "*" prefix; grown by parse_atom(). */
static const char **used_atom;
/* Comparison type of each used_atom entry, copied from valid_atom. */
static cmp_type *used_atom_type;
/*
 * used_atom_cnt: number of entries in the two arrays above.
 * sort_atom_limit: used_atom_cnt as it was once the sort keys were parsed.
 * need_tagged: set when some used atom starts with "*".
 */
static int used_atom_cnt, sort_atom_limit, need_tagged;
/*
 * Used to parse format string and sort specifiers.
 *
 * The field name is the text from "atom" up to, but not including,
 * "ep"; it may start with "*".  Returns the index of that name in
 * used_atom[], appending a new entry the first time a name is seen.
 * Calls die() when the name is empty or is not listed in valid_atom.
 */
static int parse_atom(const char *atom, const char *ep)
{
	const char *sp;
	char *n;
	int i, at;

	sp = atom;
	/* Check the bound first so we never look at a byte at or past ep. */
	if (sp < ep && *sp == '*')
		sp++; /* deref */
	if (ep <= sp)
		die("malformed field name: %.*s", (int)(ep-atom), atom);

	/* Do we have the atom already used elsewhere? */
	for (i = 0; i < used_atom_cnt; i++) {
		int len = strlen(used_atom[i]);
		if (len == ep - atom && !memcmp(used_atom[i], atom, len))
			return i;
	}

	/* Is the atom a valid one? (compared without the deref prefix) */
	for (i = 0; i < ARRAY_SIZE(valid_atom); i++) {
		int len = strlen(valid_atom[i].name);
		if (len == ep - sp && !memcmp(valid_atom[i].name, sp, len))
			break;
	}

	if (ARRAY_SIZE(valid_atom) <= i)
		die("unknown field name: %.*s", (int)(ep-atom), atom);

	/* Add it in, including the deref prefix */
	at = used_atom_cnt;
	used_atom_cnt++;
	used_atom = xrealloc(used_atom,
			     (sizeof *used_atom) * used_atom_cnt);
	used_atom_type = xrealloc(used_atom_type,
				  (sizeof(*used_atom_type) * used_atom_cnt));
	n = xmalloc(ep - atom + 1);
	memcpy(n, atom, ep - atom);
	n[ep-atom] = 0;
	used_atom[at] = n;
	used_atom_type[at] = valid_atom[i].cmp_type;
	return at;
}
/*
 * Scan a format string for the next "%(" that opens an atom.
 * Returns a pointer to its '%', or NULL when there is none left.
 */
static const char *find_next(const char *cp)
{
	for (; *cp; cp++) {
		if (*cp != '%')
			continue;
		if (cp[1] == '(')
			return cp;
		/* "%%" is a quoted per-cent: step over both characters */
		if (cp[1] == '%')
			cp++;
		/* any other '%' is a singleton, literal per-cent */
	}
	return NULL;
}
/*
 * Walk the format string, register every %(atom) it mentions with
 * parse_atom(), and die() if an atom is never closed.
 */
static void verify_format(const char *format)
{
	const char *cursor = format;

	while (*cursor) {
		const char *open = find_next(cursor);
		const char *close;

		if (!open)
			break;
		close = strchr(open, ')');
		if (!close)
			die("malformatted format string %s", open);
		/* the atom name sits between "%(" and the closing ")" */
		parse_atom(open + 2, close);
		cursor = close + 1;
	}
}
/*
 * Read the object named by sha1.  Returns its raw data (NULL when it
 * cannot be read), stores the data size in *sz and the parsed
 * "struct object" in *obj (NULL when the read failed).  When the parsed
 * object borrows the returned data, *eaten is set as a signal from
 * parse_object_buffer that the caller must not free the buffer.
 */
static void *get_obj(const unsigned char *sha1, struct object **obj, unsigned long *sz, int *eaten)
{
	enum object_type type;
	void *data = read_sha1_file(sha1, &type, sz);

	*obj = data ? parse_object_buffer(sha1, type, *sz, data, eaten) : NULL;
	return data;
}
/*
 * Fill in the atoms every object type supports: objecttype,
 * objectsize and objectname.  Only atoms whose "*" prefix agrees
 * with deref are touched.
 */
static void grab_common_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
{
	int idx;

	for (idx = 0; idx < used_atom_cnt; idx++) {
		const char *field = used_atom[idx];
		struct atom_value *out = &val[idx];

		if ((deref != 0) != (*field == '*'))
			continue;
		if (deref)
			field++; /* skip the "*" */

		if (strcmp(field, "objecttype") == 0) {
			out->s = typename(obj->type);
		} else if (strcmp(field, "objectsize") == 0) {
			char *text = xmalloc(40);

			out->ul = sz;
			sprintf(text, "%lu", sz);
			out->s = text;
		} else if (strcmp(field, "objectname") == 0) {
			char *hex = xmalloc(41);

			strcpy(hex, sha1_to_hex(obj->sha1));
			out->s = hex;
		}
	}
}
/*
 * Fill in the "tag" atom from a tag object.  Only atoms whose "*"
 * prefix agrees with deref are touched.
 */
static void grab_tag_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
{
	struct tag *tag = (struct tag *) obj;
	int idx;

	for (idx = 0; idx < used_atom_cnt; idx++) {
		const char *field = used_atom[idx];

		if ((deref != 0) != (*field == '*'))
			continue;
		if (deref)
			field++; /* skip the "*" */
		if (strcmp(field, "tag") == 0)
			val[idx].s = tag->tag;
	}
}
/* Count the entries on the commit's parent list. */
static int num_parents(struct commit *commit)
{
	struct commit_list *p = commit->parents;
	int count = 0;

	while (p) {
		count++;
		p = p->next;
	}
	return count;
}
/*
 * Fill in the commit-only atoms: tree, numparent and parent.  Only
 * atoms whose "*" prefix agrees with deref are touched.
 *
 * "parent" is the space-separated list of all parent object names and
 * is the empty string for a commit without parents.
 */
static void grab_commit_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
{
	int i;
	struct commit *commit = (struct commit *) obj;

	for (i = 0; i < used_atom_cnt; i++) {
		const char *name = used_atom[i];
		struct atom_value *v = &val[i];
		if (!!deref != (*name == '*'))
			continue;
		if (deref)
			name++;
		if (!strcmp(name, "tree")) {
			char *s = xmalloc(41);
			strcpy(s, sha1_to_hex(commit->tree->object.sha1));
			v->s = s;
		}
		else if (!strcmp(name, "numparent")) {
			char *s = xmalloc(40);
			/* count first: the string must show the real number */
			v->ul = num_parents(commit);
			sprintf(s, "%lu", v->ul);
			v->s = s;
		}
		else if (!strcmp(name, "parent")) {
			int num = num_parents(commit);
			int off = 0;
			struct commit_list *parents;
			/*
			 * Each parent takes 40 hex digits plus one byte that
			 * is either the separating space or the final NUL;
			 * the extra byte keeps the no-parent case terminated.
			 */
			char *s = xmalloc(41 * num + 1);
			*s = '\0';
			v->s = s;
			for (parents = commit->parents;
			     parents;
			     parents = parents->next, off += 41) {
				struct commit *parent = parents->item;
				strcpy(s + off, sha1_to_hex(parent->object.sha1));
				if (parents->next)
					s[off + 40] = ' ';
			}
		}
	}
}
/*
 * Look through the header lines of buf for one that starts with "who"
 * followed by a space, and return a pointer just past that space.
 * The search stops at the blank line that ends the header, so text in
 * the message body is never matched.  Returns "" when no such header
 * line exists; never returns NULL.  "sz" is not used.
 */
static const char *find_wholine(const char *who, int wholen, const char *buf, unsigned long sz)
{
	const char *eol;
	while (*buf) {
		if (!strncmp(buf, who, wholen) &&
		    buf[wholen] == ' ')
			return buf + wholen + 1;
		eol = strchr(buf, '\n');
		if (!eol)
			return "";
		eol++;
		/* eol is now the start of the next line; empty means end of header */
		if (*eol == '\n')
			return ""; /* end of header */
		buf = eol;
	}
	return "";
}
/*
 * Return a newly allocated copy of buf up to, not including, its first
 * newline.  When buf has no newline the literal "" is returned instead.
 */
static char *copy_line(const char *buf)
{
	const char *nl = strchr(buf, '\n');
	char *copy;
	int n;

	if (nl == NULL)
		return "";
	n = nl - buf;
	copy = xmalloc(n + 1);
	memcpy(copy, buf, n);
	copy[n] = '\0';
	return copy;
}
/*
 * Return a newly allocated copy of the text that precedes " <" on the
 * first line of buf.  Returns the literal "" when the line has no
 * newline or no " <" before it.
 */
static char *copy_name(const char *buf)
{
	const char *nl = strchr(buf, '\n');
	const char *name_end = strstr(buf, " <");
	char *copy;
	int n;

	if (!name_end || !nl || name_end >= nl)
		return "";
	n = name_end - buf;
	copy = xmalloc(n + 1);
	memcpy(copy, buf, n);
	copy[n] = '\0';
	return copy;
}
/*
 * Return a newly allocated copy of the "<...>" part of buf, brackets
 * included.  Returns the literal "" when buf has no '<', or no '>'
 * after it.
 */
static char *copy_email(const char *buf)
{
	const char *email = strchr(buf, '<');
	const char *eoemail;
	char *line;
	int len;

	/* Only search for '>' once we know there is a '<' to start from. */
	if (!email)
		return "";
	eoemail = strchr(email, '>');
	if (!eoemail)
		return "";
	eoemail++;
	len = eoemail - email;
	line = xmalloc(len + 1);
	memcpy(line, email, len);
	line[len] = 0;
	return line;
}
/*
 * Parse the timestamp and timezone that follow "> " in buf.  On
 * success v->s gets a copy of the show_date() rendering and v->ul the
 * timestamp; otherwise v->s is "" and v->ul is 0.
 */
static void grab_date(const char *buf, struct atom_value *v)
{
	const char *after_email = strstr(buf, "> ");
	unsigned long when = 0;
	long zone = 0;
	int usable = 0;

	if (after_email) {
		char *zone_text;

		when = strtoul(after_email + 2, &zone_text, 10);
		if (when != ULONG_MAX) {
			zone = strtol(zone_text, NULL, 10);
			usable = !((zone == LONG_MIN || zone == LONG_MAX) &&
				   errno == ERANGE);
		}
	}

	if (!usable) {
		v->s = "";
		v->ul = 0;
		return;
	}
	v->s = xstrdup(show_date(when, zone, 0));
	v->ul = when;
}
/*
 * Fill in the atoms derived from the "who" header line ("author",
 * "committer" or "tagger"): the bare name plus its "name", "email"
 * and "date" variants.  For "tagger" and "committer" the same line
 * also feeds "creator" and "creatordate".  Only atoms whose "*" prefix
 * agrees with deref are touched.
 */
static void grab_person(const char *who, struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
{
	const int wholen = strlen(who);
	const char *line = NULL; /* header line, looked up on first use */
	int idx;

	for (idx = 0; idx < used_atom_cnt; idx++) {
		const char *field = used_atom[idx];
		const char *suffix;
		struct atom_value *out = &val[idx];

		if ((deref != 0) != (*field == '*'))
			continue;
		if (deref)
			field++; /* skip the "*" */
		if (strncmp(who, field, wholen) != 0)
			continue;
		suffix = field + wholen;
		if (*suffix != '\0' &&
		    strcmp(suffix, "name") != 0 &&
		    strcmp(suffix, "email") != 0 &&
		    strcmp(suffix, "date") != 0)
			continue;

		if (line == NULL)
			line = find_wholine(who, wholen, buf, sz);
		if (line == NULL)
			return; /* no point looking for it */

		if (*suffix == '\0')
			out->s = copy_line(line);
		else if (strcmp(suffix, "name") == 0)
			out->s = copy_name(line);
		else if (strcmp(suffix, "email") == 0)
			out->s = copy_email(line);
		else /* the filter above leaves only "date" */
			grab_date(line, out);
	}

	/*
	 * "creator" and "creatordate" come from the tagger of a tag or
	 * the committer of a commit; the author line is not wanted here.
	 */
	if (strcmp(who, "tagger") != 0 && strcmp(who, "committer") != 0)
		return;

	if (line == NULL)
		line = find_wholine(who, wholen, buf, sz);
	if (line == NULL)
		return;

	for (idx = 0; idx < used_atom_cnt; idx++) {
		const char *field = used_atom[idx];
		struct atom_value *out = &val[idx];

		if ((deref != 0) != (*field == '*'))
			continue;
		if (deref)
			field++;
		if (strcmp(field, "creatordate") == 0)
			grab_date(line, out);
		else if (strcmp(field, "creator") == 0)
			out->s = copy_line(line);
	}
}
/*
 * Locate the subject and body inside a commit or tag buffer.
 *
 * Skips the header (everything up to the first blank line), then sets
 * *sub to the first non-empty line.  *body is set to the text after
 * the blank line(s) that follow the subject line, or to "" when the
 * subject line is the last thing in the buffer.  When no subject is
 * found, neither *sub nor *body is written.  "sz" is not used.
 */
static void find_subpos(const char *buf, unsigned long sz, const char **sub, const char **body)
{
	while (*buf) {
		const char *eol = strchr(buf, '\n');
		if (!eol)
			return;
		if (eol[1] == '\n') {
			buf = eol + 1;
			break; /* found end of header */
		}
		buf = eol + 1;
	}
	while (*buf == '\n')
		buf++;
	if (!*buf)
		return;
	*sub = buf; /* first non-empty line */
	buf = strchr(buf, '\n');
	if (!buf) {
		/* a subject always comes with a non-NULL body for the caller */
		*body = "";
		return; /* no body */
	}
	while (*buf == '\n')
		buf++; /* skip blank between subject and body */
	*body = buf;
}
/*
 * Fill in the subject, body and contents atoms from the message part
 * of a commit or tag buffer.  Only atoms whose "*" prefix agrees with
 * deref are touched.  Nothing is filled in when the buffer has no
 * subject.
 */
static void grab_sub_body_contents(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
{
	int i;
	const char *subpos = NULL, *bodypos = NULL;

	for (i = 0; i < used_atom_cnt; i++) {
		const char *name = used_atom[i];
		struct atom_value *v = &val[i];
		if (!!deref != (*name == '*'))
			continue;
		if (deref)
			name++;
		if (strcmp(name, "subject") &&
		    strcmp(name, "body") &&
		    strcmp(name, "contents"))
			continue;
		/* locate the positions once, on the first atom that needs them */
		if (!subpos)
			find_subpos(buf, sz, &subpos, &bodypos);
		if (!subpos)
			return;

		if (!strcmp(name, "subject"))
			v->s = copy_line(subpos);
		else if (!strcmp(name, "body"))
			/* bodypos may still be NULL when there is a subject but no body */
			v->s = bodypos ? xstrdup(bodypos) : "";
		else if (!strcmp(name, "contents"))
			v->s = xstrdup(subpos);
	}
}
/* We want to have empty print-string for field requests
* that do not apply (e.g. "authordate" for a tag object)
*/
static void fill_missing_values(struct atom_value *val)
{
int i;
for (i = 0; i < used_atom_cnt; i++) {
struct atom_value *v = &val[i];
if (v->s == NULL)
v->s = "";
}
}
/*
 * Extract the values for the atoms in used_atom out of (obj, buf, sz)
 * into val.  With deref false, (obj, buf, sz) is the object the ref
 * itself points at and only unprefixed atoms are filled; with deref
 * true it is the object a tag refers to and only "*" atoms are filled.
 */
static void grab_values(struct atom_value *val, int deref, struct object *obj, void *buf, unsigned long sz)
{
	grab_common_values(val, deref, obj, buf, sz);

	switch (obj->type) {
	case OBJ_COMMIT:
		grab_commit_values(val, deref, obj, buf, sz);
		grab_sub_body_contents(val, deref, obj, buf, sz);
		grab_person("author", val, deref, obj, buf, sz);
		grab_person("committer", val, deref, obj, buf, sz);
		break;
	case OBJ_TAG:
		grab_tag_values(val, deref, obj, buf, sz);
		grab_sub_body_contents(val, deref, obj, buf, sz);
		grab_person("tagger", val, deref, obj, buf, sz);
		break;
	case OBJ_TREE:
	case OBJ_BLOB:
		/* trees and blobs only have the common values */
		break;
	default:
		die("Eh? Object of type %d?", obj->type);
	}
}
/*
 * Allocate ref->value and fill it from the object the ref points at.
 * When that object is a tag and some atom asks for the tagged object,
 * the "*" atoms are filled from the object the tag refers to.
 */
static void populate_value(struct refinfo *ref)
{
	void *data;
	struct object *obj;
	int eaten, idx;
	unsigned long size;
	const unsigned char *tagged;

	ref->value = xcalloc(sizeof(struct atom_value), used_atom_cnt);

	data = get_obj(ref->objectname, &obj, &size, &eaten);
	if (!data)
		die("missing object %s for %s",
		    sha1_to_hex(ref->objectname), ref->refname);
	if (!obj)
		die("parse_object_buffer failed on %s for %s",
		    sha1_to_hex(ref->objectname), ref->refname);

	/* The refname atoms come from the ref itself, not from the object */
	for (idx = 0; idx < used_atom_cnt; idx++) {
		const char *field = used_atom[idx];
		struct atom_value *out = &ref->value[idx];

		if (strcmp(field, "refname") == 0) {
			out->s = ref->refname;
		} else if (strcmp(field, "*refname") == 0) {
			/* room for the name, "^{}" and the terminating NUL */
			char *peeled = xmalloc(strlen(ref->refname) + 4);

			sprintf(peeled, "%s^{}", ref->refname);
			out->s = peeled;
		}
	}

	grab_values(ref->value, 0, obj, data, size);
	if (!eaten)
		free(data);

	/*
	 * Done unless some atom wants the tagged object and this
	 * really is a tag.
	 */
	if (!need_tagged || (obj->type != OBJ_TAG))
		return;

	/*
	 * NEEDSWORK: This derefs tag only once, which
	 * is good to deal with chains of trust, but
	 * is not consistent with what deref_tag() does
	 * which peels the onion to the core.
	 */
	tagged = ((struct tag *)obj)->tagged->sha1;
	data = get_obj(tagged, &obj, &size, &eaten);
	if (!data)
		die("missing object %s for %s",
		    sha1_to_hex(tagged), ref->refname);
	if (!obj)
		die("parse_object_buffer failed on %s for %s",
		    sha1_to_hex(tagged), ref->refname);

	grab_values(ref->value, 1, obj, data, size);
	if (!eaten)
		free(data);
}
/*
 * Store in *v the value of the given atom for ref.  The values are
 * extracted from the object on first use via populate_value().
 */
static void get_value(struct refinfo *ref, int atom, struct atom_value **v)
{
	if (ref->value == NULL) {
		populate_value(ref);
		fill_missing_values(ref->value);
	}
	*v = ref->value + atom;
}
/* State handed to grab_single_ref() through for_each_ref()'s cb_data. */
struct grab_ref_cbdata {
struct refinfo **grab_array; /* refs collected so far */
const char **grab_pattern; /* NULL-terminated patterns; an empty list accepts every ref */
int grab_cnt; /* number of entries in grab_array */
};
/*
 * A call-back given to for_each_ref().  cb_data is a
 * struct grab_ref_cbdata; the ref is appended to its grab_array when
 * no pattern was given or when it matches one of the patterns.
 * Always returns 0.
 */
static int grab_single_ref(const char *refname, const unsigned char *sha1, int flag, void *cb_data)
{
	struct grab_ref_cbdata *state = cb_data;
	struct refinfo *entry;
	int used;

	if (*state->grab_pattern) {
		const int namelen = strlen(refname);
		const char **pp = state->grab_pattern;

		while (*pp) {
			const char *pat = *pp;
			const int plen = strlen(pat);

			/* leading match that ends on a path component boundary */
			if (plen <= namelen &&
			    strncmp(refname, pat, plen) == 0 &&
			    (refname[plen] == '/' || refname[plen] == '\0'))
				break;
			if (fnmatch(pat, refname, FNM_PATHNAME) == 0)
				break;
			pp++;
		}
		if (!*pp)
			return 0; /* no pattern matched */
	}

	/*
	 * We do not open the object yet; sort may only need refname
	 * to do its job and the resulting list may yet to be pruned
	 * by maxcount logic.
	 */
	entry = xcalloc(1, sizeof(*entry));
	entry->refname = xstrdup(refname);
	hashcpy(entry->objectname, sha1);

	used = state->grab_cnt;
	state->grab_array = xrealloc(state->grab_array,
				     sizeof(*state->grab_array) * (used + 1));
	state->grab_array[used] = entry;
	state->grab_cnt = used + 1;
	return 0;
}
/*
 * Compare refs a and b on a single sort key.  String atoms are
 * compared with strcmp(), all others by their numeric value; the
 * result is negated when the key is reversed.
 */
static int cmp_ref_sort(struct ref_sort *s, struct refinfo *a, struct refinfo *b)
{
	cmp_type kind = used_atom_type[s->atom];
	struct atom_value *lhs, *rhs;
	int result;

	get_value(a, s->atom, &lhs);
	get_value(b, s->atom, &rhs);

	if (kind == FIELD_STR)
		result = strcmp(lhs->s, rhs->s);
	else
		result = (lhs->ul > rhs->ul) - (lhs->ul < rhs->ul);

	return s->reverse ? -result : result;
}
/* Sort keys consulted by compare_refs(); set by sort_refs(). */
static struct ref_sort *ref_sort;

/*
 * qsort() comparison over an array of "struct refinfo *": the first
 * sort key that tells the two refs apart decides the order.
 */
static int compare_refs(const void *a_, const void *b_)
{
	struct refinfo *left = *(struct refinfo *const *)a_;
	struct refinfo *right = *(struct refinfo *const *)b_;
	struct ref_sort *key;
	int cmp = 0;

	for (key = ref_sort; key && !cmp; key = key->next)
		cmp = cmp_ref_sort(key, left, right);
	return cmp;
}
/* Sort the refs array in place according to the given list of keys. */
static void sort_refs(struct ref_sort *sort, struct refinfo **refs, int num_refs)
{
	/* compare_refs() takes no extra argument, so pass the keys via ref_sort */
	ref_sort = sort;
	qsort(refs, num_refs, sizeof(*refs), compare_refs);
}
/*
 * Write the value of one atom of ref to stdout, quoted as requested.
 * An unrecognized quote_style prints nothing.
 */
static void print_value(struct refinfo *ref, int atom, int quote_style)
{
	struct atom_value *v;
	const char *text;

	get_value(ref, atom, &v);
	text = v->s;

	if (quote_style == QUOTE_NONE)
		fputs(text, stdout);
	else if (quote_style == QUOTE_SHELL)
		sq_quote_print(stdout, text);
	else if (quote_style == QUOTE_PERL)
		perl_quote_print(stdout, text);
	else if (quote_style == QUOTE_PYTHON)
		python_quote_print(stdout, text);
	else if (quote_style == QUOTE_TCL)
		tcl_quote_print(stdout, text);
}
/* Value of one hexadecimal digit (either case), or -1 if ch is not one. */
static int hex1(char ch)
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	if (ch >= 'A' && ch <= 'F')
		return ch - 'A' + 10;
	return -1;
}

/*
 * Decode the two hexadecimal digits at cp into a value 0..255.
 * Returns -1 when the string is shorter than two characters or when
 * either character is not a hexadecimal digit.
 */
static int hex2(const char *cp)
{
	int hi, lo;

	if (!cp[0] || !cp[1])
		return -1;
	hi = hex1(cp[0]);
	lo = hex1(cp[1]);
	/* reject bad digits before shifting: shifting a negative value is undefined */
	if (hi < 0 || lo < 0)
		return -1;
	return (hi << 4) | lo;
}
/*
 * Print the literal text from cp up to ep (or up to the end of the
 * string when ep is NULL).  "%%" prints a single per-cent, "%" followed
 * by two hexadecimal digits prints the byte they encode, and any other
 * character is printed as is.
 */
static void emit(const char *cp, const char *ep)
{
	while (*cp && (ep == NULL || cp < ep)) {
		int decoded = -1;

		if (*cp == '%') {
			if (cp[1] == '%')
				cp++; /* print the second '%' below */
			else
				decoded = hex2(cp + 1);
		}

		if (decoded >= 0) {
			putchar(decoded);
			cp += 3; /* the '%' and both digits */
		} else {
			putchar(*cp);
			cp++;
		}
	}
}
/*
 * Print one ref according to format: literal text goes through emit(),
 * each %(atom) is replaced by its value, and a newline ends the line.
 */
static void show_ref(struct refinfo *info, const char *format, int quote_style)
{
	const char *cursor = format;

	while (*cursor) {
		const char *open = find_next(cursor);
		const char *close;

		if (!open)
			break;
		close = strchr(open, ')');
		if (cursor < open)
			emit(cursor, open);
		print_value(info, parse_atom(open + 2, close), quote_style);
		cursor = close + 1;
	}

	/* whatever follows the last atom */
	if (*cursor)
		emit(cursor, cursor + strlen(cursor));
	putchar('\n');
}
/* Build the sort key used when none is given: ascending by refname. */
static struct ref_sort *default_sort(void)
{
	static const char cstr_name[] = "refname";
	struct ref_sort *key = xcalloc(1, sizeof(*key));

	key->next = NULL;
	/* sizeof counts the terminating NUL, hence the - 1 */
	key->atom = parse_atom(cstr_name, cstr_name + sizeof(cstr_name) - 1);
	return key;
}
/*
 * Entry point of the for-each-ref command.
 *
 * Recognized options: --format=<fmt>, one quoting style (-s/--shell,
 * -p/--perl, --python, --tcl), --count=<n> and any number of
 * --sort=[-]<field> keys, applied in the order given.  The remaining
 * arguments are ref patterns.  Matching refs are collected, sorted and
 * printed, at most <n> of them when --count is given.  Returns 0.
 */
int cmd_for_each_ref(int ac, const char **av, char *prefix)
{
	int i, num_refs;
	const char *format = NULL;
	struct ref_sort *sort = NULL, **sort_tail = &sort;
	int maxcount = 0;
	int quote_style = -1; /* unspecified yet */
	struct refinfo **refs;
	struct grab_ref_cbdata cbdata;

	for (i = 1; i < ac; i++) {
		const char *arg = av[i];
		if (arg[0] != '-')
			break;
		if (!strcmp(arg, "--")) {
			i++;
			break;
		}
		if (!prefixcmp(arg, "--format=")) {
			if (format)
				die("more than one --format?");
			format = arg + 9;
			continue;
		}
		if (!strcmp(arg, "-s") || !strcmp(arg, "--shell") ) {
			if (0 <= quote_style)
				die("more than one quoting style?");
			quote_style = QUOTE_SHELL;
			continue;
		}
		if (!strcmp(arg, "-p") || !strcmp(arg, "--perl") ) {
			if (0 <= quote_style)
				die("more than one quoting style?");
			quote_style = QUOTE_PERL;
			continue;
		}
		if (!strcmp(arg, "--python") ) {
			if (0 <= quote_style)
				die("more than one quoting style?");
			quote_style = QUOTE_PYTHON;
			continue;
		}
		if (!strcmp(arg, "--tcl") ) {
			if (0 <= quote_style)
				die("more than one quoting style?");
			quote_style = QUOTE_TCL;
			continue;
		}
		if (!prefixcmp(arg, "--count=")) {
			if (maxcount)
				die("more than one --count?");
			maxcount = atoi(arg + 8);
			if (maxcount <= 0)
				die("The number %s did not parse", arg);
			continue;
		}
		if (!prefixcmp(arg, "--sort=")) {
			struct ref_sort *s = xcalloc(1, sizeof(*s));
			int len;

			/* append to the list so keys keep command-line order */
			s->next = NULL;
			*sort_tail = s;
			sort_tail = &s->next;

			arg += 7;
			if (*arg == '-') {
				s->reverse = 1;
				arg++;
			}
			len = strlen(arg);
			/*
			 * Store the atom in the key just created; writing to
			 * the head of the list would clobber the first key.
			 */
			s->atom = parse_atom(arg, arg+len);
			continue;
		}
		break;
	}
	if (quote_style < 0)
		quote_style = QUOTE_NONE;

	if (!sort)
		sort = default_sort();
	sort_atom_limit = used_atom_cnt;
	if (!format)
		format = "%(objectname) %(objecttype)\t%(refname)";

	verify_format(format);

	/* the arguments left over are the ref patterns */
	memset(&cbdata, 0, sizeof(cbdata));
	cbdata.grab_pattern = av + i;
	for_each_ref(grab_single_ref, &cbdata);
	refs = cbdata.grab_array;
	num_refs = cbdata.grab_cnt;

	/* any "*" atom means the tagged object must be read as well */
	for (i = 0; i < used_atom_cnt; i++) {
		if (used_atom[i][0] == '*') {
			need_tagged = 1;
			break;
		}
	}

	sort_refs(sort, refs, num_refs);

	if (!maxcount || num_refs < maxcount)
		maxcount = num_refs;
	for (i = 0; i < maxcount; i++)
		show_ref(refs[i], format, quote_style);
	return 0;
}