2008-10-06 05:43:21 +08:00
|
|
|
#include "userdiff.h"
|
|
|
|
#include "cache.h"
|
|
|
|
#include "attr.h"
|
|
|
|
|
|
|
|
/*
 * Table of drivers created from "diff.<name>.*" configuration keys.
 * parse_driver() grows it with ALLOC_GROW(drivers, ndrivers+1, drivers_alloc);
 * ndrivers is the number of entries in use and drivers_alloc is the
 * bookkeeping counter handed to ALLOC_GROW.
 */
static struct userdiff_driver *drivers;
static int ndrivers, drivers_alloc;
|
|
|
|
|
2009-01-21 12:59:54 +08:00
|
|
|
#define PATTERNS(name, pattern, word_regex) \
|
|
|
|
{ name, NULL, -1, { pattern, REG_EXTENDED }, word_regex }
|
2008-10-06 05:43:21 +08:00
|
|
|
static struct userdiff_driver builtin_drivers[] = {
|
2009-01-18 00:29:48 +08:00
|
|
|
PATTERNS("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$",
|
|
|
|
"[^<>= \t]+|[^[:space:]]|[\x80-\xff]+"),
|
|
|
|
PATTERNS("java",
|
2008-10-06 05:43:21 +08:00
|
|
|
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
|
2009-01-18 00:29:48 +08:00
|
|
|
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
|
|
|
|
"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
"|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
|
|
|
|
"|[-+*/<>%&^|=!]="
|
|
|
|
"|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"
|
|
|
|
"|[^[:space:]]|[\x80-\xff]+"),
|
|
|
|
PATTERNS("objc",
|
2008-10-06 05:43:21 +08:00
|
|
|
/* Negate C statements that can look like functions */
|
|
|
|
"!^[ \t]*(do|for|if|else|return|switch|while)\n"
|
|
|
|
/* Objective-C methods */
|
|
|
|
"^[ \t]*([-+][ \t]*\\([ \t]*[A-Za-z_][A-Za-z_0-9* \t]*\\)[ \t]*[A-Za-z_].*)$\n"
|
|
|
|
/* C functions */
|
|
|
|
"^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$\n"
|
|
|
|
/* Objective-C class/protocol definitions */
|
2009-01-18 00:29:48 +08:00
|
|
|
"^(@(implementation|interface|protocol)[ \t].*)$",
|
|
|
|
/* -- */
|
|
|
|
"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
"|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
|
|
|
|
"|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
|
|
|
|
"|[^[:space:]]|[\x80-\xff]+"),
|
|
|
|
PATTERNS("pascal",
|
2008-10-06 05:43:21 +08:00
|
|
|
"^((procedure|function|constructor|destructor|interface|"
|
|
|
|
"implementation|initialization|finalization)[ \t]*.*)$"
|
|
|
|
"\n"
|
2009-01-18 00:29:48 +08:00
|
|
|
"^(.*=[ \t]*(class|record).*)$",
|
|
|
|
/* -- */
|
|
|
|
"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
"|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
|
|
|
|
"|<>|<=|>=|:=|\\.\\."
|
|
|
|
"|[^[:space:]]|[\x80-\xff]+"),
|
|
|
|
PATTERNS("php", "^[\t ]*((function|class).*)",
|
|
|
|
/* -- */
|
|
|
|
"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
"|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
|
|
|
|
"|[-+*/<>%&^|=!.]=|--|\\+\\+|<<=?|>>=?|===|&&|\\|\\||::|->"
|
|
|
|
"|[^[:space:]]|[\x80-\xff]+"),
|
|
|
|
PATTERNS("python", "^[ \t]*((class|def)[ \t].*)$",
|
|
|
|
/* -- */
|
|
|
|
"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
"|[-+0-9.e]+[jJlL]?|0[xX]?[0-9a-fA-F]+[lL]?"
|
|
|
|
"|[-+*/<>%&^|=!]=|//=?|<<=?|>>=?|\\*\\*=?"
|
|
|
|
"|[^[:space:]|[\x80-\xff]+"),
|
|
|
|
/* -- */
|
|
|
|
PATTERNS("ruby", "^[ \t]*((class|module|def)[ \t].*)$",
|
|
|
|
/* -- */
|
|
|
|
"(@|@@|\\$)?[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
"|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+|\\?(\\\\C-)?(\\\\M-)?."
|
|
|
|
"|//=?|[-+*/<>%&^|=!]=|<<=?|>>=?|===|\\.{1,3}|::|[!=]~"
|
|
|
|
"|[^[:space:]|[\x80-\xff]+"),
|
|
|
|
PATTERNS("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
|
|
|
|
"[={}\"]|[^={}\" \t]+"),
|
|
|
|
PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
|
|
|
|
"\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+|[^[:space:]]"),
|
|
|
|
PATTERNS("cpp",
|
|
|
|
/* Jump targets or access declarations */
|
|
|
|
"!^[ \t]*[A-Za-z_][A-Za-z_0-9]*:.*$\n"
|
|
|
|
/* C/++ functions/methods at top level */
|
|
|
|
"^([A-Za-z_][A-Za-z_0-9]*([ \t]+[A-Za-z_][A-Za-z_0-9]*([ \t]*::[ \t]*[^[:space:]]+)?){1,}[ \t]*\\([^;]*)$\n"
|
|
|
|
/* compound type at top level */
|
|
|
|
"^((struct|class|enum)[^;]*)$",
|
|
|
|
/* -- */
|
|
|
|
"[a-zA-Z_][a-zA-Z0-9_]*"
|
|
|
|
"|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
|
|
|
|
"|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->"
|
|
|
|
"|[^[:space:]]|[\x80-\xff]+"),
|
diff: introduce diff.<driver>.binary
The "diff" gitattribute is somewhat overloaded right now. It
can say one of three things:
1. this file is definitely binary, or definitely not
(i.e., diff or !diff)
2. this file should use an external diff engine (i.e.,
diff=foo, diff.foo.command = custom-script)
3. this file should use particular funcname patterns
(i.e., diff=foo, diff.foo.(x?)funcname = some-regex)
Most of the time, there is no conflict between these uses,
since using one implies that the other is irrelevant (e.g.,
an external diff engine will decide for itself whether the
file is binary).
However, there is at least one conflicting situation: there
is no way to say "use the regular rules to determine whether
this file is binary, but if we do diff it textually, use
this funcname pattern." That is, currently setting diff=foo
indicates that the file is definitely text.
This patch introduces a "binary" config option for a diff
driver, so that one can explicitly set diff.foo.binary. We
default this value to "don't know". That is, setting a diff
attribute to "foo" and using "diff.foo.funcname" will have
no effect on the binaryness of a file. To get the current
behavior, one can set diff.foo.binary to true.
This patch also has one additional advantage: it cleans up
the interface to the userdiff code a bit. Before, calling
code had to know more about whether attributes were false,
true, or unset to determine binaryness. Now that binaryness
is a property of a driver, we can represent these situations
just by passing back a driver struct.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-06 05:43:36 +08:00
|
|
|
{ "default", NULL, -1, { NULL, 0 } },
|
2008-10-06 05:43:21 +08:00
|
|
|
};
|
2009-01-18 00:29:48 +08:00
|
|
|
#undef PATTERNS
|
2008-10-06 05:43:21 +08:00
|
|
|
|
|
|
|
static struct userdiff_driver driver_true = {
|
|
|
|
"diff=true",
|
|
|
|
NULL,
|
diff: introduce diff.<driver>.binary
The "diff" gitattribute is somewhat overloaded right now. It
can say one of three things:
1. this file is definitely binary, or definitely not
(i.e., diff or !diff)
2. this file should use an external diff engine (i.e.,
diff=foo, diff.foo.command = custom-script)
3. this file should use particular funcname patterns
(i.e., diff=foo, diff.foo.(x?)funcname = some-regex)
Most of the time, there is no conflict between these uses,
since using one implies that the other is irrelevant (e.g.,
an external diff engine will decide for itself whether the
file is binary).
However, there is at least one conflicting situation: there
is no way to say "use the regular rules to determine whether
this file is binary, but if we do diff it textually, use
this funcname pattern." That is, currently setting diff=foo
indicates that the file is definitely text.
This patch introduces a "binary" config option for a diff
driver, so that one can explicitly set diff.foo.binary. We
default this value to "don't know". That is, setting a diff
attribute to "foo" and using "diff.foo.funcname" will have
no effect on the binaryness of a file. To get the current
behavior, one can set diff.foo.binary to true.
This patch also has one additional advantage: it cleans up
the interface to the userdiff code a bit. Before, calling
code had to know more about whether attributes were false,
true, or unset to determine binaryness. Now that binaryness
is a property of a driver, we can represent these situations
just by passing back a driver struct.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-06 05:43:36 +08:00
|
|
|
0,
|
2008-10-06 05:43:21 +08:00
|
|
|
{ NULL, 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct userdiff_driver driver_false = {
|
|
|
|
"!diff",
|
|
|
|
NULL,
|
diff: introduce diff.<driver>.binary
The "diff" gitattribute is somewhat overloaded right now. It
can say one of three things:
1. this file is definitely binary, or definitely not
(i.e., diff or !diff)
2. this file should use an external diff engine (i.e.,
diff=foo, diff.foo.command = custom-script)
3. this file should use particular funcname patterns
(i.e., diff=foo, diff.foo.(x?)funcname = some-regex)
Most of the time, there is no conflict between these uses,
since using one implies that the other is irrelevant (e.g.,
an external diff engine will decide for itself whether the
file is binary).
However, there is at least one conflicting situation: there
is no way to say "use the regular rules to determine whether
this file is binary, but if we do diff it textually, use
this funcname pattern." That is, currently setting diff=foo
indicates that the file is definitely text.
This patch introduces a "binary" config option for a diff
driver, so that one can explicitly set diff.foo.binary. We
default this value to "don't know". That is, setting a diff
attribute to "foo" and using "diff.foo.funcname" will have
no effect on the binaryness of a file. To get the current
behavior, one can set diff.foo.binary to true.
This patch also has one additional advantage: it cleans up
the interface to the userdiff code a bit. Before, calling
code had to know more about whether attributes were false,
true, or unset to determine binaryness. Now that binaryness
is a property of a driver, we can represent these situations
just by passing back a driver struct.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-06 05:43:36 +08:00
|
|
|
1,
|
2008-10-06 05:43:21 +08:00
|
|
|
{ NULL, 0 }
|
|
|
|
};
|
|
|
|
|
|
|
|
static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < ndrivers; i++) {
|
|
|
|
struct userdiff_driver *drv = drivers + i;
|
|
|
|
if (!strncmp(drv->name, k, len) && !drv->name[len])
|
|
|
|
return drv;
|
|
|
|
}
|
|
|
|
for (i = 0; i < ARRAY_SIZE(builtin_drivers); i++) {
|
|
|
|
struct userdiff_driver *drv = builtin_drivers + i;
|
|
|
|
if (!strncmp(drv->name, k, len) && !drv->name[len])
|
|
|
|
return drv;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct userdiff_driver *parse_driver(const char *var,
|
|
|
|
const char *value, const char *type)
|
|
|
|
{
|
|
|
|
struct userdiff_driver *drv;
|
|
|
|
const char *dot;
|
|
|
|
const char *name;
|
|
|
|
int namelen;
|
|
|
|
|
|
|
|
if (prefixcmp(var, "diff."))
|
|
|
|
return NULL;
|
|
|
|
dot = strrchr(var, '.');
|
|
|
|
if (dot == var + 4)
|
|
|
|
return NULL;
|
|
|
|
if (strcmp(type, dot+1))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
name = var + 5;
|
|
|
|
namelen = dot - name;
|
|
|
|
drv = userdiff_find_by_namelen(name, namelen);
|
|
|
|
if (!drv) {
|
|
|
|
ALLOC_GROW(drivers, ndrivers+1, drivers_alloc);
|
|
|
|
drv = &drivers[ndrivers++];
|
|
|
|
memset(drv, 0, sizeof(*drv));
|
|
|
|
drv->name = xmemdupz(name, namelen);
|
diff: introduce diff.<driver>.binary
The "diff" gitattribute is somewhat overloaded right now. It
can say one of three things:
1. this file is definitely binary, or definitely not
(i.e., diff or !diff)
2. this file should use an external diff engine (i.e.,
diff=foo, diff.foo.command = custom-script)
3. this file should use particular funcname patterns
(i.e., diff=foo, diff.foo.(x?)funcname = some-regex)
Most of the time, there is no conflict between these uses,
since using one implies that the other is irrelevant (e.g.,
an external diff engine will decide for itself whether the
file is binary).
However, there is at least one conflicting situation: there
is no way to say "use the regular rules to determine whether
this file is binary, but if we do diff it textually, use
this funcname pattern." That is, currently setting diff=foo
indicates that the file is definitely text.
This patch introduces a "binary" config option for a diff
driver, so that one can explicitly set diff.foo.binary. We
default this value to "don't know". That is, setting a diff
attribute to "foo" and using "diff.foo.funcname" will have
no effect on the binaryness of a file. To get the current
behavior, one can set diff.foo.binary to true.
This patch also has one additional advantage: it cleans up
the interface to the userdiff code a bit. Before, calling
code had to know more about whether attributes were false,
true, or unset to determine binaryness. Now that binaryness
is a property of a driver, we can represent these situations
just by passing back a driver struct.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-06 05:43:36 +08:00
|
|
|
drv->binary = -1;
|
2008-10-06 05:43:21 +08:00
|
|
|
}
|
|
|
|
return drv;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int parse_funcname(struct userdiff_funcname *f, const char *k,
|
|
|
|
const char *v, int cflags)
|
|
|
|
{
|
|
|
|
if (git_config_string(&f->pattern, k, v) < 0)
|
|
|
|
return -1;
|
|
|
|
f->cflags = cflags;
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Store the configured string `v` (for key `k`) into `*d` through
 * git_config_string().  Returns -1 if that call reports an error,
 * 1 otherwise.
 */
static int parse_string(const char **d, const char *k, const char *v)
{
	return git_config_string(d, k, v) < 0 ? -1 : 1;
}
|
|
|
|
|
diff: introduce diff.<driver>.binary
The "diff" gitattribute is somewhat overloaded right now. It
can say one of three things:
1. this file is definitely binary, or definitely not
(i.e., diff or !diff)
2. this file should use an external diff engine (i.e.,
diff=foo, diff.foo.command = custom-script)
3. this file should use particular funcname patterns
(i.e., diff=foo, diff.foo.(x?)funcname = some-regex)
Most of the time, there is no conflict between these uses,
since using one implies that the other is irrelevant (e.g.,
an external diff engine will decide for itself whether the
file is binary).
However, there is at least one conflicting situation: there
is no way to say "use the regular rules to determine whether
this file is binary, but if we do diff it textually, use
this funcname pattern." That is, currently setting diff=foo
indicates that the file is definitely text.
This patch introduces a "binary" config option for a diff
driver, so that one can explicitly set diff.foo.binary. We
default this value to "don't know". That is, setting a diff
attribute to "foo" and using "diff.foo.funcname" will have
no effect on the binaryness of a file. To get the current
behavior, one can set diff.foo.binary to true.
This patch also has one additional advantage: it cleans up
the interface to the userdiff code a bit. Before, calling
code had to know more about whether attributes were false,
true, or unset to determine binaryness. Now that binaryness
is a property of a driver, we can represent these situations
just by passing back a driver struct.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-06 05:43:36 +08:00
|
|
|
/*
 * Parse a tristate configuration value into `*b`: the word "auto" (in any
 * letter case) yields -1, anything else, including a missing value, is
 * passed to git_config_bool().  Always returns 1.
 */
static int parse_tristate(int *b, const char *k, const char *v)
{
	int is_auto = v && !strcasecmp(v, "auto");

	*b = is_auto ? -1 : git_config_bool(k, v);
	return 1;
}
|
|
|
|
|
2008-10-26 12:45:55 +08:00
|
|
|
int userdiff_config(const char *k, const char *v)
|
2008-10-06 05:43:21 +08:00
|
|
|
{
|
|
|
|
struct userdiff_driver *drv;
|
|
|
|
|
|
|
|
if ((drv = parse_driver(k, v, "funcname")))
|
|
|
|
return parse_funcname(&drv->funcname, k, v, 0);
|
|
|
|
if ((drv = parse_driver(k, v, "xfuncname")))
|
|
|
|
return parse_funcname(&drv->funcname, k, v, REG_EXTENDED);
|
diff: introduce diff.<driver>.binary
The "diff" gitattribute is somewhat overloaded right now. It
can say one of three things:
1. this file is definitely binary, or definitely not
(i.e., diff or !diff)
2. this file should use an external diff engine (i.e.,
diff=foo, diff.foo.command = custom-script)
3. this file should use particular funcname patterns
(i.e., diff=foo, diff.foo.(x?)funcname = some-regex)
Most of the time, there is no conflict between these uses,
since using one implies that the other is irrelevant (e.g.,
an external diff engine will decide for itself whether the
file is binary).
However, there is at least one conflicting situation: there
is no way to say "use the regular rules to determine whether
this file is binary, but if we do diff it textually, use
this funcname pattern." That is, currently setting diff=foo
indicates that the file is definitely text.
This patch introduces a "binary" config option for a diff
driver, so that one can explicitly set diff.foo.binary. We
default this value to "don't know". That is, setting a diff
attribute to "foo" and using "diff.foo.funcname" will have
no effect on the binaryness of a file. To get the current
behavior, one can set diff.foo.binary to true.
This patch also has one additional advantage: it cleans up
the interface to the userdiff code a bit. Before, calling
code had to know more about whether attributes were false,
true, or unset to determine binaryness. Now that binaryness
is a property of a driver, we can represent these situations
just by passing back a driver struct.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
2008-10-06 05:43:36 +08:00
|
|
|
if ((drv = parse_driver(k, v, "binary")))
|
|
|
|
return parse_tristate(&drv->binary, k, v);
|
2008-10-06 05:43:21 +08:00
|
|
|
if ((drv = parse_driver(k, v, "command")))
|
|
|
|
return parse_string(&drv->external, k, v);
|
2008-10-06 05:43:45 +08:00
|
|
|
if ((drv = parse_driver(k, v, "textconv")))
|
|
|
|
return parse_string(&drv->textconv, k, v);
|
2009-01-18 00:29:48 +08:00
|
|
|
if ((drv = parse_driver(k, v, "wordregex")))
|
|
|
|
return parse_string(&drv->word_regex, k, v);
|
2008-10-06 05:43:21 +08:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Look up a driver by its NUL-terminated name.  Returns NULL when neither
 * the configured nor the built-in drivers contain it.
 */
struct userdiff_driver *userdiff_find_by_name(const char *name)
{
	return userdiff_find_by_namelen(name, strlen(name));
}
|
|
|
|
|
|
|
|
struct userdiff_driver *userdiff_find_by_path(const char *path)
|
|
|
|
{
|
|
|
|
static struct git_attr *attr;
|
|
|
|
struct git_attr_check check;
|
|
|
|
|
|
|
|
if (!attr)
|
|
|
|
attr = git_attr("diff", 4);
|
|
|
|
check.attr = attr;
|
|
|
|
|
|
|
|
if (!path)
|
|
|
|
return NULL;
|
|
|
|
if (git_checkattr(path, 1, &check))
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
if (ATTR_TRUE(check.value))
|
|
|
|
return &driver_true;
|
|
|
|
if (ATTR_FALSE(check.value))
|
|
|
|
return &driver_false;
|
|
|
|
if (ATTR_UNSET(check.value))
|
|
|
|
return NULL;
|
|
|
|
return userdiff_find_by_name(check.value);
|
|
|
|
}
|