2006-05-01 14:28:15 +08:00
|
|
|
/*
|
|
|
|
* Builtin "git grep"
|
|
|
|
*
|
|
|
|
* Copyright (c) 2006 Junio C Hamano
|
|
|
|
*/
|
|
|
|
#include "cache.h"
|
|
|
|
#include "blob.h"
|
|
|
|
#include "tree.h"
|
|
|
|
#include "commit.h"
|
|
|
|
#include "tag.h"
|
2006-05-02 06:58:29 +08:00
|
|
|
#include "tree-walk.h"
|
2006-05-01 14:28:15 +08:00
|
|
|
#include "builtin.h"
|
2009-05-08 03:46:48 +08:00
|
|
|
#include "parse-options.h"
|
2009-07-02 06:07:24 +08:00
|
|
|
#include "userdiff.h"
|
2006-09-18 07:02:52 +08:00
|
|
|
#include "grep.h"
|
2009-09-05 20:31:17 +08:00
|
|
|
#include "quote.h"
|
2010-01-16 04:52:40 +08:00
|
|
|
#include "dir.h"
|
2008-03-13 05:39:16 +08:00
|
|
|
|
2009-05-08 03:46:48 +08:00
|
|
|
/* Usage text for "git grep"; the list is terminated by a NULL entry. */
static const char *const grep_usage[] = {
	"git grep [options] [-e] <pattern> [<rev>...] [[--] path...]",
	NULL
};
|
|
|
|
|
2009-03-07 20:32:32 +08:00
|
|
|
static int grep_config(const char *var, const char *value, void *cb)
|
|
|
|
{
|
|
|
|
struct grep_opt *opt = cb;
|
|
|
|
|
2009-07-02 06:07:24 +08:00
|
|
|
switch (userdiff_config(var, value)) {
|
|
|
|
case 0: break;
|
|
|
|
case -1: return -1;
|
|
|
|
default: return 0;
|
|
|
|
}
|
|
|
|
|
2009-04-21 06:58:15 +08:00
|
|
|
if (!strcmp(var, "color.grep")) {
|
2009-03-07 20:32:32 +08:00
|
|
|
opt->color = git_config_colorbool(var, value, -1);
|
|
|
|
return 0;
|
|
|
|
}
|
2009-04-21 06:58:15 +08:00
|
|
|
if (!strcmp(var, "color.grep.match")) {
|
2009-03-07 20:32:32 +08:00
|
|
|
if (!value)
|
|
|
|
return config_error_nonbool(var);
|
|
|
|
color_parse(value, var, opt->color_match);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return git_color_default_config(var, value, cb);
|
|
|
|
}
|
|
|
|
|
grep: Add --max-depth option.
It is useful to grep directories non-recursively, e.g. when one wants to
look for all files in the toplevel directory, but not in any subdirectory,
or in Documentation/, but not in Documentation/technical/.
This patch adds support for --max-depth <depth> option to git-grep. If it is
given, git-grep descends at most <depth> levels of directories below paths
specified on the command line.
Note that if path specified on command line contains wildcards, this option
makes no sense, e.g.
$ git grep -l --max-depth 0 GNU -- 'contrib/*'
(note the quotes) will search all files in contrib/, even in
subdirectories, because '*' matches all files.
Documentation updates, bash-completion and simple test cases are also
provided.
Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-23 01:52:15 +08:00
|
|
|
/*
 * Decide whether "path" is shallow enough to be accepted.
 *
 * Returns 1 when max_depth is negative (no limit) or when path
 * contains at most max_depth '/' characters; returns 0 otherwise.
 */
static int accept_subdir(const char *path, int max_depth)
{
	const char *slash;
	int budget = max_depth;

	if (budget < 0)
		return 1;

	/* every '/' found uses up one level of the depth budget */
	for (slash = strchr(path, '/'); slash; slash = strchr(slash + 1, '/')) {
		if (--budget < 0)
			return 0;
	}
	return 1;
}
|
|
|
|
|
|
|
|
/*
 * Return non-zero if name is a subdirectory of match and is not too deep.
 *
 * "name" must start with the first matchlen bytes of "match".  An
 * exact match is always accepted.  Otherwise the match has to end on a
 * directory boundary (match is empty, match ends with '/', or the next
 * character of name is '/'), and the remainder of name, starting at
 * name + matchlen + 1, is checked against max_depth by accept_subdir().
 */
static int is_subdir(const char *name, int namelen,
		     const char *match, int matchlen, int max_depth)
{
	int at_boundary;

	if (namelen < matchlen)
		return 0;
	if (strncmp(name, match, matchlen) != 0)
		return 0;

	if (!name[matchlen])	/* exact match */
		return 1;

	/* "||" keeps match[matchlen - 1] from being read when matchlen is 0 */
	at_boundary = matchlen == 0 ||
		      match[matchlen - 1] == '/' ||
		      name[matchlen] == '/';
	if (!at_boundary)
		return 0;

	return accept_subdir(name + matchlen + 1, max_depth);
}
|
|
|
|
|
2006-05-02 03:27:56 +08:00
|
|
|
/*
|
|
|
|
* git grep pathspecs are somewhat different from diff-tree pathspecs;
|
|
|
|
* pathname wildcards are allowed.
|
|
|
|
*/
|
grep: Add --max-depth option.
It is useful to grep directories non-recursively, e.g. when one wants to
look for all files in the toplevel directory, but not in any subdirectory,
or in Documentation/, but not in Documentation/technical/.
This patch adds support for --max-depth <depth> option to git-grep. If it is
given, git-grep descends at most <depth> levels of directories below paths
specified on the command line.
Note that if path specified on command line contains wildcards, this option
makes no sense, e.g.
$ git grep -l --max-depth 0 GNU -- 'contrib/*'
(note the quotes) will search all files in contrib/, even in
subdirectories, because '*' matches all files.
Documentation updates, bash-completion and simple test cases are also
provided.
Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-23 01:52:15 +08:00
|
|
|
/*
 * Return 1 if "name" is selected by the NULL-terminated pathspec list
 * "paths", 0 otherwise.
 *
 * Without any pathspec the answer only depends on max_depth (see
 * accept_subdir()).  Otherwise "name" is accepted as soon as one
 * pathspec
 *  - is a leading directory of (or equal to) name, within max_depth, or
 *  - matches name as an fnmatch() pattern, or
 *  - when name ends with '/' (a directory we may descend into), has a
 *    literal leading part that overlaps with name.
 */
static int pathspec_matches(const char **paths, const char *name, int max_depth)
{
	int namelen, i;

	if (!paths || !*paths)
		return accept_subdir(name, max_depth);

	namelen = strlen(name);
	for (i = 0; paths[i]; i++) {
		const char *match = paths[i];
		int matchlen = strlen(match);
		int literal_len, cmp_len;

		if (is_subdir(name, namelen, match, matchlen, max_depth))
			return 1;
		if (!fnmatch(match, name, 0))
			return 1;

		/*
		 * Only directory names (trailing '/') get the prefix
		 * treatment below.  The namelen check keeps us from
		 * reading name[-1] when name is empty.
		 */
		if (!namelen || name[namelen - 1] != '/')
			continue;

		/*
		 * We are being asked if the directory ("name") is worth
		 * descending into.
		 *
		 * Find the longest leading part of the pathspec that does
		 * not have a metacharacter; the name we are looking at
		 * must overlap with that part.  strcspn() yields matchlen
		 * when the pathspec is fully literal.
		 */
		literal_len = (int)strcspn(match, "*[?");

		/*
		 * Looking at "Documentation/" while the pattern says
		 * "Documentation/howto/" or "Documentation/diff*.txt":
		 * the whole name must be a prefix of the pattern.
		 *
		 * Looking at "Documentation/howto/" while the pattern
		 * says "Documentation/h*": compare up to "Do.../h"; this
		 * avoids descending into "Documentation/technical/".
		 *
		 * Both cases compare the shorter of the two lengths.
		 */
		cmp_len = namelen < literal_len ? namelen : literal_len;
		if (!memcmp(match, name, cmp_len))
			return 1;
	}
	return 0;
}
|
|
|
|
|
2006-08-11 15:44:42 +08:00
|
|
|
static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, const char *name, int tree_name_len)
|
2006-05-01 14:28:15 +08:00
|
|
|
{
|
|
|
|
unsigned long size;
|
|
|
|
char *data;
|
2007-02-27 03:55:59 +08:00
|
|
|
enum object_type type;
|
2006-05-01 14:28:15 +08:00
|
|
|
int hit;
|
2009-09-05 20:31:17 +08:00
|
|
|
struct strbuf pathbuf = STRBUF_INIT;
|
2006-08-11 15:44:42 +08:00
|
|
|
|
2007-02-27 03:55:59 +08:00
|
|
|
data = read_sha1_file(sha1, &type, &size);
|
2006-05-01 14:28:15 +08:00
|
|
|
if (!data) {
|
|
|
|
error("'%s': unable to read %s", name, sha1_to_hex(sha1));
|
|
|
|
return 0;
|
|
|
|
}
|
2006-08-11 15:44:42 +08:00
|
|
|
if (opt->relative && opt->prefix_length) {
|
2009-09-05 20:31:17 +08:00
|
|
|
quote_path_relative(name + tree_name_len, -1, &pathbuf, opt->prefix);
|
|
|
|
strbuf_insert(&pathbuf, 0, name, tree_name_len);
|
|
|
|
name = pathbuf.buf;
|
2006-08-11 15:44:42 +08:00
|
|
|
}
|
2006-05-01 14:28:15 +08:00
|
|
|
hit = grep_buffer(opt, name, data, size);
|
2009-09-05 20:31:17 +08:00
|
|
|
strbuf_release(&pathbuf);
|
2006-05-01 14:28:15 +08:00
|
|
|
free(data);
|
|
|
|
return hit;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int grep_file(struct grep_opt *opt, const char *filename)
|
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
int i;
|
|
|
|
char *data;
|
2007-03-07 09:44:37 +08:00
|
|
|
size_t sz;
|
2009-09-05 20:31:17 +08:00
|
|
|
struct strbuf buf = STRBUF_INIT;
|
2007-03-07 09:44:37 +08:00
|
|
|
|
2006-05-01 14:28:15 +08:00
|
|
|
if (lstat(filename, &st) < 0) {
|
|
|
|
err_ret:
|
|
|
|
if (errno != ENOENT)
|
|
|
|
error("'%s': %s", filename, strerror(errno));
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
if (!S_ISREG(st.st_mode))
|
|
|
|
return 0;
|
2007-03-07 09:44:37 +08:00
|
|
|
sz = xsize_t(st.st_size);
|
2006-05-01 14:28:15 +08:00
|
|
|
i = open(filename, O_RDONLY);
|
|
|
|
if (i < 0)
|
|
|
|
goto err_ret;
|
2007-03-07 09:44:37 +08:00
|
|
|
data = xmalloc(sz + 1);
|
|
|
|
if (st.st_size != read_in_full(i, data, sz)) {
|
2006-05-01 14:28:15 +08:00
|
|
|
error("'%s': short read %s", filename, strerror(errno));
|
|
|
|
close(i);
|
|
|
|
free(data);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
close(i);
|
2010-01-19 05:55:07 +08:00
|
|
|
data[sz] = 0;
|
2006-08-11 15:44:42 +08:00
|
|
|
if (opt->relative && opt->prefix_length)
|
2009-09-05 20:31:17 +08:00
|
|
|
filename = quote_path_relative(filename, -1, &buf, opt->prefix);
|
2007-03-07 09:44:37 +08:00
|
|
|
i = grep_buffer(opt, filename, data, sz);
|
2009-09-05 20:31:17 +08:00
|
|
|
strbuf_release(&buf);
|
2006-05-01 14:28:15 +08:00
|
|
|
free(data);
|
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
2010-01-13 11:06:41 +08:00
|
|
|
static int grep_cache(struct grep_opt *opt, const char **paths, int cached)
|
2006-05-01 14:28:15 +08:00
|
|
|
{
|
|
|
|
int hit = 0;
|
|
|
|
int nr;
|
|
|
|
read_cache();
|
|
|
|
|
|
|
|
for (nr = 0; nr < active_nr; nr++) {
|
|
|
|
struct cache_entry *ce = active_cache[nr];
|
2008-01-15 08:03:17 +08:00
|
|
|
if (!S_ISREG(ce->ce_mode))
|
2006-05-01 14:28:15 +08:00
|
|
|
continue;
|
grep: Add --max-depth option.
It is useful to grep directories non-recursively, e.g. when one wants to
look for all files in the toplevel directory, but not in any subdirectory,
or in Documentation/, but not in Documentation/technical/.
This patch adds support for --max-depth <depth> option to git-grep. If it is
given, git-grep descends at most <depth> levels of directories below paths
specified on the command line.
Note that if path specified on command line contains wildcards, this option
makes no sense, e.g.
$ git grep -l --max-depth 0 GNU -- 'contrib/*'
(note the quotes) will search all files in contrib/, even in
subdirectories, because '*' matches all files.
Documentation updates, bash-completion and simple test cases are also
provided.
Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-23 01:52:15 +08:00
|
|
|
if (!pathspec_matches(paths, ce->name, opt->max_depth))
|
2006-05-01 14:28:15 +08:00
|
|
|
continue;
|
2008-12-27 16:21:03 +08:00
|
|
|
/*
|
|
|
|
* If CE_VALID is on, we assume worktree file and its cache entry
|
|
|
|
* are identical, even if worktree file has been modified, so use
|
|
|
|
* cache version instead
|
|
|
|
*/
|
2009-08-20 21:46:58 +08:00
|
|
|
if (cached || (ce->ce_flags & CE_VALID) || ce_skip_worktree(ce)) {
|
2006-11-27 04:47:52 +08:00
|
|
|
if (ce_stage(ce))
|
|
|
|
continue;
|
2006-08-11 15:44:42 +08:00
|
|
|
hit |= grep_sha1(opt, ce->sha1, ce->name, 0);
|
2006-11-27 04:47:52 +08:00
|
|
|
}
|
2006-05-01 14:28:15 +08:00
|
|
|
else
|
|
|
|
hit |= grep_file(opt, ce->name);
|
2006-11-27 04:47:52 +08:00
|
|
|
if (ce_stage(ce)) {
|
|
|
|
do {
|
|
|
|
nr++;
|
|
|
|
} while (nr < active_nr &&
|
|
|
|
!strcmp(ce->name, active_cache[nr]->name));
|
|
|
|
nr--; /* compensate for loop control */
|
|
|
|
}
|
2006-05-01 14:28:15 +08:00
|
|
|
}
|
2006-09-28 07:27:10 +08:00
|
|
|
free_grep_patterns(opt);
|
2006-05-01 14:28:15 +08:00
|
|
|
return hit;
|
|
|
|
}
|
|
|
|
|
2006-05-02 06:58:29 +08:00
|
|
|
static int grep_tree(struct grep_opt *opt, const char **paths,
|
2006-05-01 14:28:15 +08:00
|
|
|
struct tree_desc *tree,
|
|
|
|
const char *tree_name, const char *base)
|
|
|
|
{
|
|
|
|
int len;
|
|
|
|
int hit = 0;
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-31 00:45:45 +08:00
|
|
|
struct name_entry entry;
|
2006-05-02 03:27:56 +08:00
|
|
|
char *down;
|
2006-08-11 15:44:42 +08:00
|
|
|
int tn_len = strlen(tree_name);
|
2008-07-16 23:33:29 +08:00
|
|
|
struct strbuf pathbuf;
|
|
|
|
|
|
|
|
strbuf_init(&pathbuf, PATH_MAX + tn_len);
|
2006-05-01 14:28:15 +08:00
|
|
|
|
2006-08-11 15:44:42 +08:00
|
|
|
if (tn_len) {
|
2008-07-16 23:33:29 +08:00
|
|
|
strbuf_add(&pathbuf, tree_name, tn_len);
|
|
|
|
strbuf_addch(&pathbuf, ':');
|
|
|
|
tn_len = pathbuf.len;
|
2006-05-01 14:28:15 +08:00
|
|
|
}
|
2008-07-16 23:33:29 +08:00
|
|
|
strbuf_addstr(&pathbuf, base);
|
|
|
|
len = pathbuf.len;
|
2006-05-01 14:28:15 +08:00
|
|
|
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-31 00:45:45 +08:00
|
|
|
while (tree_entry(tree, &entry)) {
|
2008-07-16 23:33:29 +08:00
|
|
|
int te_len = tree_entry_len(entry.path, entry.sha1);
|
|
|
|
pathbuf.len = len;
|
|
|
|
strbuf_add(&pathbuf, entry.path, te_len);
|
2006-05-01 14:28:15 +08:00
|
|
|
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-31 00:45:45 +08:00
|
|
|
if (S_ISDIR(entry.mode))
|
2006-05-02 03:27:56 +08:00
|
|
|
/* Match "abc/" against pathspec to
|
|
|
|
* decide if we want to descend into "abc"
|
|
|
|
* directory.
|
|
|
|
*/
|
2008-07-16 23:33:29 +08:00
|
|
|
strbuf_addch(&pathbuf, '/');
|
2006-05-02 03:27:56 +08:00
|
|
|
|
2008-07-16 23:33:29 +08:00
|
|
|
down = pathbuf.buf + tn_len;
|
grep: Add --max-depth option.
It is useful to grep directories non-recursively, e.g. when one wants to
look for all files in the toplevel directory, but not in any subdirectory,
or in Documentation/, but not in Documentation/technical/.
This patch adds support for --max-depth <depth> option to git-grep. If it is
given, git-grep descends at most <depth> levels of directories below paths
specified on the command line.
Note that if path specified on command line contains wildcards, this option
makes no sense, e.g.
$ git grep -l --max-depth 0 GNU -- 'contrib/*'
(note the quotes) will search all files in contrib/, even in
subdirectories, because '*' matches all files.
Documentation updates, bash-completion and simple test cases are also
provided.
Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-23 01:52:15 +08:00
|
|
|
if (!pathspec_matches(paths, down, opt->max_depth))
|
2006-05-01 14:28:15 +08:00
|
|
|
;
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-31 00:45:45 +08:00
|
|
|
else if (S_ISREG(entry.mode))
|
2008-07-16 23:33:29 +08:00
|
|
|
hit |= grep_sha1(opt, entry.sha1, pathbuf.buf, tn_len);
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-31 00:45:45 +08:00
|
|
|
else if (S_ISDIR(entry.mode)) {
|
2007-02-27 03:55:59 +08:00
|
|
|
enum object_type type;
|
2006-05-01 14:28:15 +08:00
|
|
|
struct tree_desc sub;
|
|
|
|
void *data;
|
2007-03-22 01:08:25 +08:00
|
|
|
unsigned long size;
|
|
|
|
|
|
|
|
data = read_sha1_file(entry.sha1, &type, &size);
|
2006-05-01 14:28:15 +08:00
|
|
|
if (!data)
|
|
|
|
die("unable to read tree (%s)",
|
tree_entry(): new tree-walking helper function
This adds a "tree_entry()" function that combines the common operation of
doing a "tree_entry_extract()" + "update_tree_entry()".
It also has a simplified calling convention, designed for simple loops
that traverse over a whole tree: the arguments are pointers to the tree
descriptor and a name_entry structure to fill in, and it returns a boolean
"true" if there was an entry left to be gotten in the tree.
This allows tree traversal with
struct tree_desc desc;
struct name_entry entry;
desc.buf = tree->buffer;
desc.size = tree->size;
while (tree_entry(&desc, &entry) {
... use "entry.{path, sha1, mode, pathlen}" ...
}
which is not only shorter than writing it out in full, it's hopefully less
error prone too.
[ It's actually a tad faster too - we don't need to recalculate the entry
pathlength in both extract and update, but need to do it only once.
Also, some callers can avoid doing a "strlen()" on the result, since
it's returned as part of the name_entry structure.
However, by now we're talking just 1% speedup on "git-rev-list --objects
--all", and we're definitely at the point where tree walking is no
longer the issue any more. ]
NOTE! Not everybody wants to use this new helper function, since some of
the tree walkers very much on purpose do the descriptor update separately
from the entry extraction. So the "extract + update" sequence still
remains as the core sequence, this is just a simplified interface.
We should probably add a silly two-line inline helper function for
initializing the descriptor from the "struct tree" too, just to cut down
on the noise from that common "desc" initializer.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-05-31 00:45:45 +08:00
|
|
|
sha1_to_hex(entry.sha1));
|
2007-03-22 01:08:25 +08:00
|
|
|
init_tree_desc(&sub, data, size);
|
2006-05-02 06:58:29 +08:00
|
|
|
hit |= grep_tree(opt, paths, &sub, tree_name, down);
|
2006-05-01 14:28:15 +08:00
|
|
|
free(data);
|
|
|
|
}
|
|
|
|
}
|
2008-07-16 23:33:29 +08:00
|
|
|
strbuf_release(&pathbuf);
|
2006-05-01 14:28:15 +08:00
|
|
|
return hit;
|
|
|
|
}
|
|
|
|
|
2006-05-02 06:58:29 +08:00
|
|
|
static int grep_object(struct grep_opt *opt, const char **paths,
|
2006-05-01 14:28:15 +08:00
|
|
|
struct object *obj, const char *name)
|
|
|
|
{
|
2006-07-12 11:45:31 +08:00
|
|
|
if (obj->type == OBJ_BLOB)
|
2006-08-11 15:44:42 +08:00
|
|
|
return grep_sha1(opt, obj->sha1, name, 0);
|
2006-07-12 11:45:31 +08:00
|
|
|
if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
|
2006-05-01 14:28:15 +08:00
|
|
|
struct tree_desc tree;
|
|
|
|
void *data;
|
2007-03-22 01:08:25 +08:00
|
|
|
unsigned long size;
|
2006-05-01 14:28:15 +08:00
|
|
|
int hit;
|
|
|
|
data = read_object_with_reference(obj->sha1, tree_type,
|
2007-03-22 01:08:25 +08:00
|
|
|
&size, NULL);
|
2006-05-01 14:28:15 +08:00
|
|
|
if (!data)
|
|
|
|
die("unable to read tree (%s)", sha1_to_hex(obj->sha1));
|
2007-03-22 01:08:25 +08:00
|
|
|
init_tree_desc(&tree, data, size);
|
2006-05-02 06:58:29 +08:00
|
|
|
hit = grep_tree(opt, paths, &tree, name, "");
|
2006-05-01 14:28:15 +08:00
|
|
|
free(data);
|
|
|
|
return hit;
|
|
|
|
}
|
Shrink "struct object" a bit
This shrinks "struct object" by a small amount, by getting rid of the
"struct type *" pointer and replacing it with a 3-bit bitfield instead.
In addition, we merge the bitfields and the "flags" field, which
incidentally should also remove a useless 4-byte padding from the object
when in 64-bit mode.
Now, our "struct object" is still too damn large, but it's now less
obviously bloated, and of the remaining fields, only the "util" (which is
not used by most things) is clearly something that should be eventually
discarded.
This shrinks the "git-rev-list --all" memory use by about 2.5% on the
kernel archive (and, perhaps more importantly, on the larger mozilla
archive). That may not sound like much, but I suspect it's more on a
64-bit platform.
There are other remaining inefficiencies (the parent lists, for example,
probably have horrible malloc overhead), but this was pretty obvious.
Most of the patch is just changing the comparison of the "type" pointer
from one of the constant string pointers to the appropriate new TYPE_xxx
small integer constant.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-15 07:45:13 +08:00
|
|
|
die("unable to grep from object of type %s", typename(obj->type));
|
2006-05-01 14:28:15 +08:00
|
|
|
}
|
|
|
|
|
2010-01-16 04:52:40 +08:00
|
|
|
static int grep_directory(struct grep_opt *opt, const char **paths)
|
|
|
|
{
|
|
|
|
struct dir_struct dir;
|
|
|
|
int i, hit = 0;
|
|
|
|
|
|
|
|
memset(&dir, 0, sizeof(dir));
|
|
|
|
setup_standard_excludes(&dir);
|
|
|
|
|
|
|
|
fill_directory(&dir, paths);
|
|
|
|
for (i = 0; i < dir.nr; i++)
|
|
|
|
hit |= grep_file(opt, dir.entries[i]->name);
|
|
|
|
free_grep_patterns(opt);
|
|
|
|
return hit;
|
|
|
|
}
|
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
static int context_callback(const struct option *opt, const char *arg,
|
|
|
|
int unset)
|
2009-05-08 03:46:48 +08:00
|
|
|
{
|
|
|
|
struct grep_opt *grep_opt = opt->value;
|
|
|
|
int value;
|
|
|
|
const char *endp;
|
|
|
|
|
|
|
|
if (unset) {
|
|
|
|
grep_opt->pre_context = grep_opt->post_context = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
value = strtol(arg, (char **)&endp, 10);
|
|
|
|
if (*endp) {
|
|
|
|
return error("switch `%c' expects a numerical value",
|
|
|
|
opt->short_name);
|
|
|
|
}
|
|
|
|
grep_opt->pre_context = grep_opt->post_context = value;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
static int file_callback(const struct option *opt, const char *arg, int unset)
|
2009-05-08 03:46:48 +08:00
|
|
|
{
|
|
|
|
struct grep_opt *grep_opt = opt->value;
|
|
|
|
FILE *patterns;
|
|
|
|
int lno = 0;
|
2009-10-16 22:13:25 +08:00
|
|
|
struct strbuf sb = STRBUF_INIT;
|
2009-05-08 03:46:48 +08:00
|
|
|
|
|
|
|
patterns = fopen(arg, "r");
|
|
|
|
if (!patterns)
|
2009-06-27 23:58:46 +08:00
|
|
|
die_errno("cannot open '%s'", arg);
|
2009-05-08 03:46:48 +08:00
|
|
|
while (strbuf_getline(&sb, patterns, '\n') == 0) {
|
|
|
|
/* ignore empty line like grep does */
|
|
|
|
if (sb.len == 0)
|
|
|
|
continue;
|
|
|
|
append_grep_pattern(grep_opt, strbuf_detach(&sb, NULL), arg,
|
|
|
|
++lno, GREP_PATTERN);
|
|
|
|
}
|
|
|
|
fclose(patterns);
|
|
|
|
strbuf_release(&sb);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
static int not_callback(const struct option *opt, const char *arg, int unset)
|
2009-05-08 03:46:48 +08:00
|
|
|
{
|
|
|
|
struct grep_opt *grep_opt = opt->value;
|
|
|
|
append_grep_pattern(grep_opt, "--not", "command line", 0, GREP_NOT);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
static int and_callback(const struct option *opt, const char *arg, int unset)
|
2009-05-08 03:46:48 +08:00
|
|
|
{
|
|
|
|
struct grep_opt *grep_opt = opt->value;
|
|
|
|
append_grep_pattern(grep_opt, "--and", "command line", 0, GREP_AND);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
static int open_callback(const struct option *opt, const char *arg, int unset)
|
2009-05-08 03:46:48 +08:00
|
|
|
{
|
|
|
|
struct grep_opt *grep_opt = opt->value;
|
|
|
|
append_grep_pattern(grep_opt, "(", "command line", 0, GREP_OPEN_PAREN);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
static int close_callback(const struct option *opt, const char *arg, int unset)
|
2009-05-08 03:46:48 +08:00
|
|
|
{
|
|
|
|
struct grep_opt *grep_opt = opt->value;
|
|
|
|
append_grep_pattern(grep_opt, ")", "command line", 0, GREP_CLOSE_PAREN);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
static int pattern_callback(const struct option *opt, const char *arg,
|
|
|
|
int unset)
|
2009-05-08 03:46:48 +08:00
|
|
|
{
|
|
|
|
struct grep_opt *grep_opt = opt->value;
|
|
|
|
append_grep_pattern(grep_opt, arg, "-e option", 0, GREP_PATTERN);
|
|
|
|
return 0;
|
|
|
|
}
|
2006-05-01 14:28:15 +08:00
|
|
|
|
2009-05-21 06:05:22 +08:00
|
|
|
/*
 * Option callback for the hidden --help-all switch.  It ignores all of
 * its arguments and unconditionally reports failure (-1).
 * NOTE(review): presumably the option parser reacts to the non-zero
 * return by showing usage -- confirm against parse-options.
 */
static int help_callback(const struct option *opt, const char *arg, int unset)
{
	(void)opt;
	(void)arg;
	(void)unset;
	return -1;
}
|
2006-07-04 17:44:48 +08:00
|
|
|
|
2006-07-29 13:44:25 +08:00
|
|
|
int cmd_grep(int argc, const char **argv, const char *prefix)
|
2006-05-01 14:28:15 +08:00
|
|
|
{
|
|
|
|
int hit = 0;
|
|
|
|
int cached = 0;
|
2006-05-09 14:55:47 +08:00
|
|
|
int seen_dashdash = 0;
|
2010-01-13 11:06:41 +08:00
|
|
|
int external_grep_allowed__ignored;
|
2006-05-01 14:28:15 +08:00
|
|
|
struct grep_opt opt;
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 08:42:35 +08:00
|
|
|
struct object_array list = { 0, 0, NULL };
|
2006-05-02 06:58:29 +08:00
|
|
|
const char **paths = NULL;
|
2006-05-09 14:55:47 +08:00
|
|
|
int i;
|
2009-05-08 03:46:48 +08:00
|
|
|
int dummy;
|
2010-01-16 04:50:54 +08:00
|
|
|
int nongit = 0, use_index = 1;
|
2009-05-08 03:46:48 +08:00
|
|
|
struct option options[] = {
|
|
|
|
OPT_BOOLEAN(0, "cached", &cached,
|
|
|
|
"search in index instead of in the work tree"),
|
2010-01-16 04:52:40 +08:00
|
|
|
OPT_BOOLEAN(0, "index", &use_index,
|
|
|
|
"--no-index finds in contents not managed by git"),
|
2009-05-08 03:46:48 +08:00
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_BOOLEAN('v', "invert-match", &opt.invert,
|
|
|
|
"show non-matching lines"),
|
2009-11-06 17:22:35 +08:00
|
|
|
OPT_BOOLEAN('i', "ignore-case", &opt.ignore_case,
|
|
|
|
"case insensitive matching"),
|
2009-05-08 03:46:48 +08:00
|
|
|
OPT_BOOLEAN('w', "word-regexp", &opt.word_regexp,
|
|
|
|
"match patterns only at word boundaries"),
|
|
|
|
OPT_SET_INT('a', "text", &opt.binary,
|
|
|
|
"process binary files as text", GREP_BINARY_TEXT),
|
|
|
|
OPT_SET_INT('I', NULL, &opt.binary,
|
|
|
|
"don't match patterns in binary files",
|
|
|
|
GREP_BINARY_NOMATCH),
|
grep: Add --max-depth option.
It is useful to grep directories non-recursively, e.g. when one wants to
look for all files in the toplevel directory, but not in any subdirectory,
or in Documentation/, but not in Documentation/technical/.
This patch adds support for --max-depth <depth> option to git-grep. If it is
given, git-grep descends at most <depth> levels of directories below paths
specified on the command line.
Note that if path specified on command line contains wildcards, this option
makes no sense, e.g.
$ git grep -l --max-depth 0 GNU -- 'contrib/*'
(note the quotes) will search all files in contrib/, even in
subdirectories, because '*' matches all files.
Documentation updates, bash-completion and simple test cases are also
provided.
Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-23 01:52:15 +08:00
|
|
|
{ OPTION_INTEGER, 0, "max-depth", &opt.max_depth, "depth",
|
|
|
|
"descend at most <depth> levels", PARSE_OPT_NONEG,
|
|
|
|
NULL, 1 },
|
2009-05-08 03:46:48 +08:00
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_BIT('E', "extended-regexp", &opt.regflags,
|
|
|
|
"use extended POSIX regular expressions", REG_EXTENDED),
|
|
|
|
OPT_NEGBIT('G', "basic-regexp", &opt.regflags,
|
|
|
|
"use basic POSIX regular expressions (default)",
|
|
|
|
REG_EXTENDED),
|
|
|
|
OPT_BOOLEAN('F', "fixed-strings", &opt.fixed,
|
|
|
|
"interpret patterns as fixed strings"),
|
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_BOOLEAN('n', NULL, &opt.linenum, "show line numbers"),
|
|
|
|
OPT_NEGBIT('h', NULL, &opt.pathname, "don't show filenames", 1),
|
|
|
|
OPT_BIT('H', NULL, &opt.pathname, "show filenames", 1),
|
|
|
|
OPT_NEGBIT(0, "full-name", &opt.relative,
|
|
|
|
"show filenames relative to top directory", 1),
|
|
|
|
OPT_BOOLEAN('l', "files-with-matches", &opt.name_only,
|
|
|
|
"show only filenames instead of matching lines"),
|
|
|
|
OPT_BOOLEAN(0, "name-only", &opt.name_only,
|
|
|
|
"synonym for --files-with-matches"),
|
|
|
|
OPT_BOOLEAN('L', "files-without-match",
|
|
|
|
&opt.unmatch_name_only,
|
|
|
|
"show only the names of files without match"),
|
|
|
|
OPT_BOOLEAN('z', "null", &opt.null_following_name,
|
|
|
|
"print NUL after filenames"),
|
|
|
|
OPT_BOOLEAN('c', "count", &opt.count,
|
|
|
|
"show the number of matches instead of matching lines"),
|
|
|
|
OPT_SET_INT(0, "color", &opt.color, "highlight matches", 1),
|
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_CALLBACK('C', NULL, &opt, "n",
|
|
|
|
"show <n> context lines before and after matches",
|
|
|
|
context_callback),
|
|
|
|
OPT_INTEGER('B', NULL, &opt.pre_context,
|
|
|
|
"show <n> context lines before matches"),
|
|
|
|
OPT_INTEGER('A', NULL, &opt.post_context,
|
|
|
|
"show <n> context lines after matches"),
|
|
|
|
OPT_NUMBER_CALLBACK(&opt, "shortcut for -C NUM",
|
|
|
|
context_callback),
|
2009-07-02 06:06:34 +08:00
|
|
|
OPT_BOOLEAN('p', "show-function", &opt.funcname,
|
|
|
|
"show a line with the function name before matches"),
|
2009-05-08 03:46:48 +08:00
|
|
|
OPT_GROUP(""),
|
|
|
|
OPT_CALLBACK('f', NULL, &opt, "file",
|
|
|
|
"read patterns from file", file_callback),
|
|
|
|
{ OPTION_CALLBACK, 'e', NULL, &opt, "pattern",
|
|
|
|
"match <pattern>", PARSE_OPT_NONEG, pattern_callback },
|
|
|
|
{ OPTION_CALLBACK, 0, "and", &opt, NULL,
|
|
|
|
"combine patterns specified with -e",
|
|
|
|
PARSE_OPT_NOARG | PARSE_OPT_NONEG, and_callback },
|
|
|
|
OPT_BOOLEAN(0, "or", &dummy, ""),
|
|
|
|
{ OPTION_CALLBACK, 0, "not", &opt, NULL, "",
|
|
|
|
PARSE_OPT_NOARG | PARSE_OPT_NONEG, not_callback },
|
|
|
|
{ OPTION_CALLBACK, '(', NULL, &opt, NULL, "",
|
|
|
|
PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_NODASH,
|
|
|
|
open_callback },
|
|
|
|
{ OPTION_CALLBACK, ')', NULL, &opt, NULL, "",
|
|
|
|
PARSE_OPT_NOARG | PARSE_OPT_NONEG | PARSE_OPT_NODASH,
|
|
|
|
close_callback },
|
|
|
|
OPT_BOOLEAN(0, "all-match", &opt.all_match,
|
|
|
|
"show only matches from files that match all patterns"),
|
|
|
|
OPT_GROUP(""),
|
2010-01-13 11:06:41 +08:00
|
|
|
OPT_BOOLEAN(0, "ext-grep", &external_grep_allowed__ignored,
|
|
|
|
"allow calling of grep(1) (ignored by this build)"),
|
2009-05-08 03:46:48 +08:00
|
|
|
{ OPTION_CALLBACK, 0, "help-all", &options, NULL, "show usage",
|
|
|
|
PARSE_OPT_HIDDEN | PARSE_OPT_NOARG, help_callback },
|
|
|
|
OPT_END()
|
|
|
|
};
|
2006-05-01 14:28:15 +08:00
|
|
|
|
2010-01-16 04:50:54 +08:00
|
|
|
prefix = setup_git_directory_gently(&nongit);
|
|
|
|
|
2009-11-09 23:04:42 +08:00
|
|
|
/*
|
|
|
|
* 'git grep -h', unlike 'git grep -h <pattern>', is a request
|
|
|
|
* to show usage information and exit.
|
|
|
|
*/
|
|
|
|
if (argc == 2 && !strcmp(argv[1], "-h"))
|
|
|
|
usage_with_options(grep_usage, options);
|
|
|
|
|
2006-05-01 14:28:15 +08:00
|
|
|
memset(&opt, 0, sizeof(opt));
|
2009-09-05 20:31:17 +08:00
|
|
|
opt.prefix = prefix;
|
2006-08-11 15:44:42 +08:00
|
|
|
opt.prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
|
|
|
|
opt.relative = 1;
|
2006-09-15 01:45:12 +08:00
|
|
|
opt.pathname = 1;
|
2006-05-03 06:40:49 +08:00
|
|
|
opt.pattern_tail = &opt.pattern_list;
|
2006-05-01 14:28:15 +08:00
|
|
|
opt.regflags = REG_NEWLINE;
|
grep: Add --max-depth option.
It is useful to grep directories non-recursively, e.g. when one wants to
look for all files in the toplevel directory, but not in any subdirectory,
or in Documentation/, but not in Documentation/technical/.
This patch adds support for --max-depth <depth> option to git-grep. If it is
given, git-grep descends at most <depth> levels of directories below paths
specified on the command line.
Note that if path specified on command line contains wildcards, this option
makes no sense, e.g.
$ git grep -l --max-depth 0 GNU -- 'contrib/*'
(note the quotes) will search all files in contrib/, even in
subdirectories, because '*' matches all files.
Documentation updates, bash-completion and simple test cases are also
provided.
Signed-off-by: Michał Kiedrowicz <michal.kiedrowicz@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-07-23 01:52:15 +08:00
|
|
|
opt.max_depth = -1;
|
2006-05-01 14:28:15 +08:00
|
|
|
|
2009-03-07 20:32:32 +08:00
|
|
|
strcpy(opt.color_match, GIT_COLOR_RED GIT_COLOR_BOLD);
|
|
|
|
opt.color = -1;
|
|
|
|
git_config(grep_config, &opt);
|
|
|
|
if (opt.color == -1)
|
|
|
|
opt.color = git_use_color_default;
|
|
|
|
|
2006-05-01 14:28:15 +08:00
|
|
|
/*
|
2006-05-09 14:55:47 +08:00
|
|
|
* If there is no -- then the paths must exist in the working
|
|
|
|
* tree. If there is no explicit pattern specified with -e or
|
|
|
|
* -f, we take the first unrecognized non option to be the
|
|
|
|
* pattern, but then what follows it must be zero or more
|
|
|
|
* valid refs up to the -- (if exists), and then existing
|
|
|
|
* paths. If there is an explicit pattern, then the first
|
2006-07-10 13:50:18 +08:00
|
|
|
* unrecognized non option is the beginning of the refs list
|
2006-05-09 14:55:47 +08:00
|
|
|
* that continues up to the -- (if exists), and then paths.
|
2006-05-01 14:28:15 +08:00
|
|
|
*/
|
2009-05-24 02:53:12 +08:00
|
|
|
argc = parse_options(argc, argv, prefix, options, grep_usage,
|
2009-05-08 03:46:48 +08:00
|
|
|
PARSE_OPT_KEEP_DASHDASH |
|
|
|
|
PARSE_OPT_STOP_AT_NON_OPTION |
|
|
|
|
PARSE_OPT_NO_INTERNAL_HELP);
|
|
|
|
|
2010-01-16 04:50:54 +08:00
|
|
|
if (use_index && nongit)
|
|
|
|
/* die the same way as if we did it at the beginning */
|
|
|
|
setup_git_directory();
|
|
|
|
|
2009-05-08 03:46:48 +08:00
|
|
|
/* First unrecognized non-option token */
|
|
|
|
if (argc > 0 && !opt.pattern_list) {
|
|
|
|
append_grep_pattern(&opt, argv[0], "command line", 0,
|
|
|
|
GREP_PATTERN);
|
|
|
|
argv++;
|
|
|
|
argc--;
|
2006-05-01 14:28:15 +08:00
|
|
|
}
|
2006-05-09 14:55:47 +08:00
|
|
|
|
2006-05-03 06:40:49 +08:00
|
|
|
if (!opt.pattern_list)
|
2006-05-01 14:28:15 +08:00
|
|
|
die("no pattern given.");
|
2009-11-06 17:22:35 +08:00
|
|
|
if (!opt.fixed && opt.ignore_case)
|
|
|
|
opt.regflags |= REG_ICASE;
|
2006-05-10 09:28:41 +08:00
|
|
|
if ((opt.regflags != REG_NEWLINE) && opt.fixed)
|
|
|
|
die("cannot mix --fixed-strings and regexp");
|
2006-09-18 07:02:52 +08:00
|
|
|
compile_grep_patterns(&opt);
|
2006-05-09 14:55:47 +08:00
|
|
|
|
|
|
|
/* Check revs and then paths */
|
2009-05-08 03:46:48 +08:00
|
|
|
for (i = 0; i < argc; i++) {
|
2006-05-09 14:55:47 +08:00
|
|
|
const char *arg = argv[i];
|
2006-05-02 06:58:29 +08:00
|
|
|
unsigned char sha1[20];
|
2006-05-09 14:55:47 +08:00
|
|
|
/* Is it a rev? */
|
|
|
|
if (!get_sha1(arg, sha1)) {
|
|
|
|
struct object *object = parse_object(sha1);
|
|
|
|
if (!object)
|
|
|
|
die("bad object %s", arg);
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 08:42:35 +08:00
|
|
|
add_object_array(object, arg, &list);
|
2006-05-09 14:55:47 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strcmp(arg, "--")) {
|
|
|
|
i++;
|
|
|
|
seen_dashdash = 1;
|
|
|
|
}
|
|
|
|
break;
|
2006-05-02 06:58:29 +08:00
|
|
|
}
|
2006-05-09 14:55:47 +08:00
|
|
|
|
|
|
|
/* The rest are paths */
|
|
|
|
if (!seen_dashdash) {
|
|
|
|
int j;
|
2006-05-10 09:15:21 +08:00
|
|
|
for (j = i; j < argc; j++)
|
2006-05-09 14:55:47 +08:00
|
|
|
verify_filename(prefix, argv[j]);
|
|
|
|
}
|
|
|
|
|
2009-09-05 20:31:17 +08:00
|
|
|
if (i < argc)
|
2006-05-09 14:55:47 +08:00
|
|
|
paths = get_pathspec(prefix, argv + i);
|
2006-05-02 06:58:29 +08:00
|
|
|
else if (prefix) {
|
|
|
|
paths = xcalloc(2, sizeof(const char *));
|
|
|
|
paths[0] = prefix;
|
|
|
|
paths[1] = NULL;
|
|
|
|
}
|
2006-05-01 14:28:15 +08:00
|
|
|
|
2010-01-16 04:52:40 +08:00
|
|
|
if (!use_index) {
|
|
|
|
if (cached)
|
|
|
|
die("--cached cannot be used with --no-index.");
|
|
|
|
if (list.nr)
|
|
|
|
die("--no-index cannot be used with revs.");
|
|
|
|
return !grep_directory(&opt, paths);
|
|
|
|
}
|
|
|
|
|
2008-08-28 21:04:30 +08:00
|
|
|
if (!list.nr) {
|
|
|
|
if (!cached)
|
|
|
|
setup_work_tree();
|
2010-01-13 11:06:41 +08:00
|
|
|
return !grep_cache(&opt, paths, cached);
|
2008-08-28 21:04:30 +08:00
|
|
|
}
|
2006-05-09 04:28:27 +08:00
|
|
|
|
2006-05-01 14:28:15 +08:00
|
|
|
if (cached)
|
2006-05-09 04:28:27 +08:00
|
|
|
die("both --cached and trees are given.");
|
2006-05-01 14:28:15 +08:00
|
|
|
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 08:42:35 +08:00
|
|
|
for (i = 0; i < list.nr; i++) {
|
2006-05-01 14:28:15 +08:00
|
|
|
struct object *real_obj;
|
Add "named object array" concept
We've had this notion of a "object_list" for a long time, which eventually
grew a "name" member because some users (notably git-rev-list) wanted to
name each object as it is generated.
That object_list is great for some things, but it isn't all that wonderful
for others, and the "name" member is generally not used by everybody.
This patch splits the users of the object_list array up into two: the
traditional list users, who want the list-like format, and who don't
actually use or want the name. And another class of users that really used
the list as an extensible array, and generally wanted to name the objects.
The patch is fairly straightforward, but it's also biggish. Most of it
really just cleans things up: switching the revision parsing and listing
over to the array makes things like the builtin-diff usage much simpler
(we now see exactly how many members the array has, and we don't get the
objects reversed from the order they were on the command line).
One of the main reasons for doing this at all is that the malloc overhead
of the simple object list was actually pretty high, and the array is just
a lot denser. So this patch brings down memory usage by git-rev-list by
just under 3% (on top of all the other memory use optimizations) on the
mozilla archive.
It does add more lines than it removes, and more importantly, it adds a
whole new infrastructure for maintaining lists of objects, but on the
other hand, the new dynamic array code is pretty obvious. The change to
builtin-diff-tree.c shows a fairly good example of why an array interface
is sometimes more natural, and just much simpler for everybody.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2006-06-20 08:42:35 +08:00
|
|
|
real_obj = deref_tag(list.objects[i].item, NULL, 0);
|
|
|
|
if (grep_object(&opt, paths, real_obj, list.objects[i].name))
|
2006-05-01 14:28:15 +08:00
|
|
|
hit = 1;
|
|
|
|
}
|
2006-09-28 07:27:10 +08:00
|
|
|
free_grep_patterns(&opt);
|
2006-05-01 14:28:15 +08:00
|
|
|
return !hit;
|
|
|
|
}
|