2005-04-24 10:04:40 +08:00
|
|
|
#include "cache.h"
|
2005-10-06 05:49:54 +08:00
|
|
|
#include "refs.h"
|
2005-06-30 02:30:24 +08:00
|
|
|
#include "tag.h"
|
2005-04-24 10:04:40 +08:00
|
|
|
#include "commit.h"
|
2005-06-25 13:56:58 +08:00
|
|
|
#include "tree.h"
|
|
|
|
#include "blob.h"
|
2005-06-06 23:39:40 +08:00
|
|
|
#include "epoch.h"
|
2005-10-21 12:25:09 +08:00
|
|
|
#include "diff.h"
|
2005-04-24 10:04:40 +08:00
|
|
|
|
2005-05-31 09:46:32 +08:00
|
|
|
/*
 * Bits stored in object.flags by this file.
 *
 * SEEN        - handed to pop_most_recent_commit() and set on trees,
 *               blobs and tags once they have been queued for output.
 * INTERESTING - not referenced in this part of the file.
 * COUNTED     - scratch mark used by count_distance()/clear_distance().
 * SHOWN       - set by show_commit(); filter_commit() skips such commits.
 * TREECHANGE  - tested when path limiting is active (rewrite_one(),
 *               filter_commit(), bisection counting).
 * TMP_MARK    - for isolated cases; clean after use.
 */
#define SEEN		0x01u
#define INTERESTING	0x02u
#define COUNTED		0x04u
#define SHOWN		0x08u
#define TREECHANGE	0x10u
#define TMP_MARK	0x20u
|
2005-05-31 09:46:32 +08:00
|
|
|
|
2005-05-26 09:29:09 +08:00
|
|
|
/*
 * Usage text for git-rev-list, grouped by what each option affects.
 * The final line deliberately carries no trailing newline.
 */
static const char rev_list_usage[] =
	"git-rev-list [OPTION] <commit-id>... [ -- paths... ]\n"
	/* limiting output */
	" limiting output:\n"
	" --max-count=nr\n"
	" --max-age=epoch\n"
	" --min-age=epoch\n"
	" --sparse\n"
	" --no-merges\n"
	" --remove-empty\n"
	" --all\n"
	/* ordering output */
	" ordering output:\n"
	" --merge-order [ --show-breaks ]\n"
	" --topo-order\n"
	/* formatting output */
	" formatting output:\n"
	" --parents\n"
	" --objects\n"
	" --unpacked\n"
	" --header | --pretty\n"
	/* special purpose */
	" special purpose:\n"
	" --bisect";
|
2005-05-26 09:29:09 +08:00
|
|
|
|
2005-10-26 06:24:55 +08:00
|
|
|
static int dense = 1;
|
2005-07-04 04:29:54 +08:00
|
|
|
static int unpacked = 0;
|
2005-06-18 13:54:50 +08:00
|
|
|
static int bisect_list = 0;
|
2005-06-30 01:40:14 +08:00
|
|
|
static int tag_objects = 0;
|
2005-06-25 13:56:58 +08:00
|
|
|
static int tree_objects = 0;
|
|
|
|
static int blob_objects = 0;
|
2005-06-03 00:19:53 +08:00
|
|
|
static int verbose_header = 0;
|
|
|
|
static int show_parents = 0;
|
|
|
|
static int hdr_termination = 0;
|
2005-08-25 05:58:42 +08:00
|
|
|
static const char *commit_prefix = "";
|
2005-06-03 00:19:53 +08:00
|
|
|
static unsigned long max_age = -1;
|
|
|
|
static unsigned long min_age = -1;
|
|
|
|
static int max_count = -1;
|
2005-06-06 00:02:03 +08:00
|
|
|
static enum cmit_fmt commit_format = CMIT_FMT_RAW;
|
2005-06-06 23:39:40 +08:00
|
|
|
static int merge_order = 0;
|
|
|
|
static int show_breaks = 0;
|
2005-06-20 10:29:41 +08:00
|
|
|
static int stop_traversal = 0;
|
2005-07-07 01:25:04 +08:00
|
|
|
static int topo_order = 0;
|
2005-08-08 17:37:21 +08:00
|
|
|
static int no_merges = 0;
|
2005-10-21 12:25:09 +08:00
|
|
|
static const char **paths = NULL;
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the uninteresting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
/* Off by default; presumably switched on by --remove-empty (not read in this part of the file). */
static int remove_empty_trees;
|
2005-06-03 00:19:53 +08:00
|
|
|
|
|
|
|
static void show_commit(struct commit *commit)
|
|
|
|
{
|
2005-06-20 10:29:38 +08:00
|
|
|
commit->object.flags |= SHOWN;
|
2005-06-06 23:39:40 +08:00
|
|
|
if (show_breaks) {
|
2005-08-25 05:58:42 +08:00
|
|
|
commit_prefix = "| ";
|
2005-06-06 23:39:40 +08:00
|
|
|
if (commit->object.flags & DISCONTINUITY) {
|
2005-08-25 05:58:42 +08:00
|
|
|
commit_prefix = "^ ";
|
2005-06-06 23:39:40 +08:00
|
|
|
} else if (commit->object.flags & BOUNDARY) {
|
2005-08-25 05:58:42 +08:00
|
|
|
commit_prefix = "= ";
|
2005-06-06 23:39:40 +08:00
|
|
|
}
|
|
|
|
}
|
2005-08-25 05:58:42 +08:00
|
|
|
printf("%s%s", commit_prefix, sha1_to_hex(commit->object.sha1));
|
2005-06-03 00:19:53 +08:00
|
|
|
if (show_parents) {
|
|
|
|
struct commit_list *parents = commit->parents;
|
|
|
|
while (parents) {
|
2006-01-30 07:24:42 +08:00
|
|
|
struct object *o = &(parents->item->object);
|
2005-06-03 00:19:53 +08:00
|
|
|
parents = parents->next;
|
2006-01-30 07:24:42 +08:00
|
|
|
if (o->flags & TMP_MARK)
|
|
|
|
continue;
|
|
|
|
printf(" %s", sha1_to_hex(o->sha1));
|
|
|
|
o->flags |= TMP_MARK;
|
2005-06-03 00:19:53 +08:00
|
|
|
}
|
2006-01-30 07:24:42 +08:00
|
|
|
/* TMP_MARK is a general purpose flag that can
|
|
|
|
* be used locally, but the user should clean
|
|
|
|
* things up after it is done with them.
|
|
|
|
*/
|
|
|
|
for (parents = commit->parents;
|
|
|
|
parents;
|
|
|
|
parents = parents->next)
|
|
|
|
parents->item->object.flags &= ~TMP_MARK;
|
2005-06-03 00:19:53 +08:00
|
|
|
}
|
2005-08-09 13:15:40 +08:00
|
|
|
if (commit_format == CMIT_FMT_ONELINE)
|
|
|
|
putchar(' ');
|
|
|
|
else
|
|
|
|
putchar('\n');
|
|
|
|
|
2005-06-03 00:19:53 +08:00
|
|
|
if (verbose_header) {
|
2005-06-06 00:02:03 +08:00
|
|
|
static char pretty_header[16384];
|
2006-01-27 17:54:59 +08:00
|
|
|
pretty_print_commit(commit_format, commit, ~0, pretty_header, sizeof(pretty_header), 0);
|
2005-06-06 00:02:03 +08:00
|
|
|
printf("%s%c", pretty_header, hdr_termination);
|
2005-07-05 07:36:48 +08:00
|
|
|
}
|
|
|
|
fflush(stdout);
|
2005-06-06 23:39:40 +08:00
|
|
|
}
|
|
|
|
|
2005-10-26 02:50:46 +08:00
|
|
|
static int rewrite_one(struct commit **pp)
|
2005-10-22 07:40:54 +08:00
|
|
|
{
|
|
|
|
for (;;) {
|
|
|
|
struct commit *p = *pp;
|
|
|
|
if (p->object.flags & (TREECHANGE | UNINTERESTING))
|
2005-10-26 02:50:46 +08:00
|
|
|
return 0;
|
|
|
|
if (!p->parents)
|
|
|
|
return -1;
|
2005-10-22 07:40:54 +08:00
|
|
|
*pp = p->parents->item;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void rewrite_parents(struct commit *commit)
|
|
|
|
{
|
2005-10-26 02:50:46 +08:00
|
|
|
struct commit_list **pp = &commit->parents;
|
|
|
|
while (*pp) {
|
|
|
|
struct commit_list *parent = *pp;
|
|
|
|
if (rewrite_one(&parent->item) < 0) {
|
|
|
|
*pp = parent->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
pp = &parent->next;
|
2005-10-22 07:40:54 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-06-06 23:39:40 +08:00
|
|
|
static int filter_commit(struct commit * commit)
|
|
|
|
{
|
2005-07-07 00:39:34 +08:00
|
|
|
if (stop_traversal && (commit->object.flags & BOUNDARY))
|
2005-06-20 10:29:41 +08:00
|
|
|
return STOP;
|
2005-06-20 10:29:38 +08:00
|
|
|
if (commit->object.flags & (UNINTERESTING|SHOWN))
|
2005-06-06 23:39:40 +08:00
|
|
|
return CONTINUE;
|
|
|
|
if (min_age != -1 && (commit->date > min_age))
|
|
|
|
return CONTINUE;
|
2005-06-20 10:29:41 +08:00
|
|
|
if (max_age != -1 && (commit->date < max_age)) {
|
2005-07-07 00:39:34 +08:00
|
|
|
stop_traversal=1;
|
2005-09-21 08:55:46 +08:00
|
|
|
return CONTINUE;
|
2005-06-20 10:29:41 +08:00
|
|
|
}
|
2005-08-08 17:37:21 +08:00
|
|
|
if (no_merges && (commit->parents && commit->parents->next))
|
|
|
|
return CONTINUE;
|
2005-10-22 07:40:54 +08:00
|
|
|
if (paths && dense) {
|
|
|
|
if (!(commit->object.flags & TREECHANGE))
|
|
|
|
return CONTINUE;
|
|
|
|
rewrite_parents(commit);
|
|
|
|
}
|
2005-06-06 23:39:40 +08:00
|
|
|
return DO;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int process_commit(struct commit * commit)
|
|
|
|
{
|
|
|
|
int action=filter_commit(commit);
|
|
|
|
|
|
|
|
if (action == STOP) {
|
|
|
|
return STOP;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (action == CONTINUE) {
|
|
|
|
return CONTINUE;
|
2005-06-03 00:19:53 +08:00
|
|
|
}
|
2005-06-06 23:39:40 +08:00
|
|
|
|
2005-11-19 05:29:04 +08:00
|
|
|
if (max_count != -1 && !max_count--)
|
|
|
|
return STOP;
|
|
|
|
|
2005-06-06 23:39:40 +08:00
|
|
|
show_commit(commit);
|
|
|
|
|
|
|
|
return CONTINUE;
|
2005-06-03 00:19:53 +08:00
|
|
|
}
|
|
|
|
|
2005-06-27 06:26:05 +08:00
|
|
|
static struct object_list **add_object(struct object *obj, struct object_list **p, const char *name)
|
2005-06-25 13:56:58 +08:00
|
|
|
{
|
|
|
|
struct object_list *entry = xmalloc(sizeof(*entry));
|
|
|
|
entry->item = obj;
|
2005-06-30 02:30:24 +08:00
|
|
|
entry->next = *p;
|
2005-06-27 06:26:05 +08:00
|
|
|
entry->name = name;
|
2005-06-25 13:56:58 +08:00
|
|
|
*p = entry;
|
|
|
|
return &entry->next;
|
|
|
|
}
|
|
|
|
|
2005-06-27 06:26:05 +08:00
|
|
|
static struct object_list **process_blob(struct blob *blob, struct object_list **p, const char *name)
|
2005-06-25 13:56:58 +08:00
|
|
|
{
|
|
|
|
struct object *obj = &blob->object;
|
|
|
|
|
|
|
|
if (!blob_objects)
|
|
|
|
return p;
|
|
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
|
|
return p;
|
|
|
|
obj->flags |= SEEN;
|
2005-06-27 06:26:05 +08:00
|
|
|
return add_object(obj, p, name);
|
2005-06-25 13:56:58 +08:00
|
|
|
}
|
|
|
|
|
2005-06-27 06:26:05 +08:00
|
|
|
static struct object_list **process_tree(struct tree *tree, struct object_list **p, const char *name)
|
2005-06-25 13:56:58 +08:00
|
|
|
{
|
|
|
|
struct object *obj = &tree->object;
|
|
|
|
struct tree_entry_list *entry;
|
|
|
|
|
|
|
|
if (!tree_objects)
|
|
|
|
return p;
|
|
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
|
|
return p;
|
|
|
|
if (parse_tree(tree) < 0)
|
|
|
|
die("bad tree object %s", sha1_to_hex(obj->sha1));
|
|
|
|
obj->flags |= SEEN;
|
2005-06-27 06:26:05 +08:00
|
|
|
p = add_object(obj, p, name);
|
2005-09-17 05:32:48 +08:00
|
|
|
entry = tree->entries;
|
|
|
|
tree->entries = NULL;
|
|
|
|
while (entry) {
|
|
|
|
struct tree_entry_list *next = entry->next;
|
2005-06-25 13:56:58 +08:00
|
|
|
if (entry->directory)
|
2005-06-27 06:26:05 +08:00
|
|
|
p = process_tree(entry->item.tree, p, entry->name);
|
2005-06-25 13:56:58 +08:00
|
|
|
else
|
2005-06-27 06:26:05 +08:00
|
|
|
p = process_blob(entry->item.blob, p, entry->name);
|
2005-09-17 05:32:48 +08:00
|
|
|
free(entry);
|
|
|
|
entry = next;
|
2005-06-25 13:56:58 +08:00
|
|
|
}
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
2005-06-30 02:30:24 +08:00
|
|
|
/* Non-commit objects waiting to be listed; walked by show_commit_list(). */
static struct object_list *pending_objects;
|
|
|
|
|
2005-06-03 00:19:53 +08:00
|
|
|
static void show_commit_list(struct commit_list *list)
|
|
|
|
{
|
2005-06-30 02:30:24 +08:00
|
|
|
struct object_list *objects = NULL, **p = &objects, *pending;
|
2005-06-03 00:19:53 +08:00
|
|
|
while (list) {
|
|
|
|
struct commit *commit = pop_most_recent_commit(&list, SEEN);
|
|
|
|
|
2005-06-27 06:26:05 +08:00
|
|
|
p = process_tree(commit->tree, p, "");
|
2005-06-06 23:39:40 +08:00
|
|
|
if (process_commit(commit) == STOP)
|
2005-06-03 00:19:53 +08:00
|
|
|
break;
|
|
|
|
}
|
2005-06-30 02:30:24 +08:00
|
|
|
for (pending = pending_objects; pending; pending = pending->next) {
|
|
|
|
struct object *obj = pending->item;
|
|
|
|
const char *name = pending->name;
|
|
|
|
if (obj->flags & (UNINTERESTING | SEEN))
|
|
|
|
continue;
|
|
|
|
if (obj->type == tag_type) {
|
|
|
|
obj->flags |= SEEN;
|
|
|
|
p = add_object(obj, p, name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (obj->type == tree_type) {
|
|
|
|
p = process_tree((struct tree *)obj, p, name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (obj->type == blob_type) {
|
|
|
|
p = process_blob((struct blob *)obj, p, name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
die("unknown pending object %s (%s)", sha1_to_hex(obj->sha1), name);
|
|
|
|
}
|
2005-06-25 13:56:58 +08:00
|
|
|
while (objects) {
|
2005-10-03 08:29:21 +08:00
|
|
|
/* An object with name "foo\n0000000000000000000000000000000000000000"
|
|
|
|
* can be used confuse downstream git-pack-objects very badly.
|
|
|
|
*/
|
|
|
|
const char *ep = strchr(objects->name, '\n');
|
|
|
|
if (ep) {
|
|
|
|
printf("%s %.*s\n", sha1_to_hex(objects->item->sha1),
|
|
|
|
(int) (ep - objects->name),
|
|
|
|
objects->name);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
printf("%s %s\n", sha1_to_hex(objects->item->sha1), objects->name);
|
2005-06-25 13:56:58 +08:00
|
|
|
objects = objects->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mark_blob_uninteresting(struct blob *blob)
|
|
|
|
{
|
|
|
|
if (!blob_objects)
|
|
|
|
return;
|
|
|
|
if (blob->object.flags & UNINTERESTING)
|
|
|
|
return;
|
|
|
|
blob->object.flags |= UNINTERESTING;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mark_tree_uninteresting(struct tree *tree)
|
|
|
|
{
|
|
|
|
struct object *obj = &tree->object;
|
|
|
|
struct tree_entry_list *entry;
|
|
|
|
|
|
|
|
if (!tree_objects)
|
|
|
|
return;
|
|
|
|
if (obj->flags & UNINTERESTING)
|
|
|
|
return;
|
|
|
|
obj->flags |= UNINTERESTING;
|
2005-07-11 06:09:46 +08:00
|
|
|
if (!has_sha1_file(obj->sha1))
|
|
|
|
return;
|
2005-06-25 13:56:58 +08:00
|
|
|
if (parse_tree(tree) < 0)
|
|
|
|
die("bad tree %s", sha1_to_hex(obj->sha1));
|
|
|
|
entry = tree->entries;
|
2005-09-17 05:32:48 +08:00
|
|
|
tree->entries = NULL;
|
2005-06-25 13:56:58 +08:00
|
|
|
while (entry) {
|
2005-09-17 05:32:48 +08:00
|
|
|
struct tree_entry_list *next = entry->next;
|
2005-06-25 13:56:58 +08:00
|
|
|
if (entry->directory)
|
|
|
|
mark_tree_uninteresting(entry->item.tree);
|
|
|
|
else
|
|
|
|
mark_blob_uninteresting(entry->item.blob);
|
2005-09-17 05:32:48 +08:00
|
|
|
free(entry);
|
|
|
|
entry = next;
|
2005-06-25 13:56:58 +08:00
|
|
|
}
|
2005-06-03 00:19:53 +08:00
|
|
|
}
|
|
|
|
|
2005-05-31 09:46:32 +08:00
|
|
|
static void mark_parents_uninteresting(struct commit *commit)
|
|
|
|
{
|
|
|
|
struct commit_list *parents = commit->parents;
|
|
|
|
|
|
|
|
while (parents) {
|
|
|
|
struct commit *commit = parents->item;
|
|
|
|
commit->object.flags |= UNINTERESTING;
|
2005-07-11 06:09:46 +08:00
|
|
|
|
[PATCH] Fix interesting git-rev-list corner case
This corner-case was triggered by a kernel commit that was not in date
order, due to a misconfigured time zone that made the commit appear three
hours older than it was.
That caused git-rev-list to traverse the commit tree in a non-obvious
order, and made it parse several of the _parents_ of the misplaced commit
before it actually parsed the commit itself. That's fine, but it meant
that the grandparents of the commit didn't get marked uninteresting,
because they had been reached through an "interesting" branch.
The reason was that "mark_parents_uninteresting()" (which is supposed to
mark all existing parents as being uninteresting - duh) didn't actually
traverse more than one level down the parent chain.
NORMALLY this is fine, since with the date-based traversal order,
grandparents won't ever even have been looked at before their parents (so
traversing the chain down isn't needed, because the next time around when
we pick out the parent we'll mark _its_ parents uninteresting), but since
we'd gotten out of order, we'd already seen the parent and thus never got
around to mark the grandparents.
Anyway, the fix is simple. Just traverse parent chains recursively.
Normally the chain won't even exist (since the parent hasn't been parsed
yet), so this is not actually going to trigger except in this strange
corner-case.
Add a comment to the simple one-liner, since this was a bit subtle, and I
had to really think things through to understand how it could happen.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-07-30 06:50:30 +08:00
|
|
|
/*
|
|
|
|
* Normally we haven't parsed the parent
|
|
|
|
* yet, so we won't have a parent of a parent
|
|
|
|
* here. However, it may turn out that we've
|
|
|
|
* reached this commit some other way (where it
|
|
|
|
* wasn't uninteresting), in which case we need
|
|
|
|
* to mark its parents recursively too..
|
|
|
|
*/
|
|
|
|
if (commit->parents)
|
|
|
|
mark_parents_uninteresting(commit);
|
|
|
|
|
2005-07-11 06:09:46 +08:00
|
|
|
/*
|
|
|
|
* A missing commit is ok iff its parent is marked
|
|
|
|
* uninteresting.
|
|
|
|
*
|
|
|
|
* We just mark such a thing parsed, so that when
|
|
|
|
* it is popped next time around, we won't be trying
|
|
|
|
* to parse it and get an error.
|
|
|
|
*/
|
|
|
|
if (!has_sha1_file(commit->object.sha1))
|
|
|
|
commit->object.parsed = 1;
|
2005-05-31 09:46:32 +08:00
|
|
|
parents = parents->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-24 01:01:49 +08:00
|
|
|
static int everybody_uninteresting(struct commit_list *orig)
|
2005-05-31 09:46:32 +08:00
|
|
|
{
|
2005-07-24 01:01:49 +08:00
|
|
|
struct commit_list *list = orig;
|
2005-05-31 09:46:32 +08:00
|
|
|
while (list) {
|
|
|
|
struct commit *commit = list->item;
|
|
|
|
list = list->next;
|
|
|
|
if (commit->object.flags & UNINTERESTING)
|
|
|
|
continue;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2005-06-18 13:54:50 +08:00
|
|
|
/*
|
|
|
|
* This is a truly stupid algorithm, but it's only
|
|
|
|
* used for bisection, and we just don't care enough.
|
|
|
|
*
|
|
|
|
* We care just barely enough to avoid recursing for
|
|
|
|
* non-merge entries.
|
|
|
|
*/
|
|
|
|
static int count_distance(struct commit_list *entry)
|
|
|
|
{
|
|
|
|
int nr = 0;
|
|
|
|
|
|
|
|
while (entry) {
|
|
|
|
struct commit *commit = entry->item;
|
|
|
|
struct commit_list *p;
|
|
|
|
|
|
|
|
if (commit->object.flags & (UNINTERESTING | COUNTED))
|
|
|
|
break;
|
2005-11-28 03:32:03 +08:00
|
|
|
if (!paths || (commit->object.flags & TREECHANGE))
|
|
|
|
nr++;
|
2005-06-18 13:54:50 +08:00
|
|
|
commit->object.flags |= COUNTED;
|
|
|
|
p = commit->parents;
|
|
|
|
entry = p;
|
|
|
|
if (p) {
|
|
|
|
p = p->next;
|
|
|
|
while (p) {
|
|
|
|
nr += count_distance(p);
|
|
|
|
p = p->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2005-11-28 03:32:03 +08:00
|
|
|
|
2005-06-18 13:54:50 +08:00
|
|
|
return nr;
|
|
|
|
}
|
|
|
|
|
2005-06-19 11:02:49 +08:00
|
|
|
static void clear_distance(struct commit_list *list)
|
2005-06-18 13:54:50 +08:00
|
|
|
{
|
|
|
|
while (list) {
|
|
|
|
struct commit *commit = list->item;
|
|
|
|
commit->object.flags &= ~COUNTED;
|
|
|
|
list = list->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct commit_list *find_bisection(struct commit_list *list)
|
|
|
|
{
|
|
|
|
int nr, closest;
|
|
|
|
struct commit_list *p, *best;
|
|
|
|
|
|
|
|
nr = 0;
|
|
|
|
p = list;
|
|
|
|
while (p) {
|
2005-11-28 03:32:03 +08:00
|
|
|
if (!paths || (p->item->object.flags & TREECHANGE))
|
|
|
|
nr++;
|
2005-06-18 13:54:50 +08:00
|
|
|
p = p->next;
|
|
|
|
}
|
|
|
|
closest = 0;
|
|
|
|
best = list;
|
|
|
|
|
2005-11-28 03:32:03 +08:00
|
|
|
for (p = list; p; p = p->next) {
|
|
|
|
int distance;
|
|
|
|
|
|
|
|
if (paths && !(p->item->object.flags & TREECHANGE))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
distance = count_distance(p);
|
2005-06-18 13:54:50 +08:00
|
|
|
clear_distance(list);
|
|
|
|
if (nr - distance < distance)
|
|
|
|
distance = nr - distance;
|
|
|
|
if (distance > closest) {
|
|
|
|
best = p;
|
|
|
|
closest = distance;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (best)
|
|
|
|
best->next = NULL;
|
|
|
|
return best;
|
|
|
|
}
|
|
|
|
|
2005-09-16 06:14:29 +08:00
|
|
|
static void mark_edges_uninteresting(struct commit_list *list)
|
|
|
|
{
|
|
|
|
for ( ; list; list = list->next) {
|
|
|
|
struct commit_list *parents = list->item->parents;
|
|
|
|
|
|
|
|
for ( ; parents; parents = parents->next) {
|
|
|
|
struct commit *commit = parents->item;
|
|
|
|
if (commit->object.flags & UNINTERESTING)
|
|
|
|
mark_tree_uninteresting(commit->tree);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the uninteresting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
/*
 * Result of the most recent tree comparison, updated by the diff
 * callbacks file_add_remove() and file_change():
 *   TREE_SAME      - nothing reported so far
 *   TREE_NEW       - only additions reported
 *   TREE_DIFFERENT - a removal or a modification was reported
 */
#define TREE_SAME		0
#define TREE_NEW		1
#define TREE_DIFFERENT		2

static int tree_difference = TREE_SAME;
|
2005-10-21 12:25:09 +08:00
|
|
|
|
|
|
|
static void file_add_remove(struct diff_options *options,
|
|
|
|
int addremove, unsigned mode,
|
|
|
|
const unsigned char *sha1,
|
|
|
|
const char *base, const char *path)
|
|
|
|
{
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
int diff = TREE_DIFFERENT;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Is it an add of a new file? It means that
|
|
|
|
* the old tree didn't have it at all, so we
|
|
|
|
* will turn "TREE_SAME" -> "TREE_NEW", but
|
|
|
|
* leave any "TREE_DIFFERENT" alone (and if
|
|
|
|
* it already was "TREE_NEW", we'll keep it
|
|
|
|
* "TREE_NEW" of course).
|
|
|
|
*/
|
|
|
|
if (addremove == '+') {
|
|
|
|
diff = tree_difference;
|
|
|
|
if (diff != TREE_SAME)
|
|
|
|
return;
|
|
|
|
diff = TREE_NEW;
|
|
|
|
}
|
|
|
|
tree_difference = diff;
|
2005-10-21 12:25:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void file_change(struct diff_options *options,
|
|
|
|
unsigned old_mode, unsigned new_mode,
|
|
|
|
const unsigned char *old_sha1,
|
|
|
|
const unsigned char *new_sha1,
|
|
|
|
const char *base, const char *path)
|
|
|
|
{
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
tree_difference = TREE_DIFFERENT;
|
2005-10-21 12:25:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static struct diff_options diff_opt = {
|
|
|
|
.recursive = 1,
|
|
|
|
.add_remove = file_add_remove,
|
|
|
|
.change = file_change,
|
|
|
|
};
|
|
|
|
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the uninteresting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
static int compare_tree(struct tree *t1, struct tree *t2)
|
2005-10-22 07:40:54 +08:00
|
|
|
{
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
if (!t1)
|
|
|
|
return TREE_NEW;
|
|
|
|
if (!t2)
|
|
|
|
return TREE_DIFFERENT;
|
|
|
|
tree_difference = TREE_SAME;
|
2005-10-22 07:40:54 +08:00
|
|
|
if (diff_tree_sha1(t1->object.sha1, t2->object.sha1, "", &diff_opt) < 0)
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
return TREE_DIFFERENT;
|
|
|
|
return tree_difference;
|
2005-10-22 07:40:54 +08:00
|
|
|
}
|
|
|
|
|
2005-10-26 02:50:46 +08:00
|
|
|
static int same_tree_as_empty(struct tree *t1)
|
|
|
|
{
|
|
|
|
int retval;
|
|
|
|
void *tree;
|
|
|
|
struct tree_desc empty, real;
|
|
|
|
|
|
|
|
if (!t1)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
tree = read_object_with_reference(t1->object.sha1, "tree", &real.size, NULL);
|
|
|
|
if (!tree)
|
|
|
|
return 0;
|
|
|
|
real.buf = tree;
|
|
|
|
|
|
|
|
empty.buf = "";
|
|
|
|
empty.size = 0;
|
|
|
|
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
tree_difference = 0;
|
2005-10-26 02:50:46 +08:00
|
|
|
retval = diff_tree(&empty, &real, "", &diff_opt);
|
|
|
|
free(tree);
|
|
|
|
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
return retval >= 0 && !tree_difference;
|
2005-10-26 02:50:46 +08:00
|
|
|
}
|
|
|
|
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the uninteresting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the path's history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor man's 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
static void try_to_simplify_commit(struct commit *commit)
|
2005-10-21 12:25:09 +08:00
|
|
|
{
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
struct commit_list **pp, *parent;
|
|
|
|
|
2005-10-21 12:25:09 +08:00
|
|
|
if (!commit->tree)
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
return;
|
2005-10-21 12:25:09 +08:00
|
|
|
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
if (!commit->parents) {
|
|
|
|
if (!same_tree_as_empty(commit->tree))
|
|
|
|
commit->object.flags |= TREECHANGE;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
pp = &commit->parents;
|
|
|
|
while ((parent = *pp) != NULL) {
|
2005-10-21 12:25:09 +08:00
|
|
|
struct commit *p = parent->item;
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
|
|
|
|
if (p->object.flags & UNINTERESTING) {
|
|
|
|
pp = &parent->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2005-10-21 12:25:09 +08:00
|
|
|
parse_commit(p);
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
switch (compare_tree(p->tree, commit->tree)) {
|
|
|
|
case TREE_SAME:
|
|
|
|
parent->next = NULL;
|
|
|
|
commit->parents = parent;
|
|
|
|
return;
|
|
|
|
|
|
|
|
case TREE_NEW:
|
|
|
|
if (remove_empty_trees && same_tree_as_empty(p->tree)) {
|
|
|
|
*pp = parent->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
/* fallthrough */
|
|
|
|
case TREE_DIFFERENT:
|
|
|
|
pp = &parent->next;
|
2005-10-21 12:25:09 +08:00
|
|
|
continue;
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
}
|
|
|
|
die("bad tree compare for commit %s", sha1_to_hex(commit->object.sha1));
|
2005-10-21 12:25:09 +08:00
|
|
|
}
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
commit->object.flags |= TREECHANGE;
|
2005-10-21 12:25:09 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void add_parents_to_list(struct commit *commit, struct commit_list **list)
|
|
|
|
{
|
|
|
|
struct commit_list *parent = commit->parents;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If the commit is uninteresting, don't try to
|
|
|
|
* prune parents - we want the maximal uninteresting
|
|
|
|
* set.
|
|
|
|
*
|
|
|
|
* Normally we haven't parsed the parent
|
|
|
|
* yet, so we won't have a parent of a parent
|
|
|
|
* here. However, it may turn out that we've
|
|
|
|
* reached this commit some other way (where it
|
|
|
|
* wasn't uninteresting), in which case we need
|
|
|
|
* to mark its parents recursively too..
|
|
|
|
*/
|
|
|
|
if (commit->object.flags & UNINTERESTING) {
|
|
|
|
while (parent) {
|
|
|
|
struct commit *p = parent->item;
|
|
|
|
parent = parent->next;
|
|
|
|
parse_commit(p);
|
|
|
|
p->object.flags |= UNINTERESTING;
|
|
|
|
if (p->parents)
|
|
|
|
mark_parents_uninteresting(p);
|
|
|
|
if (p->object.flags & SEEN)
|
|
|
|
continue;
|
|
|
|
p->object.flags |= SEEN;
|
|
|
|
insert_by_date(p, list);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
* Ok, the commit wasn't uninteresting. Try to
|
|
|
|
* simplify the commit history and find the parent
|
|
|
|
* that has no differences in the path set if one exists.
|
2005-10-21 12:25:09 +08:00
|
|
|
*/
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
if (paths)
|
|
|
|
try_to_simplify_commit(commit);
|
2005-10-21 12:25:09 +08:00
|
|
|
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the unintersting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the paths history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor mans 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
parent = commit->parents;
|
2005-10-21 12:25:09 +08:00
|
|
|
while (parent) {
|
|
|
|
struct commit *p = parent->item;
|
|
|
|
|
|
|
|
parent = parent->next;
|
|
|
|
|
|
|
|
parse_commit(p);
|
|
|
|
if (p->object.flags & SEEN)
|
|
|
|
continue;
|
|
|
|
p->object.flags |= SEEN;
|
|
|
|
insert_by_date(p, list);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2005-07-04 01:10:45 +08:00
|
|
|
static struct commit_list *limit_list(struct commit_list *list)
|
2005-06-03 00:25:44 +08:00
|
|
|
{
|
|
|
|
struct commit_list *newlist = NULL;
|
|
|
|
struct commit_list **p = &newlist;
|
2005-06-30 02:30:24 +08:00
|
|
|
while (list) {
|
2005-10-21 12:25:09 +08:00
|
|
|
struct commit_list *entry = list;
|
|
|
|
struct commit *commit = list->item;
|
2005-06-03 00:25:44 +08:00
|
|
|
struct object *obj = &commit->object;
|
|
|
|
|
2005-10-21 12:25:09 +08:00
|
|
|
list = list->next;
|
|
|
|
free(entry);
|
|
|
|
|
2005-09-21 08:55:46 +08:00
|
|
|
if (max_age != -1 && (commit->date < max_age))
|
|
|
|
obj->flags |= UNINTERESTING;
|
2005-07-04 04:29:54 +08:00
|
|
|
if (unpacked && has_sha1_pack(obj->sha1))
|
|
|
|
obj->flags |= UNINTERESTING;
|
2005-10-21 12:25:09 +08:00
|
|
|
add_parents_to_list(commit, &list);
|
git-rev-list: allow arbitrary head selections, use git-rev-tree syntax
This makes git-rev-list use the same command line syntax to mark the
commits as git-rev-tree does, and instead of just allowing a start and
end commit, it allows an arbitrary list of "interesting" and "uninteresting"
commits.
For example, imagine that you had three branches (a, b and c) that you
are interested in, but you don't want to see stuff that already exists
in another persons three releases (x, y and z). You can do
git-rev-list a b c ^x ^y ^z
(order doesn't matter, btw - feel free to put the uninteresting ones
first or otherwise swithc them around), and it will show all the
commits that are reachable from a/b/c but not reachable from x/y/z.
The old syntax "git-rev-list start end" would not be written as
"git-rev-list start ^end", or "git-rev-list ^end start".
There's no limit to the number of heads you can specify (unlike
git-rev-tree, which can handle a maximum of 16 heads).
2005-06-05 05:38:28 +08:00
|
|
|
if (obj->flags & UNINTERESTING) {
|
2005-06-03 00:25:44 +08:00
|
|
|
mark_parents_uninteresting(commit);
|
|
|
|
if (everybody_uninteresting(list))
|
|
|
|
break;
|
|
|
|
continue;
|
|
|
|
}
|
2005-09-21 08:55:46 +08:00
|
|
|
if (min_age != -1 && (commit->date > min_age))
|
|
|
|
continue;
|
2005-06-03 00:25:44 +08:00
|
|
|
p = &commit_list_insert(commit, p)->next;
|
2005-06-30 02:30:24 +08:00
|
|
|
}
|
2005-09-16 06:14:29 +08:00
|
|
|
if (tree_objects)
|
|
|
|
mark_edges_uninteresting(newlist);
|
2005-06-18 13:54:50 +08:00
|
|
|
if (bisect_list)
|
|
|
|
newlist = find_bisection(newlist);
|
2005-06-03 00:25:44 +08:00
|
|
|
return newlist;
|
|
|
|
}
|
|
|
|
|
2005-06-30 02:30:24 +08:00
|
|
|
static void add_pending_object(struct object *obj, const char *name)
|
|
|
|
{
|
|
|
|
add_object(obj, &pending_objects, name);
|
|
|
|
}
|
|
|
|
|
git-rev-list: do not forget non-commit refs
What happens is that the new logic decides that if it can't look up a
commit reference (ie "get_commit_reference()" returns NULL), the thing
must be a pathname.
Fair enough.
But wrong.
The thing is, it may be a perfectly fine ref that _isn't_ a commit. In
git, you have a tag that points to your PGP key, and in the kernel, I have
a tag that points to a tree (and a direct ref that points to that tree
too, for that matter).
So the rule is (as for all the other programs that mix revs and pathnames)
not that we only accept commit references, but _any_ valid object ref.
If the object then isn't a commit ref, git-rev-list will either ignore it,
or add it to the list of non-commit objects (if using "--objects").
The solution is to move the "get_sha1()" out of get_commit_reference(),
and into the callers. In fact, we already _have_ the SHA1 in the case of
the handle_all() loop, since for_each_ref() will have done it for us, so
this is the correct thing to do anyway.
This patch (on top of the original one) does exactly that.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-26 23:18:13 +08:00
|
|
|
static struct commit *get_commit_reference(const char *name, const unsigned char *sha1, unsigned int flags)
|
2005-06-30 01:40:14 +08:00
|
|
|
{
|
2005-06-30 02:30:24 +08:00
|
|
|
struct object *object;
|
2005-06-30 01:40:14 +08:00
|
|
|
|
2005-06-30 02:30:24 +08:00
|
|
|
object = parse_object(sha1);
|
|
|
|
if (!object)
|
|
|
|
die("bad object %s", name);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tag object? Look what it points to..
|
|
|
|
*/
|
2005-07-11 14:55:56 +08:00
|
|
|
while (object->type == tag_type) {
|
2005-06-30 02:30:24 +08:00
|
|
|
struct tag *tag = (struct tag *) object;
|
|
|
|
object->flags |= flags;
|
|
|
|
if (tag_objects && !(object->flags & UNINTERESTING))
|
|
|
|
add_pending_object(object, tag->tag);
|
2005-07-11 14:55:56 +08:00
|
|
|
object = parse_object(tag->tagged->sha1);
|
2005-08-20 02:28:35 +08:00
|
|
|
if (!object)
|
|
|
|
die("bad object %s", sha1_to_hex(tag->tagged->sha1));
|
2005-06-30 02:30:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Commit object? Just return it, we'll do all the complex
|
|
|
|
* reachability crud.
|
|
|
|
*/
|
|
|
|
if (object->type == commit_type) {
|
|
|
|
struct commit *commit = (struct commit *)object;
|
|
|
|
object->flags |= flags;
|
|
|
|
if (parse_commit(commit) < 0)
|
|
|
|
die("unable to parse commit %s", name);
|
2005-07-11 06:09:46 +08:00
|
|
|
if (flags & UNINTERESTING)
|
|
|
|
mark_parents_uninteresting(commit);
|
2005-06-30 02:30:24 +08:00
|
|
|
return commit;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Tree object? Either mark it uniniteresting, or add it
|
|
|
|
* to the list of objects to look at later..
|
|
|
|
*/
|
|
|
|
if (object->type == tree_type) {
|
|
|
|
struct tree *tree = (struct tree *)object;
|
|
|
|
if (!tree_objects)
|
2005-07-04 04:07:52 +08:00
|
|
|
return NULL;
|
2005-06-30 02:30:24 +08:00
|
|
|
if (flags & UNINTERESTING) {
|
|
|
|
mark_tree_uninteresting(tree);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
add_pending_object(object, "");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Blob object? You know the drill by now..
|
|
|
|
*/
|
|
|
|
if (object->type == blob_type) {
|
|
|
|
struct blob *blob = (struct blob *)object;
|
|
|
|
if (!blob_objects)
|
2005-07-04 04:07:52 +08:00
|
|
|
return NULL;
|
2005-06-30 02:30:24 +08:00
|
|
|
if (flags & UNINTERESTING) {
|
|
|
|
mark_blob_uninteresting(blob);
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
add_pending_object(object, "");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
die("%s is unknown object", name);
|
2005-06-30 01:40:14 +08:00
|
|
|
}
|
|
|
|
|
2005-08-04 17:31:15 +08:00
|
|
|
static void handle_one_commit(struct commit *com, struct commit_list **lst)
|
|
|
|
{
|
|
|
|
if (!com || com->object.flags & SEEN)
|
|
|
|
return;
|
|
|
|
com->object.flags |= SEEN;
|
|
|
|
commit_list_insert(com, lst);
|
|
|
|
}
|
|
|
|
|
2005-10-06 05:49:54 +08:00
|
|
|
/*
 * for_each_ref() callback does not allow user data -- Yuck.
 *
 * handle_all() therefore stores the destination list here for the
 * duration of the for_each_ref() call, where include_one_commit() reads
 * it, and resets it to NULL afterwards.
 */
|
|
|
|
static struct commit_list **global_lst;
|
|
|
|
|
|
|
|
static int include_one_commit(const char *path, const unsigned char *sha1)
|
|
|
|
{
|
git-rev-list: do not forget non-commit refs
What happens is that the new logic decides that if it can't look up a
commit reference (ie "get_commit_reference()" returns NULL), the thing
must be a pathname.
Fair enough.
But wrong.
The thing is, it may be a perfectly fine ref that _isn't_ a commit. In
git, you have a tag that points to your PGP key, and in the kernel, I have
a tag that points to a tree (and a direct ref that points to that tree
too, for that matter).
So the rule is (as for all the other programs that mix revs and pathnames)
not that we only accept commit references, but _any_ valid object ref.
If the object then isn't a commit ref, git-rev-list will either ignore it,
or add it to the list of non-commit objects (if using "--objects").
The solution is to move the "get_sha1()" out of get_commit_reference(),
and into the callers. In fact, we already _have_ the SHA1 in the case of
the handle_all() loop, since for_each_ref() will have done it for us, so
this is the correct thing to do anyway.
This patch (on top of the original one) does exactly that.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-26 23:18:13 +08:00
|
|
|
struct commit *com = get_commit_reference(path, sha1, 0);
|
2005-10-06 05:49:54 +08:00
|
|
|
handle_one_commit(com, global_lst);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void handle_all(struct commit_list **lst)
|
|
|
|
{
|
|
|
|
global_lst = lst;
|
|
|
|
for_each_ref(include_one_commit);
|
|
|
|
global_lst = NULL;
|
|
|
|
}
|
2005-08-04 17:31:15 +08:00
|
|
|
|
2005-10-21 12:25:09 +08:00
|
|
|
int main(int argc, const char **argv)
|
2005-04-24 10:04:40 +08:00
|
|
|
{
|
2005-10-21 12:25:09 +08:00
|
|
|
const char *prefix = setup_git_directory();
|
2005-04-24 10:04:40 +08:00
|
|
|
struct commit_list *list = NULL;
|
git-rev-list: allow arbitrary head selections, use git-rev-tree syntax
This makes git-rev-list use the same command line syntax to mark the
commits as git-rev-tree does, and instead of just allowing a start and
end commit, it allows an arbitrary list of "interesting" and "uninteresting"
commits.
For example, imagine that you had three branches (a, b and c) that you
are interested in, but you don't want to see stuff that already exists
in another person's three releases (x, y and z). You can do
git-rev-list a b c ^x ^y ^z
(order doesn't matter, btw - feel free to put the uninteresting ones
first or otherwise switch them around), and it will show all the
commits that are reachable from a/b/c but not reachable from x/y/z.
The old syntax "git-rev-list start end" would now be written as
"git-rev-list start ^end", or "git-rev-list ^end start".
There's no limit to the number of heads you can specify (unlike
git-rev-tree, which can handle a maximum of 16 heads).
2005-06-05 05:38:28 +08:00
|
|
|
int i, limited = 0;
|
2005-04-24 10:04:40 +08:00
|
|
|
|
2005-05-06 16:00:11 +08:00
|
|
|
for (i = 1 ; i < argc; i++) {
|
git-rev-list: allow arbitrary head selections, use git-rev-tree syntax
This makes git-rev-list use the same command line syntax to mark the
commits as git-rev-tree does, and instead of just allowing a start and
end commit, it allows an arbitrary list of "interesting" and "uninteresting"
commits.
For example, imagine that you had three branches (a, b and c) that you
are interested in, but you don't want to see stuff that already exists
in another person's three releases (x, y and z). You can do
git-rev-list a b c ^x ^y ^z
(order doesn't matter, btw - feel free to put the uninteresting ones
first or otherwise switch them around), and it will show all the
commits that are reachable from a/b/c but not reachable from x/y/z.
The old syntax "git-rev-list start end" would now be written as
"git-rev-list start ^end", or "git-rev-list ^end start".
There's no limit to the number of heads you can specify (unlike
git-rev-tree, which can handle a maximum of 16 heads).
2005-06-05 05:38:28 +08:00
|
|
|
int flags;
|
2005-10-21 12:25:09 +08:00
|
|
|
const char *arg = argv[i];
|
2005-08-04 17:31:15 +08:00
|
|
|
char *dotdot;
|
git-rev-list: allow arbitrary head selections, use git-rev-tree syntax
This makes git-rev-list use the same command line syntax to mark the
commits as git-rev-tree does, and instead of just allowing a start and
end commit, it allows an arbitrary list of "interesting" and "uninteresting"
commits.
For example, imagine that you had three branches (a, b and c) that you
are interested in, but you don't want to see stuff that already exists
in another person's three releases (x, y and z). You can do
git-rev-list a b c ^x ^y ^z
(order doesn't matter, btw - feel free to put the uninteresting ones
first or otherwise switch them around), and it will show all the
commits that are reachable from a/b/c but not reachable from x/y/z.
The old syntax "git-rev-list start end" would now be written as
"git-rev-list start ^end", or "git-rev-list ^end start".
There's no limit to the number of heads you can specify (unlike
git-rev-tree, which can handle a maximum of 16 heads).
2005-06-05 05:38:28 +08:00
|
|
|
struct commit *commit;
|
git-rev-list: do not forget non-commit refs
What happens is that the new logic decides that if it can't look up a
commit reference (ie "get_commit_reference()" returns NULL), the thing
must be a pathname.
Fair enough.
But wrong.
The thing is, it may be a perfectly fine ref that _isn't_ a commit. In
git, you have a tag that points to your PGP key, and in the kernel, I have
a tag that points to a tree (and a direct ref that points to that tree
too, for that matter).
So the rule is (as for all the other programs that mix revs and pathnames)
not that we only accept commit references, but _any_ valid object ref.
If the object then isn't a commit ref, git-rev-list will either ignore it,
or add it to the list of non-commit objects (if using "--objects").
The solution is to move the "get_sha1()" out of get_commit_reference(),
and into the callers. In fact, we already _have_ the SHA1 in the case of
the handle_all() loop, since for_each_ref() will have done it for us, so
this is the correct thing to do anyway.
This patch (on top of the original one) does exactly that.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-26 23:18:13 +08:00
|
|
|
unsigned char sha1[20];
|
2005-05-06 16:00:11 +08:00
|
|
|
|
2006-01-30 08:28:02 +08:00
|
|
|
/* accept -<digit>, like traditional "head" */
|
|
|
|
if ((*arg == '-') && isdigit(arg[1])) {
|
|
|
|
max_count = atoi(arg + 1);
|
|
|
|
continue;
|
|
|
|
}
|
2006-01-30 08:26:40 +08:00
|
|
|
if (!strcmp(arg, "-n")) {
|
|
|
|
if (++i >= argc)
|
|
|
|
die("-n requires an argument");
|
|
|
|
max_count = atoi(argv[i]);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strncmp(arg,"-n",2)) {
|
|
|
|
max_count = atoi(arg + 2);
|
|
|
|
continue;
|
|
|
|
}
|
2005-05-06 16:00:11 +08:00
|
|
|
if (!strncmp(arg, "--max-count=", 12)) {
|
|
|
|
max_count = atoi(arg + 12);
|
2005-05-26 09:29:09 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strncmp(arg, "--max-age=", 10)) {
|
2005-05-06 16:00:11 +08:00
|
|
|
max_age = atoi(arg + 10);
|
2005-09-21 08:55:46 +08:00
|
|
|
limited = 1;
|
2005-05-26 09:29:09 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!strncmp(arg, "--min-age=", 10)) {
|
2005-05-06 16:00:11 +08:00
|
|
|
min_age = atoi(arg + 10);
|
2005-09-21 08:55:46 +08:00
|
|
|
limited = 1;
|
2005-05-26 09:29:09 +08:00
|
|
|
continue;
|
2005-05-06 16:00:11 +08:00
|
|
|
}
|
2005-05-26 09:29:09 +08:00
|
|
|
if (!strcmp(arg, "--header")) {
|
|
|
|
verbose_header = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-06-06 00:02:03 +08:00
|
|
|
if (!strncmp(arg, "--pretty", 8)) {
|
|
|
|
commit_format = get_commit_format(arg+8);
|
2005-06-01 23:42:22 +08:00
|
|
|
verbose_header = 1;
|
|
|
|
hdr_termination = '\n';
|
2005-08-09 13:15:40 +08:00
|
|
|
if (commit_format == CMIT_FMT_ONELINE)
|
2005-08-25 05:58:42 +08:00
|
|
|
commit_prefix = "";
|
2005-08-09 13:15:40 +08:00
|
|
|
else
|
2005-08-25 05:58:42 +08:00
|
|
|
commit_prefix = "commit ";
|
2005-06-01 23:42:22 +08:00
|
|
|
continue;
|
|
|
|
}
|
2005-08-08 17:37:21 +08:00
|
|
|
if (!strncmp(arg, "--no-merges", 11)) {
|
|
|
|
no_merges = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-05-31 10:30:07 +08:00
|
|
|
if (!strcmp(arg, "--parents")) {
|
|
|
|
show_parents = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-06-18 13:54:50 +08:00
|
|
|
if (!strcmp(arg, "--bisect")) {
|
|
|
|
bisect_list = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-10-06 05:49:54 +08:00
|
|
|
if (!strcmp(arg, "--all")) {
|
|
|
|
handle_all(&list);
|
|
|
|
continue;
|
|
|
|
}
|
2005-06-25 13:56:58 +08:00
|
|
|
if (!strcmp(arg, "--objects")) {
|
2005-06-30 01:40:14 +08:00
|
|
|
tag_objects = 1;
|
2005-06-25 13:56:58 +08:00
|
|
|
tree_objects = 1;
|
|
|
|
blob_objects = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-07-04 04:29:54 +08:00
|
|
|
if (!strcmp(arg, "--unpacked")) {
|
|
|
|
unpacked = 1;
|
|
|
|
limited = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-07-06 03:12:50 +08:00
|
|
|
if (!strcmp(arg, "--merge-order")) {
|
2005-06-06 23:39:40 +08:00
|
|
|
merge_order = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-07-06 03:12:50 +08:00
|
|
|
if (!strcmp(arg, "--show-breaks")) {
|
2005-06-06 23:39:40 +08:00
|
|
|
show_breaks = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-07-07 01:25:04 +08:00
|
|
|
if (!strcmp(arg, "--topo-order")) {
|
|
|
|
topo_order = 1;
|
2005-07-07 01:51:43 +08:00
|
|
|
limited = 1;
|
2005-07-07 01:25:04 +08:00
|
|
|
continue;
|
|
|
|
}
|
2005-10-22 07:40:54 +08:00
|
|
|
if (!strcmp(arg, "--dense")) {
|
|
|
|
dense = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-10-26 06:24:55 +08:00
|
|
|
if (!strcmp(arg, "--sparse")) {
|
|
|
|
dense = 0;
|
|
|
|
continue;
|
|
|
|
}
|
rev-list: stop when the file disappears
The one thing I've considered doing (I really should) is to add a "stop
when you don't find the file" option to "git-rev-list". This patch does
some of the work towards that: it removes the "parent" thing when the
file disappears, so a "git annotate" could do something like
git-rev-list --remove-empty --parents HEAD -- "$filename"
and it would get a good graph that stops when the filename disappears
(it's not perfect though: it won't remove all the uninteresting commits).
It also simplifies the logic of finding tree differences a bit, at the
cost of making it a tad less efficient.
The old logic was two-phase: it would first simplify _only_ merges tree as
it traversed the tree, and then simplify the linear parts of the remainder
independently. That was pretty optimal from an efficiency standpoint
because it avoids doing any comparisons that we can see are unnecessary,
but it made it much harder to understand than it really needed to be.
The new logic is a lot more straightforward, and compares the trees as it
traverses the graph (ie everything is a single phase). That makes it much
easier to stop graph traversal at any point where a file disappears.
As an example, let's say that you have a git repository that has had a
file called "A" some time in the past. That file gets renamed to B, and
then gets renamed back again to A. The old "git-rev-list" would show two
commits: the commit that renames B to A (because it changes A) _and_ as
its parent the commit that renames A to B (because it changes A).
With the new --remove-empty flag, git-rev-list will show just the commit
that renames B to A as the "root" commit, and stop traversal there
(because that's what you want for "annotate" - you want to stop there, and
for every "root" commit you then separately see if it really is a new
file, or if the path's history disappeared because it was renamed from some
other file).
With this patch, you should be able to basically do a "poor man's 'git
annotate'" with a fairly simple loop:
push("HEAD", "$filename")
while (revision,filename = pop()) {
for each i in $(git-rev-list --parents --remove-empty $revision -- "$filename")
pseudo-parents($i) = git-rev-list parents for that line
if (pseudo-parents($i) is non-empty) {
show diff of $i against pseudo-parents
continue
}
/* See if the _real_ parents of $i had a rename */
parent($i) = real-parent($i)
if (find-rename in $parent($i)->$i)
push $parent($i), "old-name"
}
which should be doable in perl or something (doing stacks in shell is just
too painful to be worth it, so I'm not going to do this).
Anybody want to try?
Linus
2006-01-19 06:47:30 +08:00
|
|
|
if (!strcmp(arg, "--remove-empty")) {
|
|
|
|
remove_empty_trees = 1;
|
|
|
|
continue;
|
|
|
|
}
|
2005-10-21 12:25:09 +08:00
|
|
|
if (!strcmp(arg, "--")) {
|
2005-10-26 06:24:55 +08:00
|
|
|
i++;
|
2005-10-21 12:25:09 +08:00
|
|
|
break;
|
|
|
|
}
|
2005-05-26 09:29:09 +08:00
|
|
|
|
2005-08-04 17:31:15 +08:00
|
|
|
if (show_breaks && !merge_order)
|
|
|
|
usage(rev_list_usage);
|
|
|
|
|
git-rev-list: allow arbitrary head selections, use git-rev-tree syntax
This makes git-rev-list use the same command line syntax to mark the
commits as git-rev-tree does, and instead of just allowing a start and
end commit, it allows an arbitrary list of "interesting" and "uninteresting"
commits.
For example, imagine that you had three branches (a, b and c) that you
are interested in, but you don't want to see stuff that already exists
in another person's three releases (x, y and z). You can do
git-rev-list a b c ^x ^y ^z
(order doesn't matter, btw - feel free to put the uninteresting ones
first or otherwise switch them around), and it will show all the
commits that are reachable from a/b/c but not reachable from x/y/z.
The old syntax "git-rev-list start end" would now be written as
"git-rev-list start ^end", or "git-rev-list ^end start".
There's no limit to the number of heads you can specify (unlike
git-rev-tree, which can handle a maximum of 16 heads).
2005-06-05 05:38:28 +08:00
|
|
|
flags = 0;
|
2005-08-04 17:31:15 +08:00
|
|
|
dotdot = strstr(arg, "..");
|
|
|
|
if (dotdot) {
|
git-rev-list: do not forget non-commit refs
What happens is that the new logic decides that if it can't look up a
commit reference (ie "get_commit_reference()" returns NULL), the thing
must be a pathname.
Fair enough.
But wrong.
The thing is, it may be a perfectly fine ref that _isn't_ a commit. In
git, you have a tag that points to your PGP key, and in the kernel, I have
a tag that points to a tree (and a direct ref that points to that tree
too, for that matter).
So the rule is (as for all the other programs that mix revs and pathnames)
not that we only accept commit references, but _any_ valid object ref.
If the object then isn't a commit ref, git-rev-list will either ignore it,
or add it to the list of non-commit objects (if using "--objects").
The solution is to move the "get_sha1()" out of get_commit_reference(),
and into the callers. In fact, we already _have_ the SHA1 in the case of
the handle_all() loop, since for_each_ref() will have done it for us, so
this is the correct thing to do anyway.
This patch (on top of the original one) does exactly that.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-26 23:18:13 +08:00
|
|
|
unsigned char from_sha1[20];
|
2005-08-04 17:31:15 +08:00
|
|
|
char *next = dotdot + 2;
|
|
|
|
*dotdot = 0;
|
2005-09-17 08:53:19 +08:00
|
|
|
if (!*next)
|
|
|
|
next = "HEAD";
|
git-rev-list: do not forget non-commit refs
What happens is that the new logic decides that if it can't look up a
commit reference (ie "get_commit_reference()" returns NULL), the thing
must be a pathname.
Fair enough.
But wrong.
The thing is, it may be a perfectly fine ref that _isn't_ a commit. In
git, you have a tag that points to your PGP key, and in the kernel, I have
a tag that points to a tree (and a direct ref that points to that tree
too, for that matter).
So the rule is (as for all the other programs that mix revs and pathnames)
not that we only accept commit references, but _any_ valid object ref.
If the object then isn't a commit ref, git-rev-list will either ignore it,
or add it to the list of non-commit objects (if using "--objects").
The solution is to move the "get_sha1()" out of get_commit_reference(),
and into the callers. In fact, we already _have_ the SHA1 in the case of
the handle_all() loop, since for_each_ref() will have done it for us, so
this is the correct thing to do anyway.
This patch (on top of the original one) does exactly that.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-26 23:18:13 +08:00
|
|
|
if (!get_sha1(arg, from_sha1) && !get_sha1(next, sha1)) {
|
|
|
|
struct commit *exclude;
|
|
|
|
struct commit *include;
|
|
|
|
|
|
|
|
exclude = get_commit_reference(arg, from_sha1, UNINTERESTING);
|
|
|
|
include = get_commit_reference(next, sha1, 0);
|
|
|
|
if (!exclude || !include)
|
|
|
|
die("Invalid revision range %s..%s", arg, next);
|
2005-08-04 17:31:15 +08:00
|
|
|
limited = 1;
|
|
|
|
handle_one_commit(exclude, &list);
|
|
|
|
handle_one_commit(include, &list);
|
|
|
|
continue;
|
|
|
|
}
|
2005-09-17 08:53:19 +08:00
|
|
|
*dotdot = '.';
|
2005-08-04 17:31:15 +08:00
|
|
|
}
|
git-rev-list: allow arbitrary head selections, use git-rev-tree syntax
This makes git-rev-list use the same command line syntax to mark the
commits as git-rev-tree does, and instead of just allowing a start and
end commit, it allows an arbitrary list of "interesting" and "uninteresting"
commits.
For example, imagine that you had three branches (a, b and c) that you
are interested in, but you don't want to see stuff that already exists
in another person's three releases (x, y and z). You can do
git-rev-list a b c ^x ^y ^z
(order doesn't matter, btw - feel free to put the uninteresting ones
first or otherwise switch them around), and it will show all the
commits that are reachable from a/b/c but not reachable from x/y/z.
The old syntax "git-rev-list start end" would now be written as
"git-rev-list start ^end", or "git-rev-list ^end start".
There's no limit to the number of heads you can specify (unlike
git-rev-tree, which can handle a maximum of 16 heads).
2005-06-05 05:38:28 +08:00
|
|
|
if (*arg == '^') {
|
|
|
|
flags = UNINTERESTING;
|
|
|
|
arg++;
|
|
|
|
limited = 1;
|
|
|
|
}
|
2006-01-26 06:00:37 +08:00
|
|
|
if (get_sha1(arg, sha1) < 0) {
|
|
|
|
struct stat st;
|
|
|
|
if (lstat(arg, &st) < 0)
|
|
|
|
die("'%s': %s", arg, strerror(errno));
|
2005-10-26 06:24:55 +08:00
|
|
|
break;
|
2006-01-26 06:00:37 +08:00
|
|
|
}
|
git-rev-list: do not forget non-commit refs
What happens is that the new logic decides that if it can't look up a
commit reference (ie "get_commit_reference()" returns NULL), the thing
must be a pathname.
Fair enough.
But wrong.
The thing is, it may be a perfectly fine ref that _isn't_ a commit. In
git, you have a tag that points to your PGP key, and in the kernel, I have
a tag that points to a tree (and a direct ref that points to that tree
too, for that matter).
So the rule is (as for all the other programs that mix revs and pathnames)
not that we only accept commit references, but _any_ valid object ref.
If the object then isn't a commit ref, git-rev-list will either ignore it,
or add it to the list of non-commit objects (if using "--objects").
The solution is to move the "get_sha1()" out of get_commit_reference(),
and into the callers. In fact, we already _have_ the SHA1 in the case of
the handle_all() loop, since for_each_ref() will have done it for us, so
this is the correct thing to do anyway.
This patch (on top of the original one) does exactly that.
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-26 23:18:13 +08:00
|
|
|
commit = get_commit_reference(arg, sha1, flags);
|
2005-08-04 17:31:15 +08:00
|
|
|
handle_one_commit(commit, &list);
|
2005-05-06 16:00:11 +08:00
|
|
|
}
|
|
|
|
|
2005-12-20 08:16:49 +08:00
|
|
|
if (!list &&
|
|
|
|
(!(tag_objects||tree_objects||blob_objects) && !pending_objects))
|
2005-10-26 06:24:55 +08:00
|
|
|
usage(rev_list_usage);
|
|
|
|
|
|
|
|
paths = get_pathspec(prefix, argv + i);
|
|
|
|
if (paths) {
|
|
|
|
limited = 1;
|
|
|
|
diff_tree_setup_paths(paths);
|
|
|
|
}
|
|
|
|
|
[PATCH] Avoid wasting memory in git-rev-list
As pointed out on the list, git-rev-list can use a lot of memory.
One low-hanging fruit is to free the commit buffer for commits that we
parse. By default, parse_commit() will save away the buffer, since a lot
of cases do want it, and re-reading it continually would be unnecessary.
However, in many cases the buffer isn't actually necessary and saving it
just wastes memory.
We could just free the buffer ourselves, but especially in git-rev-list,
we actually end up using the helper functions that automatically add
parent commits to the commit lists, so we don't actually control the
commit parsing directly.
Instead, just make this behaviour of "parse_commit()" a global flag.
Maybe this is a bit tasteless, but it's very simple, and it makes a
noticeable difference in memory usage.
Before the change:
[torvalds@g5 linux]$ /usr/bin/time git-rev-list v2.6.12..HEAD > /dev/null
0.26user 0.02system 0:00.28elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+3714minor)pagefaults 0swaps
after the change:
[torvalds@g5 linux]$ /usr/bin/time git-rev-list v2.6.12..HEAD > /dev/null
0.26user 0.00system 0:00.27elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2433minor)pagefaults 0swaps
note how the minor faults have decreased from 3714 pages to 2433 pages.
That's all due to the fewer anonymous pages allocated to hold the comment
buffers and their metadata.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-09-16 05:43:17 +08:00
|
|
|
save_commit_buffer = verbose_header;
|
2005-09-17 05:55:33 +08:00
|
|
|
track_object_refs = 0;
|
[PATCH] Avoid wasting memory in git-rev-list
As pointed out on the list, git-rev-list can use a lot of memory.
One low-hanging fruit is to free the commit buffer for commits that we
parse. By default, parse_commit() will save away the buffer, since a lot
of cases do want it, and re-reading it continually would be unnecessary.
However, in many cases the buffer isn't actually necessary and saving it
just wastes memory.
We could just free the buffer ourselves, but especially in git-rev-list,
we actually end up using the helper functions that automatically add
parent commits to the commit lists, so we don't actually control the
commit parsing directly.
Instead, just make this behaviour of "parse_commit()" a global flag.
Maybe this is a bit tasteless, but it's very simple, and it makes a
noticeable difference in memory usage.
Before the change:
[torvalds@g5 linux]$ /usr/bin/time git-rev-list v2.6.12..HEAD > /dev/null
0.26user 0.02system 0:00.28elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+3714minor)pagefaults 0swaps
after the change:
[torvalds@g5 linux]$ /usr/bin/time git-rev-list v2.6.12..HEAD > /dev/null
0.26user 0.00system 0:00.27elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2433minor)pagefaults 0swaps
note how the minor faults have decreased from 3714 pages to 2433 pages.
That's all due to the fewer anonymous pages allocated to hold the comment
buffers and their metadata.
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-09-16 05:43:17 +08:00
|
|
|
|
2005-06-06 23:39:40 +08:00
|
|
|
if (!merge_order) {
|
2005-07-07 08:59:13 +08:00
|
|
|
sort_by_date(&list);
|
Optimize common case of git-rev-list
I took a look at webgit, and it looks like at least for the "projects"
page, the most common operation ends up being basically
git-rev-list --header --parents --max-count=1 HEAD
Now, the thing is, the way "git-rev-list" works, it always keeps on
popping the parents and parsing them in order to build the list of
parents, and it turns out that even though we just want a single commit,
git-rev-list will invariably look up _three_ generations of commits.
It will parse:
- the commit we want (it obviously needs this)
- its parent(s) as part of the "pop_most_recent_commit()" logic
- it will then pop one of the parents before it notices that it doesn't
need any more
- and as part of popping the parent, it will parse the grandparent (again
due to "pop_most_recent_commit()").
Now, I've strace'd it, and it really is pretty efficient on the whole, but
if things aren't nicely cached, and with long-latency IO, doing those two
extra objects (at a minimum - if the parent is a merge it will be more) is
just wasted time, and potentially a lot of it.
So here's a quick special-case for the trivial case of "just one commit,
and no date-limits or other special rules".
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Signed-off-by: Junio C Hamano <junkio@cox.net>
2005-10-19 09:29:17 +08:00
|
|
|
if (list && !limited && max_count == 1 &&
|
|
|
|
!tag_objects && !tree_objects && !blob_objects) {
|
|
|
|
show_commit(list->item);
|
|
|
|
return 0;
|
|
|
|
}
|
2005-06-09 04:59:43 +08:00
|
|
|
if (limited)
|
2005-06-06 23:39:40 +08:00
|
|
|
list = limit_list(list);
|
2005-07-07 01:25:04 +08:00
|
|
|
if (topo_order)
|
|
|
|
sort_in_topological_order(&list);
|
2005-06-06 23:39:40 +08:00
|
|
|
show_commit_list(list);
|
|
|
|
} else {
|
2005-07-29 23:50:51 +08:00
|
|
|
#ifndef NO_OPENSSL
|
2005-06-06 23:39:40 +08:00
|
|
|
if (sort_list_in_merge_order(list, &process_commit)) {
|
2005-07-29 23:50:51 +08:00
|
|
|
die("merge order sort failed\n");
|
2005-06-06 23:39:40 +08:00
|
|
|
}
|
2005-07-29 23:50:51 +08:00
|
|
|
#else
|
|
|
|
die("merge order sort unsupported, OpenSSL not linked");
|
|
|
|
#endif
|
2005-06-06 23:39:40 +08:00
|
|
|
}
|
2005-05-31 09:46:32 +08:00
|
|
|
|
2005-04-24 10:04:40 +08:00
|
|
|
return 0;
|
|
|
|
}
|