2008-03-07 04:26:14 +08:00
|
|
|
#define NO_THE_INDEX_COMPATIBILITY_MACROS
|
2006-07-31 02:25:18 +08:00
|
|
|
#include "cache.h"
|
2006-12-05 08:00:46 +08:00
|
|
|
#include "dir.h"
|
2006-07-31 02:25:18 +08:00
|
|
|
#include "tree.h"
|
|
|
|
#include "tree-walk.h"
|
2006-07-31 02:26:15 +08:00
|
|
|
#include "cache-tree.h"
|
2006-07-31 02:25:18 +08:00
|
|
|
#include "unpack-trees.h"
|
2007-04-19 02:27:45 +08:00
|
|
|
#include "progress.h"
|
2007-07-18 02:28:28 +08:00
|
|
|
#include "refs.h"
|
2009-03-14 12:24:08 +08:00
|
|
|
#include "attr.h"
|
2006-07-31 02:25:18 +08:00
|
|
|
|
2008-05-18 03:03:49 +08:00
|
|
|
/*
|
|
|
|
* Error messages expected by scripts out of plumbing commands such as
|
|
|
|
* read-tree. Non-scripted Porcelain is not required to use these messages
|
|
|
|
* and in fact are encouraged to reword them to better suit their particular
|
2010-08-11 16:38:05 +08:00
|
|
|
* situation better. See how "git checkout" and "git merge" replaces
|
2010-09-02 19:57:33 +08:00
|
|
|
* them using setup_unpack_trees_porcelain(), for example.
|
2008-05-18 03:03:49 +08:00
|
|
|
*/
|
2010-08-11 16:38:04 +08:00
|
|
|
const char *unpack_plumbing_errors[NB_UNPACK_TREES_ERROR_TYPES] = {
|
|
|
|
/* ERROR_WOULD_OVERWRITE */
|
2008-05-18 03:03:49 +08:00
|
|
|
"Entry '%s' would be overwritten by merge. Cannot merge.",
|
|
|
|
|
2010-08-11 16:38:04 +08:00
|
|
|
/* ERROR_NOT_UPTODATE_FILE */
|
2008-05-18 03:03:49 +08:00
|
|
|
"Entry '%s' not uptodate. Cannot merge.",
|
|
|
|
|
2010-08-11 16:38:04 +08:00
|
|
|
/* ERROR_NOT_UPTODATE_DIR */
|
2008-05-18 03:03:49 +08:00
|
|
|
"Updating '%s' would lose untracked files in it",
|
|
|
|
|
2010-08-11 16:38:06 +08:00
|
|
|
/* ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN */
|
|
|
|
"Untracked working tree file '%s' would be overwritten by merge.",
|
2008-05-18 03:03:49 +08:00
|
|
|
|
2010-08-11 16:38:06 +08:00
|
|
|
/* ERROR_WOULD_LOSE_UNTRACKED_REMOVED */
|
|
|
|
"Untracked working tree file '%s' would be removed by merge.",
|
2008-05-18 03:03:49 +08:00
|
|
|
|
2010-08-11 16:38:04 +08:00
|
|
|
/* ERROR_BIND_OVERLAP */
|
2008-05-18 03:03:49 +08:00
|
|
|
"Entry '%s' overlaps with '%s'. Cannot bind.",
|
2009-08-20 21:47:09 +08:00
|
|
|
|
2010-08-11 16:38:04 +08:00
|
|
|
/* ERROR_SPARSE_NOT_UPTODATE_FILE */
|
2009-08-20 21:47:09 +08:00
|
|
|
"Entry '%s' not uptodate. Cannot update sparse checkout.",
|
|
|
|
|
2010-08-11 16:38:06 +08:00
|
|
|
/* ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN */
|
|
|
|
"Working tree file '%s' would be overwritten by sparse checkout update.",
|
|
|
|
|
|
|
|
/* ERROR_WOULD_LOSE_ORPHANED_REMOVED */
|
|
|
|
"Working tree file '%s' would be removed by sparse checkout update.",
|
2008-05-18 03:03:49 +08:00
|
|
|
};
|
|
|
|
|
2010-08-11 16:38:04 +08:00
|
|
|
/*
 * Select the format string for error <type>: the override stored in
 * (o)->msgs[] when <o> is non-NULL and has one, otherwise the plumbing
 * default.  Both arguments are evaluated more than once.
 */
#define ERRORMSG(o,type) \
	( (!(o) || !(o)->msgs[(type)]) \
	  ? (unpack_plumbing_errors[(type)]) \
	  : ((o)->msgs[(type)]) )
|
2008-05-18 03:03:49 +08:00
|
|
|
|
2010-09-02 19:57:34 +08:00
|
|
|
void setup_unpack_trees_porcelain(struct unpack_trees_options *opts,
|
|
|
|
const char *cmd)
|
2010-09-02 19:57:33 +08:00
|
|
|
{
|
2010-09-02 19:57:34 +08:00
|
|
|
const char **msgs = opts->msgs;
|
2010-09-02 19:57:33 +08:00
|
|
|
const char *msg;
|
|
|
|
char *tmp;
|
|
|
|
const char *cmd2 = strcmp(cmd, "checkout") ? cmd : "switch branches";
|
|
|
|
if (advice_commit_before_merge)
|
|
|
|
msg = "Your local changes to the following files would be overwritten by %s:\n%%s"
|
|
|
|
"Please, commit your changes or stash them before you can %s.";
|
|
|
|
else
|
|
|
|
msg = "Your local changes to the following files would be overwritten by %s:\n%%s";
|
|
|
|
tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen(cmd2) - 2);
|
|
|
|
sprintf(tmp, msg, cmd, cmd2);
|
|
|
|
msgs[ERROR_WOULD_OVERWRITE] = tmp;
|
|
|
|
msgs[ERROR_NOT_UPTODATE_FILE] = tmp;
|
|
|
|
|
|
|
|
msgs[ERROR_NOT_UPTODATE_DIR] =
|
|
|
|
"Updating the following directories would lose untracked files in it:\n%s";
|
|
|
|
|
|
|
|
if (advice_commit_before_merge)
|
|
|
|
msg = "The following untracked working tree files would be %s by %s:\n%%s"
|
|
|
|
"Please move or remove them before you can %s.";
|
|
|
|
else
|
|
|
|
msg = "The following untracked working tree files would be %s by %s:\n%%s";
|
|
|
|
tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen("removed") + strlen(cmd2) - 4);
|
|
|
|
sprintf(tmp, msg, "removed", cmd, cmd2);
|
|
|
|
msgs[ERROR_WOULD_LOSE_UNTRACKED_REMOVED] = tmp;
|
|
|
|
tmp = xmalloc(strlen(msg) + strlen(cmd) + strlen("overwritten") + strlen(cmd2) - 4);
|
|
|
|
sprintf(tmp, msg, "overwritten", cmd, cmd2);
|
|
|
|
msgs[ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN] = tmp;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Special case: ERROR_BIND_OVERLAP refers to a pair of paths, we
|
|
|
|
* cannot easily display it as a list.
|
|
|
|
*/
|
|
|
|
msgs[ERROR_BIND_OVERLAP] = "Entry '%s' overlaps with '%s'. Cannot bind.";
|
|
|
|
|
|
|
|
msgs[ERROR_SPARSE_NOT_UPTODATE_FILE] =
|
|
|
|
"Cannot update sparse checkout: the following entries are not up-to-date:\n%s";
|
|
|
|
msgs[ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN] =
|
|
|
|
"The following Working tree files would be overwritten by sparse checkout update:\n%s";
|
|
|
|
msgs[ERROR_WOULD_LOSE_ORPHANED_REMOVED] =
|
|
|
|
"The following Working tree files would be removed by sparse checkout update:\n%s";
|
2010-09-03 00:08:15 +08:00
|
|
|
|
|
|
|
opts->show_all_errors = 1;
|
2010-09-02 19:57:33 +08:00
|
|
|
}
|
|
|
|
|
2008-03-07 10:12:28 +08:00
|
|
|
static void add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
|
|
|
|
unsigned int set, unsigned int clear)
|
2007-08-11 03:15:54 +08:00
|
|
|
{
|
2008-03-07 10:12:28 +08:00
|
|
|
unsigned int size = ce_size(ce);
|
|
|
|
struct cache_entry *new = xmalloc(size);
|
|
|
|
|
|
|
|
clear |= CE_HASHED | CE_UNHASHED;
|
|
|
|
|
2010-07-31 14:14:27 +08:00
|
|
|
if (set & CE_REMOVE)
|
|
|
|
set |= CE_WT_REMOVE;
|
|
|
|
|
2008-03-07 10:12:28 +08:00
|
|
|
memcpy(new, ce, size);
|
|
|
|
new->next = NULL;
|
|
|
|
new->ce_flags = (new->ce_flags & ~clear) | set;
|
read-tree A B C: do not create a bogus index and do not segfault
"git read-tree A B C..." without the "-m" (merge) option is a way to read
these trees on top of each other to get an overlay of them.
An ancient commit ee6566e (Rewrite read-tree, 2005-09-05) passed the
ADD_CACHE_SKIP_DFCHECK flag when calling add_index_entry() to add the
paths obtained from these trees to the index, but it is an incorrect use
of the flag. The flag is meant to be used by callers who know the
addition of the entry does not introduce a D/F conflict to the index in
order to avoid the overhead of checking.
This bug resulted in a bogus index that records both "x" and "x/z" as a
blob after reading three trees that have paths ("x"), ("x", "y"), and
("x/z", "y") respectively. 34110cd (Make 'unpack_trees()' have a separate
source and destination index, 2008-03-06) refactored the callsites of
add_index_entry() incorrectly and added more codepaths that use this flag
when it shouldn't be used.
Also, 0190457 (Move 'unpack_trees()' over to 'traverse_trees()' interface,
2008-03-05) introduced a bug to call add_index_entry() for the tree that
does not have the path in it, passing NULL as a cache entry. This caused
reading multiple trees, one of which has path "x" but another doesn't, to
segfault.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-03-12 15:02:12 +08:00
|
|
|
add_index_entry(&o->result, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
|
2007-08-11 03:15:54 +08:00
|
|
|
}
|
|
|
|
|
2010-08-11 16:38:07 +08:00
|
|
|
/*
|
|
|
|
* add error messages on path <path>
|
|
|
|
* corresponding to the type <e> with the message <msg>
|
|
|
|
* indicating if it should be display in porcelain or not
|
|
|
|
*/
|
|
|
|
static int add_rejected_path(struct unpack_trees_options *o,
|
|
|
|
enum unpack_trees_error_types e,
|
|
|
|
const char *path)
|
|
|
|
{
|
|
|
|
struct rejected_paths_list *newentry;
|
|
|
|
if (!o->show_all_errors)
|
|
|
|
return error(ERRORMSG(o, e), path);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Otherwise, insert in a list for future display by
|
|
|
|
* display_error_msgs()
|
|
|
|
*/
|
|
|
|
newentry = xmalloc(sizeof(struct rejected_paths_list));
|
|
|
|
newentry->path = (char *)path;
|
|
|
|
newentry->next = o->unpack_rejects[e];
|
|
|
|
o->unpack_rejects[e] = newentry;
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* free all the structures allocated for the error <e>
|
|
|
|
*/
|
|
|
|
static void free_rejected_paths(struct unpack_trees_options *o,
|
|
|
|
enum unpack_trees_error_types e)
|
|
|
|
{
|
|
|
|
while (o->unpack_rejects[e]) {
|
|
|
|
struct rejected_paths_list *del = o->unpack_rejects[e];
|
|
|
|
o->unpack_rejects[e] = o->unpack_rejects[e]->next;
|
|
|
|
free(del);
|
|
|
|
}
|
|
|
|
free(o->unpack_rejects[e]);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* display all the error messages stored in a nice way
|
|
|
|
*/
|
|
|
|
static void display_error_msgs(struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
int e;
|
|
|
|
int something_displayed = 0;
|
|
|
|
for (e = 0; e < NB_UNPACK_TREES_ERROR_TYPES; e++) {
|
|
|
|
if (o->unpack_rejects[e]) {
|
|
|
|
struct rejected_paths_list *rp;
|
|
|
|
struct strbuf path = STRBUF_INIT;
|
|
|
|
something_displayed = 1;
|
|
|
|
for (rp = o->unpack_rejects[e]; rp; rp = rp->next)
|
|
|
|
strbuf_addf(&path, "\t%s\n", rp->path);
|
|
|
|
error(ERRORMSG(o, e), path.buf);
|
|
|
|
strbuf_release(&path);
|
|
|
|
free_rejected_paths(o, e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (something_displayed)
|
|
|
|
printf("Aborting\n");
|
|
|
|
}
|
|
|
|
|
2009-02-10 04:54:07 +08:00
|
|
|
/*
|
|
|
|
* Unlink the last component and schedule the leading directories for
|
|
|
|
* removal, such that empty directories get removed.
|
2006-07-31 02:25:18 +08:00
|
|
|
*/
|
Optimize symlink/directory detection
This is the base for making symlink detection in the middle of a pathname
saner and (much) more efficient.
Under various loads, we want to verify that the full path leading up to a
filename is a real directory tree, and that when we successfully do an
'lstat()' on a filename, we don't get a false positive due to a symlink in
the middle of the path that git should have seen as a symlink, not as a
normal path component.
The 'has_symlink_leading_path()' function already did this, and cached
a single level of symlink information, but didn't cache the _lack_ of a
symlink, so the normal behaviour was actually the wrong way around, and we
ended up doing an 'lstat()' on each path component to check that it was a
real directory.
This caches the last detected full directory and symlink entries, and
speeds up especially deep directory structures a lot by avoiding to
lstat() all the directories leading up to each entry in the index.
[ This can - and should - probably be extended upon so that we eventually
never do a bare 'lstat()' on any path entries at *all* when checking the
index, but always check the full path carefully. Right now we do not
generally check the whole path for all our normal quick index
revalidation.
We should also make sure that we're careful about all the invalidation,
ie when we remove a link and replace it by a directory we should
invalidate the symlink cache if it matches (and vice versa for the
directory cache).
But regardless, the basic function needs to be sane to do that. The old
'has_symlink_leading_path()' was not capable enough - or indeed the code
readable enough - to really do that sanely. So I'm pushing this as not
just an optimization, but as a base for further work. ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-05-10 00:21:07 +08:00
|
|
|
static void unlink_entry(struct cache_entry *ce)
|
2006-07-31 02:25:18 +08:00
|
|
|
{
|
2009-02-10 04:54:06 +08:00
|
|
|
if (has_symlink_or_noent_leading_path(ce->name, ce_namelen(ce)))
|
2007-05-11 14:44:53 +08:00
|
|
|
return;
|
2010-03-26 23:25:34 +08:00
|
|
|
if (remove_or_warn(ce->ce_mode, ce->name))
|
|
|
|
return;
|
2009-02-10 04:54:07 +08:00
|
|
|
schedule_dir_for_removal(ce->name, ce_namelen(ce));
|
2006-07-31 02:25:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Checkout settings handed to checkout_entry() by check_updates().
 * As a file-scope static object it starts out zero-initialized.
 */
static struct checkout state;
|
2008-03-19 13:01:28 +08:00
|
|
|
static int check_updates(struct unpack_trees_options *o)
|
2006-07-31 02:25:18 +08:00
|
|
|
{
|
2007-04-19 02:27:45 +08:00
|
|
|
unsigned cnt = 0, total = 0;
|
2007-10-31 02:57:32 +08:00
|
|
|
struct progress *progress = NULL;
|
2008-03-07 10:12:28 +08:00
|
|
|
struct index_state *index = &o->result;
|
Discard "deleted" cache entries after using them to update the working tree
Way back in read-tree.c, we used a mode 0 cache entry to indicate that
an entry had been deleted, so that the update code would remove the
working tree file, and we would just skip it when writing out the
index file afterward.
These days, unpack_trees is a library function, and it is still
leaving these entries in the active cache. Furthermore, unpack_trees
doesn't correctly ignore those entries, and who knows what other code
wouldn't expect them to be there, but just isn't yet called after a
call to unpack_trees. To avoid having other code trip over these
entries, have check_updates() remove them after it removes the working
tree files.
While we're at it, simplify the loop in check_updates(), and avoid
passing global variables as parameters to check_updates(): there is
only one call site anyway.
Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
2008-02-08 00:39:59 +08:00
|
|
|
int i;
|
2008-03-19 13:01:28 +08:00
|
|
|
int errs = 0;
|
2006-07-31 02:25:18 +08:00
|
|
|
|
|
|
|
if (o->update && o->verbose_update) {
|
2008-03-07 10:12:28 +08:00
|
|
|
for (total = cnt = 0; cnt < index->cache_nr; cnt++) {
|
|
|
|
struct cache_entry *ce = index->cache[cnt];
|
2010-07-31 14:14:27 +08:00
|
|
|
if (ce->ce_flags & (CE_UPDATE | CE_WT_REMOVE))
|
2006-07-31 02:25:18 +08:00
|
|
|
total++;
|
|
|
|
}
|
|
|
|
|
2007-10-31 02:57:32 +08:00
|
|
|
progress = start_progress_delay("Checking out files",
|
Be more verbose when checkout takes a long time
So I find it irritating when git thinks for a long time without telling me
what's taking so long. And by "long time" I definitely mean less than two
seconds, which is already way too long for me.
This hits me when doing a large pull and the checkout takes a long time,
or when just switching to another branch that is old and again checkout
takes a while.
Now, git read-tree already had support for the "-v" flag that does nice
updates about what's going on, but it was delayed by two seconds, and if
the thing had already done more than half by then it would be quiet even
after that, so in practice it meant that we migth be quiet for up to four
seconds. Much too long.
So this patch changes the timeout to just one second, which makes it much
more palatable to me.
The other thing this patch does is that "git checkout" now doesn't disable
the "-v" flag when doing its thing, and only disables the output when
given the -q flag. When allowing "checkout -m" to fall back to a 3-way
merge, the users will see the error message from straight "checkout",
so we will tell them that we do fall back to make them look less scary.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-02-24 05:36:08 +08:00
|
|
|
total, 50, 1);
|
2006-07-31 02:25:18 +08:00
|
|
|
cnt = 0;
|
|
|
|
}
|
|
|
|
|
2009-04-18 06:18:01 +08:00
|
|
|
if (o->update)
|
|
|
|
git_attr_set_direction(GIT_ATTR_CHECKOUT, &o->result);
|
2008-03-07 10:12:28 +08:00
|
|
|
for (i = 0; i < index->cache_nr; i++) {
|
|
|
|
struct cache_entry *ce = index->cache[i];
|
2006-07-31 02:25:18 +08:00
|
|
|
|
2009-08-20 21:47:06 +08:00
|
|
|
if (ce->ce_flags & CE_WT_REMOVE) {
|
|
|
|
display_progress(progress, ++cnt);
|
|
|
|
if (o->update)
|
|
|
|
unlink_entry(ce);
|
|
|
|
continue;
|
|
|
|
}
|
2008-03-23 00:48:41 +08:00
|
|
|
}
|
check_updates(): effective removal of cache entries marked CE_REMOVE
Below is oprofile output from GIT command 'git chekcout -q my-v2.6.25'
(move from tag v2.6.27 to tag v2.6.25 of the Linux kernel):
CPU: Core 2, speed 1999.95 MHz (estimated)
Counted CPU_CLK_UNHALTED events (Clock cycles when not halted) with a unit
mask of 0x00 (Unhalted core cycles) count 20000
Counted INST_RETIRED_ANY_P events (number of instructions retired) with a
unit mask of 0x00 (No unit mask) count 20000
CPU_CLK_UNHALT...|INST_RETIRED:2...|
samples| %| samples| %|
------------------------------------
409247 100.000 342878 100.000 git
CPU_CLK_UNHALT...|INST_RETIRED:2...|
samples| %| samples| %|
------------------------------------
260476 63.6476 257843 75.1996 libz.so.1.2.3
100876 24.6492 64378 18.7758 kernel-2.6.28.4_2.vmlinux
30850 7.5382 7874 2.2964 libc-2.9.so
14775 3.6103 8390 2.4469 git
2020 0.4936 4325 1.2614 libcrypto.so.0.9.8
191 0.0467 32 0.0093 libpthread-2.9.so
58 0.0142 36 0.0105 ld-2.9.so
1 2.4e-04 0 0 libldap-2.3.so.0.2.31
Detail list of the top 20 function entries (libz counted in one blob):
CPU_CLK_UNHALTED INST_RETIRED_ANY_P
samples % samples % image name symbol name
260476 63.6862 257843 75.2725 libz.so.1.2.3 /lib/libz.so.1.2.3
16587 4.0555 3636 1.0615 libc-2.9.so memcpy
7710 1.8851 277 0.0809 libc-2.9.so memmove
3679 0.8995 1108 0.3235 kernel-2.6.28.4_2.vmlinux d_validate
3546 0.8670 2607 0.7611 kernel-2.6.28.4_2.vmlinux __getblk
3174 0.7760 1813 0.5293 libc-2.9.so _int_malloc
2396 0.5858 3681 1.0746 kernel-2.6.28.4_2.vmlinux copy_to_user
2270 0.5550 2528 0.7380 kernel-2.6.28.4_2.vmlinux __link_path_walk
2205 0.5391 1797 0.5246 kernel-2.6.28.4_2.vmlinux ext4_mark_iloc_dirty
2103 0.5142 1203 0.3512 kernel-2.6.28.4_2.vmlinux find_first_zero_bit
2077 0.5078 997 0.2911 kernel-2.6.28.4_2.vmlinux do_get_write_access
2070 0.5061 514 0.1501 git cache_name_compare
2043 0.4995 1501 0.4382 kernel-2.6.28.4_2.vmlinux rcu_irq_exit
2022 0.4944 1732 0.5056 kernel-2.6.28.4_2.vmlinux __ext4_get_inode_loc
2020 0.4939 4325 1.2626 libcrypto.so.0.9.8 /usr/lib/libcrypto.so.0.9.8
1965 0.4804 1384 0.4040 git patch_delta
1708 0.4176 984 0.2873 kernel-2.6.28.4_2.vmlinux rcu_sched_grace_period
1682 0.4112 727 0.2122 kernel-2.6.28.4_2.vmlinux sysfs_slab_alias
1659 0.4056 290 0.0847 git find_pack_entry_one
1480 0.3619 1307 0.3816 kernel-2.6.28.4_2.vmlinux ext4_writepage_trans_blocks
Notice the memmove line, where the CPU did 7710 / 277 = 27.8 cycles
per instruction, and compared to the total cycles spent inside the
source code of GIT for this command, all the memmove() calls
translates to (7710 * 100) / 14775 = 52.2% of this.
Retesting with a GIT program compiled for gcov usage, I found out that
the memmove() calls came from remove_index_entry_at() in read-cache.c,
where we have:
memmove(istate->cache + pos,
istate->cache + pos + 1,
(istate->cache_nr - pos) * sizeof(struct cache_entry *));
remove_index_entry_at() is called 4902 times from check_updates() in
unpack-trees.c, and each time called we move each cache_entry pointers
(from the removed one) one step to the left.
Since we have 28828 entries in the cache this time, and if we on
average move half of them each time, we in total move approximately
4902 * 0.5 * 28828 * 4 = 282 629 712 bytes, or twice this amount if
each pointer is 8 bytes (64 bit).
OK, is seems that the function check_updates() is called 28 times, so
the estimated guess above had been more correct if check_updates() had
been called only once, but the point is: we get lots of bytes moved.
To fix this, and use an O(N) algorithm instead, where N is the number
of cache_entries, we delete/remove all entries in one loop through all
entries.
From a retest, the new remove_marked_cache_entries() from the patch
below, ended up with the following output line from oprofile:
46 0.0105 15 0.0041 git remove_marked_cache_entries
If we can trust the numbers from oprofile in this case, we saved
approximately ((7710 - 46) * 20000) / (2 * 1000 * 1000 * 1000) = 0.077
seconds CPU time with this fix for this particular test. And notice
that now the CPU did only 46 / 15 = 3.1 cycles/instruction.
Signed-off-by: Kjetil Barvik <barvik@broadpark.no>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-02-19 06:18:03 +08:00
|
|
|
remove_marked_cache_entries(&o->result);
|
2009-02-10 04:54:07 +08:00
|
|
|
remove_scheduled_dirs();
|
2008-03-23 00:48:41 +08:00
|
|
|
|
|
|
|
for (i = 0; i < index->cache_nr; i++) {
|
|
|
|
struct cache_entry *ce = index->cache[i];
|
|
|
|
|
2008-01-15 08:03:17 +08:00
|
|
|
if (ce->ce_flags & CE_UPDATE) {
|
2008-03-23 00:48:41 +08:00
|
|
|
display_progress(progress, ++cnt);
|
2008-01-15 08:03:17 +08:00
|
|
|
ce->ce_flags &= ~CE_UPDATE;
|
2007-05-11 14:44:53 +08:00
|
|
|
if (o->update) {
|
2008-03-19 13:01:28 +08:00
|
|
|
errs |= checkout_entry(ce, &state, NULL);
|
2007-05-11 14:44:53 +08:00
|
|
|
}
|
2006-07-31 02:25:18 +08:00
|
|
|
}
|
|
|
|
}
|
2007-10-31 02:57:33 +08:00
|
|
|
stop_progress(&progress);
|
2009-04-18 06:18:01 +08:00
|
|
|
if (o->update)
|
|
|
|
git_attr_set_direction(GIT_ATTR_CHECKIN, NULL);
|
2008-03-19 13:01:28 +08:00
|
|
|
return errs != 0;
|
2006-07-31 02:25:18 +08:00
|
|
|
}
|
|
|
|
|
2009-08-20 21:47:09 +08:00
|
|
|
/* Declared ahead of its use in apply_sparse_checkout(). */
static int verify_uptodate_sparse(struct cache_entry *ce, struct unpack_trees_options *o);
|
2010-08-11 16:38:06 +08:00
|
|
|
/* Declared ahead of its use in apply_sparse_checkout(). */
static int verify_absent_sparse(struct cache_entry *ce, enum unpack_trees_error_types, struct unpack_trees_options *o);
|
2009-08-20 21:47:09 +08:00
|
|
|
|
|
|
|
static int will_have_skip_worktree(const struct cache_entry *ce, struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
const char *basename;
|
|
|
|
|
|
|
|
basename = strrchr(ce->name, '/');
|
|
|
|
basename = basename ? basename+1 : ce->name;
|
|
|
|
return excluded_from_list(ce->name, ce_namelen(ce), basename, NULL, o->el) <= 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int apply_sparse_checkout(struct cache_entry *ce, struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
int was_skip_worktree = ce_skip_worktree(ce);
|
|
|
|
|
2010-07-31 14:14:28 +08:00
|
|
|
if (!ce_stage(ce) && will_have_skip_worktree(ce, o))
|
2009-08-20 21:47:09 +08:00
|
|
|
ce->ce_flags |= CE_SKIP_WORKTREE;
|
|
|
|
else
|
|
|
|
ce->ce_flags &= ~CE_SKIP_WORKTREE;
|
|
|
|
|
|
|
|
/*
|
2010-07-31 14:14:26 +08:00
|
|
|
* if (!was_skip_worktree && !ce_skip_worktree()) {
|
|
|
|
* This is perfectly normal. Move on;
|
|
|
|
* }
|
2009-08-20 21:47:09 +08:00
|
|
|
*/
|
2010-07-31 14:14:26 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Merge strategies may set CE_UPDATE|CE_REMOVE outside checkout
|
|
|
|
* area as a result of ce_skip_worktree() shortcuts in
|
2010-07-31 14:14:27 +08:00
|
|
|
* verify_absent() and verify_uptodate().
|
|
|
|
* Make sure they don't modify worktree if they are already
|
|
|
|
* outside checkout area
|
2010-07-31 14:14:26 +08:00
|
|
|
*/
|
2010-07-31 14:14:27 +08:00
|
|
|
if (was_skip_worktree && ce_skip_worktree(ce)) {
|
|
|
|
ce->ce_flags &= ~CE_UPDATE;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* By default, when CE_REMOVE is on, CE_WT_REMOVE is also
|
|
|
|
* on to get that file removed from both index and worktree.
|
|
|
|
* If that file is already outside worktree area, don't
|
|
|
|
* bother remove it.
|
|
|
|
*/
|
|
|
|
if (ce->ce_flags & CE_REMOVE)
|
|
|
|
ce->ce_flags &= ~CE_WT_REMOVE;
|
|
|
|
}
|
2009-08-20 21:47:09 +08:00
|
|
|
|
|
|
|
if (!was_skip_worktree && ce_skip_worktree(ce)) {
|
|
|
|
/*
|
|
|
|
* If CE_UPDATE is set, verify_uptodate() must be called already
|
|
|
|
* also stat info may have lost after merged_entry() so calling
|
|
|
|
* verify_uptodate() again may fail
|
|
|
|
*/
|
|
|
|
if (!(ce->ce_flags & CE_UPDATE) && verify_uptodate_sparse(ce, o))
|
|
|
|
return -1;
|
|
|
|
ce->ce_flags |= CE_WT_REMOVE;
|
|
|
|
}
|
|
|
|
if (was_skip_worktree && !ce_skip_worktree(ce)) {
|
2010-08-11 16:38:06 +08:00
|
|
|
if (verify_absent_sparse(ce, ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o))
|
2009-08-20 21:47:09 +08:00
|
|
|
return -1;
|
|
|
|
ce->ce_flags |= CE_UPDATE;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-03-07 10:12:28 +08:00
|
|
|
static inline int call_unpack_fn(struct cache_entry **src, struct unpack_trees_options *o)
|
2008-03-06 12:15:44 +08:00
|
|
|
{
|
2008-03-07 10:12:28 +08:00
|
|
|
int ret = o->fn(src, o);
|
|
|
|
if (ret > 0)
|
2008-03-06 12:15:44 +08:00
|
|
|
ret = 0;
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
static void mark_ce_used(struct cache_entry *ce, struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
ce->ce_flags |= CE_UNPACKED;
|
|
|
|
|
|
|
|
if (o->cache_bottom < o->src_index->cache_nr &&
|
|
|
|
o->src_index->cache[o->cache_bottom] == ce) {
|
|
|
|
int bottom = o->cache_bottom;
|
|
|
|
while (bottom < o->src_index->cache_nr &&
|
|
|
|
o->src_index->cache[bottom]->ce_flags & CE_UNPACKED)
|
|
|
|
bottom++;
|
|
|
|
o->cache_bottom = bottom;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void mark_all_ce_unused(struct index_state *index)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < index->cache_nr; i++)
|
|
|
|
index->cache[i]->ce_flags &= ~CE_UNPACKED;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int locate_in_src_index(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int len = ce_namelen(ce);
|
|
|
|
int pos = index_name_pos(index, ce->name, len);
|
|
|
|
if (pos < 0)
|
|
|
|
pos = -1 - pos;
|
|
|
|
return pos;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We call unpack_index_entry() with an unmerged cache entry
|
|
|
|
* only in diff-index, and it wants a single callback. Skip
|
|
|
|
* the other unmerged entry with the same name.
|
|
|
|
*/
|
|
|
|
static void mark_ce_used_same_name(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int len = ce_namelen(ce);
|
|
|
|
int pos;
|
|
|
|
|
|
|
|
for (pos = locate_in_src_index(ce, o); pos < index->cache_nr; pos++) {
|
|
|
|
struct cache_entry *next = index->cache[pos];
|
|
|
|
if (len != ce_namelen(next) ||
|
|
|
|
memcmp(ce->name, next->name, len))
|
|
|
|
break;
|
|
|
|
mark_ce_used(next, o);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cache_entry *next_cache_entry(struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
const struct index_state *index = o->src_index;
|
|
|
|
int pos = o->cache_bottom;
|
|
|
|
|
|
|
|
while (pos < index->cache_nr) {
|
|
|
|
struct cache_entry *ce = index->cache[pos];
|
|
|
|
if (!(ce->ce_flags & CE_UNPACKED))
|
|
|
|
return ce;
|
|
|
|
pos++;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void add_same_unmerged(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int len = ce_namelen(ce);
|
|
|
|
int pos = index_name_pos(index, ce->name, len);
|
|
|
|
|
|
|
|
if (0 <= pos)
|
|
|
|
die("programming error in a caller of mark_ce_used_same_name");
|
|
|
|
for (pos = -pos - 1; pos < index->cache_nr; pos++) {
|
|
|
|
struct cache_entry *next = index->cache[pos];
|
|
|
|
if (len != ce_namelen(next) ||
|
|
|
|
memcmp(ce->name, next->name, len))
|
|
|
|
break;
|
|
|
|
add_entry(o, next, 0, 0);
|
|
|
|
mark_ce_used(next, o);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int unpack_index_entry(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
2008-03-06 12:15:44 +08:00
|
|
|
{
|
2010-05-14 17:31:33 +08:00
|
|
|
struct cache_entry *src[5] = { NULL };
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
int ret;
|
2008-03-07 10:12:28 +08:00
|
|
|
|
2010-05-14 17:31:33 +08:00
|
|
|
src[0] = ce;
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
mark_ce_used(ce, o);
|
2008-03-06 12:15:44 +08:00
|
|
|
if (ce_stage(ce)) {
|
|
|
|
if (o->skip_unmerged) {
|
2008-03-07 10:12:28 +08:00
|
|
|
add_entry(o, ce, 0, 0);
|
|
|
|
return 0;
|
2008-03-06 12:15:44 +08:00
|
|
|
}
|
|
|
|
}
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
ret = call_unpack_fn(src, o);
|
|
|
|
if (ce_stage(ce))
|
|
|
|
mark_ce_used_same_name(ce, o);
|
|
|
|
return ret;
|
2008-03-06 12:15:44 +08:00
|
|
|
}
|
|
|
|
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
/*
 * Forward declaration: switch_cache_bottom() below calls
 * find_cache_pos() before its definition appears later in this file.
 */
static int find_cache_pos(struct traverse_info *, const struct name_entry *);
|
|
|
|
|
|
|
|
static void restore_cache_bottom(struct traverse_info *info, int bottom)
|
|
|
|
{
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
|
|
|
|
if (o->diff_index_cached)
|
|
|
|
return;
|
|
|
|
o->cache_bottom = bottom;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int switch_cache_bottom(struct traverse_info *info)
|
|
|
|
{
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
int ret, pos;
|
|
|
|
|
|
|
|
if (o->diff_index_cached)
|
|
|
|
return 0;
|
|
|
|
ret = o->cache_bottom;
|
|
|
|
pos = find_cache_pos(info->prev, &info->name);
|
|
|
|
|
|
|
|
if (pos < -1)
|
|
|
|
o->cache_bottom = -2 - pos;
|
|
|
|
else if (pos < 0)
|
|
|
|
o->cache_bottom = o->src_index->cache_nr;
|
|
|
|
return ret;
|
2008-03-06 12:15:44 +08:00
|
|
|
}
|
|
|
|
|
2009-06-19 01:28:43 +08:00
|
|
|
static int traverse_trees_recursive(int n, unsigned long dirmask, unsigned long df_conflicts, struct name_entry *names, struct traverse_info *info)
|
2006-07-31 02:25:18 +08:00
|
|
|
{
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
int i, ret, bottom;
|
2008-03-14 13:07:18 +08:00
|
|
|
struct tree_desc t[MAX_UNPACK_TREES];
|
2010-08-10 11:33:44 +08:00
|
|
|
void *buf[MAX_UNPACK_TREES];
|
2008-03-06 12:15:44 +08:00
|
|
|
struct traverse_info newinfo;
|
|
|
|
struct name_entry *p;
|
|
|
|
|
|
|
|
p = names;
|
|
|
|
while (!p->mode)
|
|
|
|
p++;
|
|
|
|
|
|
|
|
newinfo = *info;
|
|
|
|
newinfo.prev = info;
|
|
|
|
newinfo.name = *p;
|
|
|
|
newinfo.pathlen += tree_entry_len(p->path, p->sha1) + 1;
|
|
|
|
newinfo.conflicts |= df_conflicts;
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++, dirmask >>= 1) {
|
|
|
|
const unsigned char *sha1 = NULL;
|
|
|
|
if (dirmask & 1)
|
|
|
|
sha1 = names[i].sha1;
|
2010-08-10 11:33:44 +08:00
|
|
|
buf[i] = fill_tree_descriptor(t+i, sha1);
|
2008-03-06 12:15:44 +08:00
|
|
|
}
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
|
|
|
|
bottom = switch_cache_bottom(&newinfo);
|
|
|
|
ret = traverse_trees(n, t, &newinfo);
|
|
|
|
restore_cache_bottom(&newinfo, bottom);
|
2010-08-10 11:33:44 +08:00
|
|
|
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
free(buf[i]);
|
|
|
|
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if the an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not do touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
return ret;
|
2008-03-06 12:15:44 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compare the traverse-path to the cache entry without actually
|
|
|
|
* having to generate the textual representation of the traverse
|
|
|
|
* path.
|
|
|
|
*
|
|
|
|
* NOTE! This *only* compares up to the size of the traverse path
|
|
|
|
* itself - the caller needs to do the final check for the cache
|
|
|
|
* entry having more data at the end!
|
|
|
|
*/
|
|
|
|
static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
|
|
|
|
{
|
|
|
|
int len, pathlen, ce_len;
|
|
|
|
const char *ce_name;
|
|
|
|
|
|
|
|
if (info->prev) {
|
|
|
|
int cmp = do_compare_entry(ce, info->prev, &info->name);
|
|
|
|
if (cmp)
|
|
|
|
return cmp;
|
|
|
|
}
|
|
|
|
pathlen = info->pathlen;
|
|
|
|
ce_len = ce_namelen(ce);
|
|
|
|
|
|
|
|
/* If ce_len < pathlen then we must have previously hit "name == directory" entry */
|
|
|
|
if (ce_len < pathlen)
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
ce_len -= pathlen;
|
|
|
|
ce_name = ce->name + pathlen;
|
|
|
|
|
|
|
|
len = tree_entry_len(n->path, n->sha1);
|
|
|
|
return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Full comparison of a cache entry against the traverse path: the
 * result of do_compare_entry() when that is non-zero, otherwise
 * whether the cache entry name is longer than the traverse path.
 */
static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
{
	int cmp = do_compare_entry(ce, info, n);

	/*
	 * Even if the beginning compared identically, the ce should
	 * compare as bigger than a directory leading up to it!
	 */
	if (!cmp)
		cmp = ce_namelen(ce) > traverse_path_len(info, n);
	return cmp;
}
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
static int ce_in_traverse_path(const struct cache_entry *ce,
|
|
|
|
const struct traverse_info *info)
|
|
|
|
{
|
|
|
|
if (!info->prev)
|
|
|
|
return 1;
|
|
|
|
if (do_compare_entry(ce, info->prev, &info->name))
|
|
|
|
return 0;
|
|
|
|
/*
|
|
|
|
* If ce (blob) is the same name as the path (which is a tree
|
|
|
|
* we will be descending into), it won't be inside it.
|
|
|
|
*/
|
|
|
|
return (info->pathlen < ce_namelen(ce));
|
|
|
|
}
|
|
|
|
|
2008-03-06 12:15:44 +08:00
|
|
|
static struct cache_entry *create_ce_entry(const struct traverse_info *info, const struct name_entry *n, int stage)
|
|
|
|
{
|
|
|
|
int len = traverse_path_len(info, n);
|
|
|
|
struct cache_entry *ce = xcalloc(1, cache_entry_size(len));
|
|
|
|
|
|
|
|
ce->ce_mode = create_ce_mode(n->mode);
|
|
|
|
ce->ce_flags = create_ce_flags(len, stage);
|
|
|
|
hashcpy(ce->sha1, n->sha1);
|
|
|
|
make_traverse_path(ce->name, info, n);
|
|
|
|
|
|
|
|
return ce;
|
|
|
|
}
|
|
|
|
|
2009-01-31 22:39:10 +08:00
|
|
|
static int unpack_nondirectories(int n, unsigned long mask,
|
|
|
|
unsigned long dirmask,
|
|
|
|
struct cache_entry **src,
|
|
|
|
const struct name_entry *names,
|
|
|
|
const struct traverse_info *info)
|
2008-03-06 12:15:44 +08:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
unsigned long conflicts;
|
|
|
|
|
|
|
|
/* Do we have *only* directories? Nothing to do */
|
|
|
|
if (mask == dirmask && !src[0])
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
conflicts = info->conflicts;
|
|
|
|
if (o->merge)
|
|
|
|
conflicts >>= 1;
|
|
|
|
conflicts |= dirmask;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Ok, we've filled in up to any potential index entry in src[0],
|
|
|
|
* now do the rest.
|
|
|
|
*/
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
int stage;
|
|
|
|
unsigned int bit = 1ul << i;
|
|
|
|
if (conflicts & bit) {
|
|
|
|
src[i + o->merge] = o->df_conflict_entry;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!(mask & bit))
|
|
|
|
continue;
|
|
|
|
if (!o->merge)
|
|
|
|
stage = 0;
|
|
|
|
else if (i + 1 < o->head_idx)
|
|
|
|
stage = 1;
|
|
|
|
else if (i + 1 > o->head_idx)
|
|
|
|
stage = 3;
|
|
|
|
else
|
|
|
|
stage = 2;
|
|
|
|
src[i + o->merge] = create_ce_entry(info, names + i, stage);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (o->merge)
|
2008-03-07 10:12:28 +08:00
|
|
|
return call_unpack_fn(src, o);
|
2008-03-06 12:15:44 +08:00
|
|
|
|
|
|
|
for (i = 0; i < n; i++)
|
read-tree A B C: do not create a bogus index and do not segfault
"git read-tree A B C..." without the "-m" (merge) option is a way to read
these trees on top of each other to get an overlay of them.
An ancient commit ee6566e (Rewrite read-tree, 2005-09-05) passed the
ADD_CACHE_SKIP_DFCHECK flag when calling add_index_entry() to add the
paths obtained from these trees to the index, but it is an incorrect use
of the flag. The flag is meant to be used by callers who know the
addition of the entry does not introduce a D/F conflict to the index in
order to avoid the overhead of checking.
This bug resulted in a bogus index that records both "x" and "x/z" as a
blob after reading three trees that have paths ("x"), ("x", "y"), and
("x/z", "y") respectively. 34110cd (Make 'unpack_trees()' have a separate
source and destination index, 2008-03-06) refactored the callsites of
add_index_entry() incorrectly and added more codepaths that use this flag
when it shouldn't be used.
Also, 0190457 (Move 'unpack_trees()' over to 'traverse_trees()' interface,
2008-03-05) introduced a bug to call add_index_entry() for the tree that
does not have the path in it, passing NULL as a cache entry. This caused
reading multiple trees, one of which has path "x" but another doesn't, to
segfault.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-03-12 15:02:12 +08:00
|
|
|
if (src[i] && src[i] != o->df_conflict_entry)
|
|
|
|
add_entry(o, src[i], 0, 0);
|
2008-03-06 12:15:44 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2009-09-20 07:36:45 +08:00
|
|
|
static int unpack_failed(struct unpack_trees_options *o, const char *message)
|
|
|
|
{
|
|
|
|
discard_index(&o->result);
|
|
|
|
if (!o->gently) {
|
|
|
|
if (message)
|
|
|
|
return error("%s", message);
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries in
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
/*
 * NEEDSWORK: give this a better name and share with tree-walk.c
 *
 * Byte-wise comparison of two counted strings.  The common prefix is
 * compared with memcmp(); when it is identical the result is the
 * difference of the two lengths, so the shorter name compares as
 * smaller.
 */
static int name_compare(const char *a, int a_len,
			const char *b, int b_len)
{
	int common = b_len;
	int diff;

	if (a_len < b_len)
		common = a_len;

	diff = memcmp(a, b, common);
	return diff ? diff : (a_len - b_len);
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The tree traversal is looking at name p. If we have a matching entry,
|
|
|
|
* return it. If name p is a directory in the index, do not return
|
|
|
|
* anything, as we will want to match it when the traversal descends into
|
|
|
|
* the directory.
|
|
|
|
*/
|
|
|
|
static int find_cache_pos(struct traverse_info *info,
|
|
|
|
const struct name_entry *p)
|
|
|
|
{
|
|
|
|
int pos;
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
struct index_state *index = o->src_index;
|
|
|
|
int pfxlen = info->pathlen;
|
|
|
|
int p_len = tree_entry_len(p->path, p->sha1);
|
|
|
|
|
|
|
|
for (pos = o->cache_bottom; pos < index->cache_nr; pos++) {
|
|
|
|
struct cache_entry *ce = index->cache[pos];
|
|
|
|
const char *ce_name, *ce_slash;
|
|
|
|
int cmp, ce_len;
|
|
|
|
|
unpack-trees: Make index lookahead less pessimal
When traversing trees with an index, the current index pointer
(o->cache_bottom) occasionally has to be temporarily advanced forwards to
match the traversal order of the tree, which is not the same as the sort
order of the index. The existing algorithm that did this (introduced in
730f72840cc50c523fe4cdd796ea2d2fc4571a28) would get "stuck" when the
cache_bottom was popped and then repeatedly check the same index entries
over and over. This represents a serious performance regression for
large repositories compared to the old "broken" traversal order.
This commit makes a simple change to mitigate this. Whenever
find_cache_pos sees that the current pos is also the cache_bottom, and
it has already been unpacked, it advances the cache_bottom as well as
the current pos. This prevents the above "sticking" behavior without
dramatically changing the algorithm.
In addition, this commit moves the unpacked check above the
ce_in_traverse_path() check. The simple bitmask check is cheaper, and
in the case described above will be firing quite a bit to advance the
cache_bottom after a tree pop.
This yields considerable performance improvements for large trees.
The following are the number of function calls for "git diff HEAD" on
the Linux kernel tree, with 33,307 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 35,332 35,332
find_cache_pos 37,357 37,357
ce_in_traverse_path 4,979,473 37,357
do_compare_entry 6,828,181 251,925
df_name_compare 6,828,181 251,925
And on a repository of 187,456 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 197,958 197,958
find_cache_pos 208,460 208,460
ce_in_traverse_path 37,308,336 208,460
do_compare_entry 156,950,469 2,690,626
df_name_compare 156,950,469 2,690,626
On the latter repository, user time for "git diff HEAD" was reduced from
5.58 to 0.42 seconds. This is compared to 0.30 seconds before the
traversal order fix was implemented.
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-11 10:59:07 +08:00
|
|
|
if (ce->ce_flags & CE_UNPACKED) {
|
|
|
|
/*
|
|
|
|
* cache_bottom entry is already unpacked, so
|
|
|
|
* we can never match it; don't check it
|
|
|
|
* again.
|
|
|
|
*/
|
|
|
|
if (pos == o->cache_bottom)
|
|
|
|
++o->cache_bottom;
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
continue;
|
unpack-trees: Make index lookahead less pessimal
When traversing trees with an index, the current index pointer
(o->cache_bottom) occasionally has to be temporarily advanced forwards to
match the traversal order of the tree, which is not the same as the sort
order of the index. The existing algorithm that did this (introduced in
730f72840cc50c523fe4cdd796ea2d2fc4571a28) would get "stuck" when the
cache_bottom was popped and then repeatedly check the same index entries
over and over. This represents a serious performance regression for
large repositories compared to the old "broken" traversal order.
This commit makes a simple change to mitigate this. Whenever
find_cache_pos sees that the current pos is also the cache_bottom, and
it has already been unpacked, it advances the cache_bottom as well as
the current pos. This prevents the above "sticking" behavior without
dramatically changing the algorithm.
In addition, this commit moves the unpacked check above the
ce_in_traverse_path() check. The simple bitmask check is cheaper, and
in the case described above will be firing quite a bit to advance the
cache_bottom after a tree pop.
This yields considerable performance improvements for large trees.
The following are the number of function calls for "git diff HEAD" on
the Linux kernel tree, with 33,307 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 35,332 35,332
find_cache_pos 37,357 37,357
ce_in_traverse_path 4,979,473 37,357
do_compare_entry 6,828,181 251,925
df_name_compare 6,828,181 251,925
And on a repository of 187,456 files:
Symbol Calls Before Calls After
------------------- ------------ -----------
unpack_callback 197,958 197,958
find_cache_pos 208,460 208,460
ce_in_traverse_path 37,308,336 208,460
do_compare_entry 156,950,469 2,690,626
df_name_compare 156,950,469 2,690,626
On the latter repository, user time for "git diff HEAD" was reduced from
5.58 to 0.42 seconds. This is compared to 0.30 seconds before the
traversal order fix was implemented.
Signed-off-by: Brian Downing <bdowning@lavos.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-06-11 10:59:07 +08:00
|
|
|
}
|
|
|
|
if (!ce_in_traverse_path(ce, info))
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
continue;
|
|
|
|
ce_name = ce->name + pfxlen;
|
|
|
|
ce_slash = strchr(ce_name, '/');
|
|
|
|
if (ce_slash)
|
|
|
|
ce_len = ce_slash - ce_name;
|
|
|
|
else
|
|
|
|
ce_len = ce_namelen(ce) - pfxlen;
|
|
|
|
cmp = name_compare(p->path, p_len, ce_name, ce_len);
|
|
|
|
/*
|
|
|
|
* Exact match; if we have a directory we need to
|
|
|
|
* delay returning it.
|
|
|
|
*/
|
|
|
|
if (!cmp)
|
|
|
|
return ce_slash ? -2 - pos : pos;
|
|
|
|
if (0 < cmp)
|
|
|
|
continue; /* keep looking */
|
|
|
|
/*
|
|
|
|
* ce_name sorts after p->path; could it be that we
|
|
|
|
* have files under p->path directory in the index?
|
|
|
|
* E.g. ce_name == "t-i", and p->path == "t"; we may
|
|
|
|
* have "t/a" in the index.
|
|
|
|
*/
|
|
|
|
if (p_len < ce_len && !memcmp(ce_name, p->path, p_len) &&
|
|
|
|
ce_name[p_len] < '/')
|
|
|
|
continue; /* keep looking */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct cache_entry *find_cache_entry(struct traverse_info *info,
|
|
|
|
const struct name_entry *p)
|
|
|
|
{
|
|
|
|
int pos = find_cache_pos(info, p);
|
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
|
|
|
|
if (0 <= pos)
|
|
|
|
return o->src_index->cache[pos];
|
|
|
|
else
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2009-09-14 17:22:00 +08:00
|
|
|
static void debug_path(struct traverse_info *info)
|
|
|
|
{
|
|
|
|
if (info->prev) {
|
|
|
|
debug_path(info->prev);
|
|
|
|
if (*info->prev->name.path)
|
|
|
|
putchar('/');
|
|
|
|
}
|
|
|
|
printf("%s", info->name.path);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void debug_name_entry(int i, struct name_entry *n)
|
|
|
|
{
|
|
|
|
printf("ent#%d %06o %s\n", i,
|
|
|
|
n->path ? n->mode : 0,
|
|
|
|
n->path ? n->path : "(missing)");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void debug_unpack_callback(int n,
|
|
|
|
unsigned long mask,
|
|
|
|
unsigned long dirmask,
|
|
|
|
struct name_entry *names,
|
|
|
|
struct traverse_info *info)
|
|
|
|
{
|
|
|
|
int i;
|
|
|
|
printf("* unpack mask %lu, dirmask %lu, cnt %d ",
|
|
|
|
mask, dirmask, n);
|
|
|
|
debug_path(info);
|
|
|
|
putchar('\n');
|
|
|
|
for (i = 0; i < n; i++)
|
|
|
|
debug_name_entry(i, names + i);
|
|
|
|
}
|
|
|
|
|
2008-03-06 12:15:44 +08:00
|
|
|
static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
|
|
|
|
{
|
2009-01-31 22:39:10 +08:00
|
|
|
struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
|
2008-03-06 12:15:44 +08:00
|
|
|
struct unpack_trees_options *o = info->data;
|
|
|
|
const struct name_entry *p = names;
|
|
|
|
|
|
|
|
/* Find first entry with a real name (we could use "mask" too) */
|
|
|
|
while (!p->mode)
|
|
|
|
p++;
|
|
|
|
|
2009-09-14 17:22:00 +08:00
|
|
|
if (o->debug_unpack)
|
|
|
|
debug_unpack_callback(n, mask, dirmask, names, info);
|
|
|
|
|
2008-03-06 12:15:44 +08:00
|
|
|
/* Are we supposed to look at the index too? */
|
|
|
|
if (o->merge) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
while (1) {
|
|
|
|
int cmp;
|
unpack-trees.c: look ahead in the index
This makes the traversal of index be in sync with the tree traversal.
When unpack_callback() is fed a set of tree entries from trees, it
inspects the name of the entry and checks if an index entry with
the same name could be hiding behind the current index entry, and
(1) if the name appears in the index as a leaf node, it is also
fed to the n_way_merge() callback function;
(2) if the name is a directory in the index, i.e. there are entries
that are underneath it, then nothing is fed to the n_way_merge()
callback function;
(3) otherwise, if the name comes before the first eligible entry in the
index, the index entry is first unpacked alone.
When traverse_trees_recursive() descends into a subdirectory, the
cache_bottom pointer is moved to walk index entries within that directory.
All of these are omitted for diff-index, which does not even want to be
fed an index entry and a tree entry with D/F conflicts.
This fixes 3-way read-tree and exposes a bug in other parts of the system
in t6035, test #5. The test prepares these three trees:
O = HEAD^
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
A = HEAD
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b/c/d
100644 blob 587be6b4c3f93f93c489c0111bba5596147a26cb a/x
B = master
120000 blob a36b77384451ea1de7bd340ffca868249626bc52 a/b
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/b-2/c/d
100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 a/x
With a clean index that matches HEAD, running
git read-tree -m -u --aggressive $O $A $B
now yields
120000 a36b77384451ea1de7bd340ffca868249626bc52 3 a/b
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0 a/b-2/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 1 a/b/c/d
100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 2 a/b/c/d
100644 587be6b4c3f93f93c489c0111bba5596147a26cb 0 a/x
which is correct. "master" created "a/b" symlink that did not exist,
and removed "a/b/c/d" while HEAD did not touch either path.
Before this series, read-tree did not notice the situation and resolved
addition of "a/b" and removal of "a/b/c/d" independently. If A = HEAD had
another path "a/b/c/e" added, this merge should conflict but instead it
silently resolved "a/b" and then immediately overwrote it to add
"a/b/c/e", which was quite bogus.
Tests in t1012 start to work with this.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-09-20 15:03:39 +08:00
|
|
|
struct cache_entry *ce;
|
|
|
|
|
|
|
|
if (o->diff_index_cached)
|
|
|
|
ce = next_cache_entry(o);
|
|
|
|
else
|
|
|
|
ce = find_cache_entry(info, p);
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
if (!ce)
|
|
|
|
break;
|
|
|
|
cmp = compare_entry(ce, info, p);
|
2008-03-06 12:15:44 +08:00
|
|
|
if (cmp < 0) {
|
|
|
|
if (unpack_index_entry(ce, o) < 0)
|
2009-09-20 07:36:45 +08:00
|
|
|
return unpack_failed(o, NULL);
|
2008-03-06 12:15:44 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!cmp) {
|
|
|
|
if (ce_stage(ce)) {
|
|
|
|
/*
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
* If we skip unmerged index
|
|
|
|
* entries, we'll skip this
|
|
|
|
* entry *and* the tree
|
|
|
|
* entries associated with it!
|
2008-03-06 12:15:44 +08:00
|
|
|
*/
|
2008-03-07 10:12:28 +08:00
|
|
|
if (o->skip_unmerged) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
add_same_unmerged(ce, o);
|
2008-03-06 12:15:44 +08:00
|
|
|
return mask;
|
2008-03-07 10:12:28 +08:00
|
|
|
}
|
2008-03-06 12:15:44 +08:00
|
|
|
}
|
|
|
|
src[0] = ce;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-03-07 10:12:28 +08:00
|
|
|
if (unpack_nondirectories(n, mask, dirmask, src, names, info) < 0)
|
2008-03-06 12:15:44 +08:00
|
|
|
return -1;
|
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that have already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
if (src[0]) {
|
|
|
|
if (ce_stage(src[0]))
|
|
|
|
mark_ce_used_same_name(src[0], o);
|
|
|
|
else
|
|
|
|
mark_ce_used(src[0], o);
|
|
|
|
}
|
|
|
|
|
2008-03-06 12:15:44 +08:00
|
|
|
/* Now handle any directories.. */
|
|
|
|
if (dirmask) {
|
|
|
|
unsigned long conflicts = mask & ~dirmask;
|
|
|
|
if (o->merge) {
|
|
|
|
conflicts <<= 1;
|
|
|
|
if (src[0])
|
|
|
|
conflicts |= 1;
|
|
|
|
}
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-21 06:57:22 +08:00
|
|
|
|
|
|
|
/* special case: "diff-index --cached" looking at a tree */
|
|
|
|
if (o->diff_index_cached &&
|
|
|
|
n == 1 && dirmask == 1 && S_ISDIR(names->mode)) {
|
|
|
|
int matches;
|
|
|
|
matches = cache_tree_matches_traversal(o->src_index->cache_tree,
|
|
|
|
names, info);
|
|
|
|
/*
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
* Everything under the name matches; skip the
|
|
|
|
* entire hierarchy. diff_index_cached codepath
|
|
|
|
* special cases D/F conflicts in such a way that
|
|
|
|
* it does not do any look-ahead, so this is safe.
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-21 06:57:22 +08:00
|
|
|
*/
|
|
|
|
if (matches) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
o->cache_bottom += matches;
|
Optimize "diff-index --cached" using cache-tree
When running "diff-index --cached" after making a change to only a small
portion of the index, there is no point unpacking unchanged subtrees into
the index recursively, only to find that all entries match anyway. Tweak
unpack_trees() logic that is used to read in the tree object to catch the
case where the tree entry we are looking at matches the index as a whole
by looking at the cache-tree.
As an exercise, after modifying a few paths in the kernel tree, here are
a few numbers on my Athlon 64X2 3800+:
(without patch, hot cache)
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.07user 0.02system 0:00.09elapsed 102%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+9407minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.00system 0:00.02elapsed 103%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+0outputs (0major+2446minor)pagefaults 0swaps
Cold cache numbers are very impressive, but it does not matter very much
in practice:
(without patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time git diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.06user 0.17system 0:10.26elapsed 2%CPU (0avgtext+0avgdata 0maxresident)k
247032inputs+0outputs (1172major+8237minor)pagefaults 0swaps
(with patch, cold cache)
$ su root sh -c 'echo 3 >/proc/sys/vm/drop_caches'
$ /usr/bin/time ../git.git/git-diff --cached --raw
:100644 100644 b57e1f5... e69de29... M Makefile
:100644 000000 8c86b72... 0000000... D arch/x86/Makefile
:000000 100644 0000000... e69de29... A arche
0.02user 0.01system 0:01.01elapsed 3%CPU (0avgtext+0avgdata 0maxresident)k
18440inputs+0outputs (79major+2369minor)pagefaults 0swaps
This of course helps "git status" as well.
(without patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.17user 0.18system 0:00.35elapsed 100%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+10970minor)pagefaults 0swaps
(with patch, hot cache)
$ /usr/bin/time ../git.git/git-status >/dev/null
0.10user 0.16system 0:00.27elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k
0inputs+5336outputs (0major+3921minor)pagefaults 0swaps
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2009-05-21 06:57:22 +08:00
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-03-10 16:26:23 +08:00
|
|
|
if (traverse_trees_recursive(n, dirmask, conflicts,
|
|
|
|
names, info) < 0)
|
|
|
|
return -1;
|
2008-03-06 12:15:44 +08:00
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
|
|
|
return mask;
|
|
|
|
}
|
|
|
|
|
2008-05-29 06:12:30 +08:00
|
|
|
/*
|
|
|
|
* N-way merge "len" trees. Returns 0 on success, -1 on failure to manipulate the
|
|
|
|
* resulting index, -2 on failure to reflect the changes to the work tree.
|
|
|
|
*/
|
2008-03-06 12:15:44 +08:00
|
|
|
int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options *o)
|
|
|
|
{
|
2009-08-20 21:47:09 +08:00
|
|
|
int i, ret;
|
2006-12-04 18:11:39 +08:00
|
|
|
static struct cache_entry *dfc;
|
2009-08-20 21:47:08 +08:00
|
|
|
struct exclude_list el;
|
2006-07-31 02:25:18 +08:00
|
|
|
|
2008-03-14 13:07:18 +08:00
|
|
|
if (len > MAX_UNPACK_TREES)
|
|
|
|
die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
|
2006-07-31 02:26:15 +08:00
|
|
|
memset(&state, 0, sizeof(state));
|
2006-07-31 02:25:18 +08:00
|
|
|
state.base_dir = "";
|
|
|
|
state.force = 1;
|
|
|
|
state.quiet = 1;
|
|
|
|
state.refresh_cache = 1;
|
|
|
|
|
2009-08-20 21:47:08 +08:00
|
|
|
memset(&el, 0, sizeof(el));
|
|
|
|
if (!core_apply_sparse_checkout || !o->update)
|
|
|
|
o->skip_sparse_checkout = 1;
|
|
|
|
if (!o->skip_sparse_checkout) {
|
|
|
|
if (add_excludes_from_file_to_list(git_path("info/sparse-checkout"), "", 0, NULL, &el, 0) < 0)
|
|
|
|
o->skip_sparse_checkout = 1;
|
|
|
|
else
|
|
|
|
o->el = &el;
|
|
|
|
}
|
|
|
|
|
2008-03-07 10:12:28 +08:00
|
|
|
memset(&o->result, 0, sizeof(o->result));
|
unpack_trees(): protect the handcrafted in-core index from read_cache()
unpack_trees() rebuilds the in-core index from scratch by allocating a new
structure and finishing it off by copying the built one to the final
index.
The resulting in-core index is Ok for most use, but read_cache() does not
recognize it as such. The function is meant to be no-op if you already
have loaded the index, until you call discard_cache().
This changes the way read_cache() detects an already initialized in-core
index, by introducing an extra bit, and marks the handcrafted in-core
index as initialized, to avoid this problem.
A better fix in the longer term would be to change the read_cache() API so
that it will always discard and re-read from the on-disk index to avoid
confusion. But there are higher-level APIs that have relied on the current
semantics, and they and their users all need to get converted, which is
outside the scope of 'maint' track.
An example of such a higher level API is write_cache_as_tree(), which is
used by git-write-tree as well as later Porcelains like git-merge, revert
and cherry-pick. In the longer term, we should remove read_cache() from
there and add one to cmd_write_tree(); other callers expect that the
in-core index they prepared is what gets written as a tree so no other
change is necessary for this particular codepath.
The original version of this patch marked the index by pointing an
otherwise wasted malloc'ed memory with o->result.alloc, but this version
uses Linus's idea to use a new "initialized" bit, which is conceptually
much cleaner.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-08-24 03:57:30 +08:00
|
|
|
o->result.initialized = 1;
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
o->result.timestamp.sec = o->src_index->timestamp.sec;
|
|
|
|
o->result.timestamp.nsec = o->src_index->timestamp.nsec;
|
2006-07-31 02:25:18 +08:00
|
|
|
o->merge_size = len;
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
mark_all_ce_unused(o->src_index);
|
2006-12-04 18:11:39 +08:00
|
|
|
|
|
|
|
if (!dfc)
|
correct cache_entry allocation
Most cache_entry structs are allocated by using the
cache_entry_size macro, which rounds the size of the struct
up to the nearest multiple of 8 bytes (presumably to avoid
memory fragmentation).
There is one exception: the special "conflict entry" is
allocated with an empty name, and so is explicitly given
just one extra byte to hold the NUL.
However, later code doesn't realize that this particular
struct has been allocated differently, and happily tries
reading and copying it based on the ce_size macro, which
assumes the 8-byte alignment.
This can lead to reading uninitialized data, though since
that data is simply padding, there shouldn't be any problem
as a result. Still, it makes sense to hold the padding
assumption so as not to surprise later maintainers.
This fixes valgrind errors in t1005, t3030, t4002, and
t4114.
Signed-off-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-10-23 12:30:58 +08:00
|
|
|
dfc = xcalloc(1, cache_entry_size(0));
|
2006-12-04 18:11:39 +08:00
|
|
|
o->df_conflict_entry = dfc;
|
2006-07-31 02:25:18 +08:00
|
|
|
|
|
|
|
if (len) {
|
2008-03-06 12:15:44 +08:00
|
|
|
const char *prefix = o->prefix ? o->prefix : "";
|
|
|
|
struct traverse_info info;
|
|
|
|
|
|
|
|
setup_traverse_info(&info, prefix);
|
|
|
|
info.fn = unpack_callback;
|
|
|
|
info.data = o;
|
2010-08-11 16:38:07 +08:00
|
|
|
info.show_all_errors = o->show_all_errors;
|
2008-03-06 12:15:44 +08:00
|
|
|
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
if (o->prefix) {
|
|
|
|
/*
|
|
|
|
* Unpack existing index entries that sort before the
|
|
|
|
* prefix the tree is spliced into. Note that o->merge
|
|
|
|
* is always true in this case.
|
|
|
|
*/
|
|
|
|
while (1) {
|
|
|
|
struct cache_entry *ce = next_cache_entry(o);
|
|
|
|
if (!ce)
|
|
|
|
break;
|
|
|
|
if (ce_in_traverse_path(ce, &info))
|
|
|
|
break;
|
|
|
|
if (unpack_index_entry(ce, o) < 0)
|
|
|
|
goto return_failed;
|
|
|
|
}
|
2009-08-20 21:47:08 +08:00
|
|
|
}
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
|
2008-03-06 12:15:44 +08:00
|
|
|
if (traverse_trees(len, t, &info) < 0)
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
goto return_failed;
|
2006-07-31 02:25:18 +08:00
|
|
|
}
|
|
|
|
|
2008-03-06 12:15:44 +08:00
|
|
|
/* Any left-over entries in the index? */
|
|
|
|
if (o->merge) {
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
while (1) {
|
|
|
|
struct cache_entry *ce = next_cache_entry(o);
|
|
|
|
if (!ce)
|
|
|
|
break;
|
2008-03-06 12:15:44 +08:00
|
|
|
if (unpack_index_entry(ce, o) < 0)
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
goto return_failed;
|
2008-02-08 00:39:52 +08:00
|
|
|
}
|
|
|
|
}
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
mark_all_ce_unused(o->src_index);
|
2006-07-31 02:25:18 +08:00
|
|
|
|
2009-08-20 21:47:08 +08:00
|
|
|
if (o->trivial_merges_only && o->nontrivial_merge) {
|
|
|
|
ret = unpack_failed(o, "Merge requires file-level merging");
|
|
|
|
goto done;
|
|
|
|
}
|
2008-03-06 12:15:44 +08:00
|
|
|
|
2009-08-20 21:47:09 +08:00
|
|
|
if (!o->skip_sparse_checkout) {
|
2009-08-20 21:47:13 +08:00
|
|
|
int empty_worktree = 1;
|
2009-08-20 21:47:09 +08:00
|
|
|
for (i = 0;i < o->result.cache_nr;i++) {
|
|
|
|
struct cache_entry *ce = o->result.cache[i];
|
|
|
|
|
|
|
|
if (apply_sparse_checkout(ce, o)) {
|
|
|
|
ret = -1;
|
|
|
|
goto done;
|
|
|
|
}
|
2010-07-31 14:14:26 +08:00
|
|
|
if (!ce_skip_worktree(ce))
|
2009-08-20 21:47:13 +08:00
|
|
|
empty_worktree = 0;
|
2009-08-20 21:47:10 +08:00
|
|
|
|
2009-08-20 21:47:09 +08:00
|
|
|
}
|
2009-08-20 21:47:13 +08:00
|
|
|
if (o->result.cache_nr && empty_worktree) {
|
|
|
|
ret = unpack_failed(o, "Sparse checkout leaves no entry on working directory");
|
|
|
|
goto done;
|
|
|
|
}
|
2009-08-20 21:47:09 +08:00
|
|
|
}
|
2008-03-06 12:15:44 +08:00
|
|
|
|
2008-03-07 10:12:28 +08:00
|
|
|
o->src_index = NULL;
|
2008-05-29 06:12:30 +08:00
|
|
|
ret = check_updates(o) ? (-2) : 0;
|
2008-03-07 10:12:28 +08:00
|
|
|
if (o->dst_index)
|
|
|
|
*o->dst_index = o->result;
|
2009-08-20 21:47:08 +08:00
|
|
|
|
|
|
|
done:
|
|
|
|
for (i = 0;i < el.nr;i++)
|
|
|
|
free(el.excludes[i]);
|
|
|
|
if (el.excludes)
|
|
|
|
free(el.excludes);
|
|
|
|
|
2008-05-29 06:12:30 +08:00
|
|
|
return ret;
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
|
|
|
|
return_failed:
|
2010-08-11 16:38:07 +08:00
|
|
|
if (o->show_all_errors)
|
|
|
|
display_error_msgs(o);
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees look like this:
    O        A        B
    t                 t
    t-i      t-i      t-i
    t-j      t-j
             t/1
             t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
mark_all_ce_unused(o->src_index);
|
2010-01-25 09:35:58 +08:00
|
|
|
ret = unpack_failed(o, NULL);
|
|
|
|
goto done;
|
2006-07-31 02:25:18 +08:00
|
|
|
}
|
2006-07-31 02:26:15 +08:00
|
|
|
|
|
|
|
/* Here come the merge functions */
|
|
|
|
|
2008-05-18 03:03:49 +08:00
|
|
|
static int reject_merge(struct cache_entry *ce, struct unpack_trees_options *o)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
2010-08-11 16:38:07 +08:00
|
|
|
return add_rejected_path(o, ERROR_WOULD_OVERWRITE, ce->name);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int same(struct cache_entry *a, struct cache_entry *b)
|
|
|
|
{
|
|
|
|
if (!!a != !!b)
|
|
|
|
return 0;
|
|
|
|
if (!a && !b)
|
|
|
|
return 1;
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 15:04:04 +08:00
|
|
|
if ((a->ce_flags | b->ce_flags) & CE_CONFLICTED)
|
|
|
|
return 0;
|
2006-07-31 02:26:15 +08:00
|
|
|
return a->ce_mode == b->ce_mode &&
|
2006-08-18 02:54:57 +08:00
|
|
|
!hashcmp(a->sha1, b->sha1);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When a CE gets turned into an unmerged entry, we
|
|
|
|
* want it to be up-to-date
|
|
|
|
*/
|
2009-08-20 21:47:07 +08:00
|
|
|
static int verify_uptodate_1(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o,
|
2010-08-11 16:38:06 +08:00
|
|
|
enum unpack_trees_error_types error_type)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
|
2010-05-01 17:25:12 +08:00
|
|
|
if (o->index_only || (!((ce->ce_flags & CE_VALID) || ce_skip_worktree(ce)) && (o->reset || ce_uptodate(ce))))
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2006-07-31 02:26:15 +08:00
|
|
|
|
|
|
|
if (!lstat(ce->name, &st)) {
|
2009-12-14 19:43:58 +08:00
|
|
|
unsigned changed = ie_match_stat(o->src_index, ce, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE);
|
2006-07-31 02:26:15 +08:00
|
|
|
if (!changed)
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
unpack-trees.c: assume submodules are clean during check-out
Sven originally raised this issue:
If you have a submodule checked out and you go back (or
forward) to a revision of the supermodule that contains a
different revision of the submodule and then switch to
another revision, it will complain that the submodule is not
uptodate, because git simply didn't update the submodule in
the first move.
The current policy is to consider it is perfectly normal that
checked-out submodule is out-of-sync wrt the supermodule index.
At least until we introduce a superproject repository
configuration option that says "in this repository, I do care
about this submodule and at any time I move around in the
superproject, recursively check out the submodule to match", it
is a reasonable policy, as we currently do not recursively
checkout the submodules at all. The most extreme case of this
policy is that the superproject index knows about the submodule
but the subdirectory does not even have to be checked out.
The function verify_uptodate(), called during the two-way merge
aka branch switching, is about "make sure the filesystem entity
that corresponds to this cache entry is up to date, lest we lose
the local modifications". As we explicitly allow submodule
checkout to drift from the supermodule index entry, the check
should say "Ok, for submodules, not matching is the norm" for
now.
Later when we have the ability to mark "I care about this
submodule to be always in sync with the superproject" (thereby
implementing automatic recursive checkout and perhaps diff,
among other things), we should check if the submodule in
question is marked as such and perform the current test.
Acked-by: Lars Hjemli <hjemli@gmail.com>
Acked-by: Sven Verdoolaege <skimo@kotnet.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2007-08-04 13:13:09 +08:00
|
|
|
/*
|
|
|
|
* NEEDSWORK: the current default policy is to allow
|
|
|
|
* submodule to be out of sync wrt the supermodule
|
|
|
|
* index. This needs to be tightened later for
|
|
|
|
* submodules that are marked to be automatically
|
|
|
|
* checked out.
|
|
|
|
*/
|
2008-01-15 08:03:17 +08:00
|
|
|
if (S_ISGITLINK(ce->ce_mode))
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2006-07-31 02:26:15 +08:00
|
|
|
errno = 0;
|
|
|
|
}
|
|
|
|
if (errno == ENOENT)
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2008-02-08 00:39:52 +08:00
|
|
|
return o->gently ? -1 :
|
2010-08-11 16:38:07 +08:00
|
|
|
add_rejected_path(o, error_type, ce->name);
|
2009-08-20 21:47:07 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int verify_uptodate(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2009-08-20 21:47:10 +08:00
|
|
|
if (!o->skip_sparse_checkout && will_have_skip_worktree(ce, o))
|
|
|
|
return 0;
|
2010-08-11 16:38:06 +08:00
|
|
|
return verify_uptodate_1(ce, o, ERROR_NOT_UPTODATE_FILE);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
2009-08-20 21:47:09 +08:00
|
|
|
static int verify_uptodate_sparse(struct cache_entry *ce,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2010-08-11 16:38:06 +08:00
|
|
|
return verify_uptodate_1(ce, o, ERROR_SPARSE_NOT_UPTODATE_FILE);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
2008-03-07 04:26:14 +08:00
|
|
|
static void invalidate_ce_path(struct cache_entry *ce, struct unpack_trees_options *o)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
|
|
|
if (ce)
|
2008-03-07 10:12:28 +08:00
|
|
|
cache_tree_invalidate_path(o->src_index->cache_tree, ce->name);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
2007-07-18 02:28:28 +08:00
|
|
|
/*
|
|
|
|
* Check that checking out ce->sha1 in subdir ce->name is not
|
|
|
|
* going to overwrite any working files.
|
|
|
|
*
|
|
|
|
* Currently, git does not checkout subprojects during a superproject
|
|
|
|
* checkout, so it is not going to overwrite anything.
|
|
|
|
*/
|
2010-08-11 16:38:06 +08:00
|
|
|
static int verify_clean_submodule(struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types error_type,
|
2007-07-18 02:28:28 +08:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-08-11 16:38:06 +08:00
|
|
|
static int verify_clean_subdirectory(struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types error_type,
|
2007-03-16 14:25:22 +08:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
|
|
|
/*
|
2007-07-18 02:28:28 +08:00
|
|
|
* we are about to extract "ce->name"; we would not want to lose
|
2007-03-16 14:25:22 +08:00
|
|
|
* anything in the existing directory there.
|
|
|
|
*/
|
|
|
|
int namelen;
|
2009-01-02 04:54:33 +08:00
|
|
|
int i;
|
2007-03-16 14:25:22 +08:00
|
|
|
struct dir_struct d;
|
|
|
|
char *pathbuf;
|
|
|
|
int cnt = 0;
|
2007-07-18 02:28:28 +08:00
|
|
|
unsigned char sha1[20];
|
|
|
|
|
2008-01-15 08:03:17 +08:00
|
|
|
if (S_ISGITLINK(ce->ce_mode) &&
|
2007-07-18 02:28:28 +08:00
|
|
|
resolve_gitlink_ref(ce->name, "HEAD", sha1) == 0) {
|
|
|
|
/* If we are not going to update the submodule, then
|
|
|
|
* we don't care.
|
|
|
|
*/
|
|
|
|
if (!hashcmp(sha1, ce->sha1))
|
|
|
|
return 0;
|
2010-08-11 16:38:06 +08:00
|
|
|
return verify_clean_submodule(ce, error_type, o);
|
2007-07-18 02:28:28 +08:00
|
|
|
}
|
2007-03-16 14:25:22 +08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* First let's make sure we do not have a local modification
|
|
|
|
* in that directory.
|
|
|
|
*/
|
2007-07-18 02:28:28 +08:00
|
|
|
namelen = strlen(ce->name);
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
for (i = locate_in_src_index(ce, o);
|
|
|
|
i < o->src_index->cache_nr;
|
|
|
|
i++) {
|
2009-01-02 04:54:32 +08:00
|
|
|
struct cache_entry *ce2 = o->src_index->cache[i];
|
|
|
|
int len = ce_namelen(ce2);
|
2007-03-16 14:25:22 +08:00
|
|
|
if (len < namelen ||
|
2009-01-02 04:54:32 +08:00
|
|
|
strncmp(ce->name, ce2->name, namelen) ||
|
|
|
|
ce2->name[namelen] != '/')
|
2007-03-16 14:25:22 +08:00
|
|
|
break;
|
|
|
|
/*
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
* ce2->name is an entry in the subdirectory to be
|
|
|
|
* removed.
|
2007-03-16 14:25:22 +08:00
|
|
|
*/
|
2009-01-02 04:54:32 +08:00
|
|
|
if (!ce_stage(ce2)) {
|
|
|
|
if (verify_uptodate(ce2, o))
|
2008-02-08 00:39:48 +08:00
|
|
|
return -1;
|
2009-01-02 04:54:32 +08:00
|
|
|
add_entry(o, ce2, CE_REMOVE, 0);
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
mark_ce_used(ce2, o);
|
2007-03-16 14:25:22 +08:00
|
|
|
}
|
|
|
|
cnt++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Then we need to make sure that we do not lose a locally
|
|
|
|
* present file that is not ignored.
|
|
|
|
*/
|
|
|
|
pathbuf = xmalloc(namelen + 2);
|
2007-07-18 02:28:28 +08:00
|
|
|
memcpy(pathbuf, ce->name, namelen);
|
2007-03-16 14:25:22 +08:00
|
|
|
strcpy(pathbuf+namelen, "/");
|
|
|
|
|
|
|
|
memset(&d, 0, sizeof(d));
|
|
|
|
if (o->dir)
|
|
|
|
d.exclude_per_dir = o->dir->exclude_per_dir;
|
2009-07-09 10:24:39 +08:00
|
|
|
i = read_directory(&d, pathbuf, namelen+1, NULL);
|
2007-03-16 14:25:22 +08:00
|
|
|
if (i)
|
2008-02-08 00:39:52 +08:00
|
|
|
return o->gently ? -1 :
|
2010-08-11 16:38:07 +08:00
|
|
|
add_rejected_path(o, ERROR_NOT_UPTODATE_DIR, ce->name);
|
2007-03-16 14:25:22 +08:00
|
|
|
free(pathbuf);
|
|
|
|
return cnt;
|
|
|
|
}
|
|
|
|
|
2008-03-23 00:35:59 +08:00
|
|
|
/*
|
|
|
|
* This gets called when there was no index entry for the tree entry 'dst',
|
|
|
|
* but we found a file in the working tree that 'lstat()' said was fine,
|
|
|
|
* and we're on a case-insensitive filesystem.
|
|
|
|
*
|
|
|
|
* See if we can find a case-insensitive match in the index that also
|
|
|
|
* matches the stat information, and assume it's that other file!
|
|
|
|
*/
|
|
|
|
static int icase_exists(struct unpack_trees_options *o, struct cache_entry *dst, struct stat *st)
|
|
|
|
{
|
|
|
|
struct cache_entry *src;
|
|
|
|
|
|
|
|
src = index_name_exists(o->src_index, dst->name, ce_namelen(dst), 1);
|
2009-12-14 19:43:58 +08:00
|
|
|
return src && !ie_match_stat(o->src_index, src, st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE);
|
2008-03-23 00:35:59 +08:00
|
|
|
}
|
|
|
|
|
2006-07-31 02:26:15 +08:00
|
|
|
/*
|
|
|
|
* We do not want to remove or overwrite a working tree file that
|
2006-12-05 08:00:46 +08:00
|
|
|
* is not tracked, unless it is ignored.
|
2006-07-31 02:26:15 +08:00
|
|
|
*/
|
2010-08-11 16:38:06 +08:00
|
|
|
static int verify_absent_1(struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types error_type,
|
|
|
|
struct unpack_trees_options *o)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
|
|
|
struct stat st;
|
|
|
|
|
|
|
|
if (o->index_only || o->reset || !o->update)
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2007-03-16 14:25:22 +08:00
|
|
|
|
2009-02-10 04:54:06 +08:00
|
|
|
if (has_symlink_or_noent_leading_path(ce->name, ce_namelen(ce)))
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2007-07-12 16:04:16 +08:00
|
|
|
|
2007-07-18 02:28:28 +08:00
|
|
|
if (!lstat(ce->name, &st)) {
|
2008-02-01 12:23:25 +08:00
|
|
|
int dtype = ce_to_dtype(ce);
|
2008-03-22 06:53:00 +08:00
|
|
|
struct cache_entry *result;
|
2007-03-16 14:25:22 +08:00
|
|
|
|
2008-03-23 00:35:59 +08:00
|
|
|
/*
|
|
|
|
* It may be that the 'lstat()' succeeded even though
|
|
|
|
* target 'ce' was absent, because there is an old
|
|
|
|
* entry that is different only in case..
|
|
|
|
*
|
|
|
|
* Ignore that lstat() if it matches.
|
|
|
|
*/
|
|
|
|
if (ignore_case && icase_exists(o, ce, &st))
|
|
|
|
return 0;
|
|
|
|
|
2008-02-01 12:23:25 +08:00
|
|
|
if (o->dir && excluded(o->dir, ce->name, &dtype))
|
2007-03-16 14:25:22 +08:00
|
|
|
/*
|
2007-07-18 02:28:28 +08:00
|
|
|
* ce->name is explicitly excluded, so it is Ok to
|
2007-03-16 14:25:22 +08:00
|
|
|
* overwrite it.
|
|
|
|
*/
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2007-03-16 14:25:22 +08:00
|
|
|
if (S_ISDIR(st.st_mode)) {
|
|
|
|
/*
|
|
|
|
* We are checking out path "foo" and
|
|
|
|
* found "foo/." in the working tree.
|
|
|
|
* This is tricky -- if we have modified
|
|
|
|
* files that are in "foo/" we would lose
|
2009-09-20 08:26:16 +08:00
|
|
|
* them.
|
2007-03-16 14:25:22 +08:00
|
|
|
*/
|
2010-08-11 16:38:06 +08:00
|
|
|
if (verify_clean_subdirectory(ce, error_type, o) < 0)
|
unpack-trees.c: prepare for looking ahead in the index
This prepares but does not yet implement a look-ahead in the index entries
when traverse-trees.c decides to give us tree entries in an order that
does not match what is in the index.
A case where a look-ahead in the index is necessary happens when merging
branch B into branch A while the index matches the current branch A, using
a tree O as their common ancestor, and these three trees looks like this:
O A B
t t
t-i t-i t-i
t-j t-j
t/1
t/2
The traverse_trees() function gets "t", "t-i" and "t" from trees O, A and
B first, and notices that A may have a matching "t" behind "t-i" and "t-j"
(indeed it does), and tells A to give that entry instead. After unpacking
blob "t" from tree B (as it hasn't changed since O in B and A removed it,
it will result in its removal), it descends into directory "t/".
The side that walked index in parallel to the tree traversal used to be
implemented with one pointer, o->pos, that points at the next index entry
to be processed. When this happens, the pointer o->pos still points at
"t-i" that is the first entry. We should be able to skip "t-i" and "t-j"
and locate "t/1" from the index while the recursive invocation of
traverse_trees() walks and match entries found there, and later come back
to process "t-i".
While that look-ahead is not implemented yet, this adds a flag bit,
CE_UNPACKED, to mark the entries in the index that has already been
processed. o->pos pointer has been renamed to o->cache_bottom and it
points at the first entry that may still need to be processed.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-08 06:59:54 +08:00
|
|
|
return -1;
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2007-03-16 14:25:22 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The previous round may already have decided to
|
|
|
|
* delete this path, which is in a subdirectory that
|
|
|
|
* is being replaced with a blob.
|
|
|
|
*/
|
2008-03-22 06:55:19 +08:00
|
|
|
result = index_name_exists(&o->result, ce->name, ce_namelen(ce), 0);
|
2008-03-22 06:53:00 +08:00
|
|
|
if (result) {
|
|
|
|
if (result->ce_flags & CE_REMOVE)
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2007-03-16 14:25:22 +08:00
|
|
|
}
|
|
|
|
|
2008-02-08 00:39:52 +08:00
|
|
|
return o->gently ? -1 :
|
2010-08-11 16:38:07 +08:00
|
|
|
add_rejected_path(o, error_type, ce->name);
|
2007-03-16 14:25:22 +08:00
|
|
|
}
|
2008-02-08 00:39:48 +08:00
|
|
|
return 0;
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
2010-08-11 16:38:06 +08:00
|
|
|
static int verify_absent(struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types error_type,
|
2009-08-20 21:47:07 +08:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2009-08-20 21:47:10 +08:00
|
|
|
if (!o->skip_sparse_checkout && will_have_skip_worktree(ce, o))
|
|
|
|
return 0;
|
2010-08-11 16:38:06 +08:00
|
|
|
return verify_absent_1(ce, error_type, o);
|
2009-08-20 21:47:07 +08:00
|
|
|
}
|
2006-07-31 02:26:15 +08:00
|
|
|
|
2010-08-11 16:38:06 +08:00
|
|
|
static int verify_absent_sparse(struct cache_entry *ce,
|
|
|
|
enum unpack_trees_error_types error_type,
|
2009-08-20 21:47:09 +08:00
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2010-08-11 16:38:06 +08:00
|
|
|
enum unpack_trees_error_types orphaned_error = error_type;
|
|
|
|
if (orphaned_error == ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN)
|
|
|
|
orphaned_error = ERROR_WOULD_LOSE_ORPHANED_OVERWRITTEN;
|
|
|
|
|
|
|
|
return verify_absent_1(ce, orphaned_error, o);
|
2009-08-20 21:47:09 +08:00
|
|
|
}
|
2006-07-31 02:26:15 +08:00
|
|
|
|
|
|
|
static int merged_entry(struct cache_entry *merge, struct cache_entry *old,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2008-03-17 02:42:50 +08:00
|
|
|
int update = CE_UPDATE;
|
|
|
|
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 15:04:04 +08:00
|
|
|
if (!old) {
|
2010-08-11 16:38:06 +08:00
|
|
|
if (verify_absent(merge, ERROR_WOULD_LOSE_UNTRACKED_OVERWRITTEN, o))
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 15:04:04 +08:00
|
|
|
return -1;
|
2010-07-31 14:14:29 +08:00
|
|
|
if (!o->skip_sparse_checkout && will_have_skip_worktree(merge, o))
|
|
|
|
update |= CE_SKIP_WORKTREE;
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 15:04:04 +08:00
|
|
|
invalidate_ce_path(merge, o);
|
|
|
|
} else if (!(old->ce_flags & CE_CONFLICTED)) {
|
2006-07-31 02:26:15 +08:00
|
|
|
/*
|
|
|
|
* See if we can re-use the old CE directly?
|
|
|
|
* That way we get the uptodate stat info.
|
|
|
|
*
|
2008-03-17 02:42:50 +08:00
|
|
|
* This also removes the UPDATE flag on a match; otherwise
|
|
|
|
* we will end up overwriting local changes in the work tree.
|
2006-07-31 02:26:15 +08:00
|
|
|
*/
|
|
|
|
if (same(old, merge)) {
|
2008-02-23 12:41:17 +08:00
|
|
|
copy_cache_entry(merge, old);
|
2008-03-17 02:42:50 +08:00
|
|
|
update = 0;
|
2006-07-31 02:26:15 +08:00
|
|
|
} else {
|
2008-02-08 00:39:48 +08:00
|
|
|
if (verify_uptodate(old, o))
|
|
|
|
return -1;
|
2009-08-20 21:47:02 +08:00
|
|
|
if (ce_skip_worktree(old))
|
|
|
|
update |= CE_SKIP_WORKTREE;
|
2008-03-07 04:26:14 +08:00
|
|
|
invalidate_ce_path(old, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 15:04:04 +08:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Previously unmerged entry left as an existence
|
|
|
|
* marker by read_index_unmerged();
|
|
|
|
*/
|
|
|
|
invalidate_ce_path(old, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
2008-03-17 02:42:50 +08:00
|
|
|
add_entry(o, merge, update, CE_STAGEMASK);
|
2006-07-31 02:26:15 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int deleted_entry(struct cache_entry *ce, struct cache_entry *old,
|
|
|
|
struct unpack_trees_options *o)
|
|
|
|
{
|
2008-03-07 10:12:28 +08:00
|
|
|
/* Did it exist in the index? */
|
|
|
|
if (!old) {
|
2010-08-11 16:38:06 +08:00
|
|
|
if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o))
|
2008-02-08 00:39:48 +08:00
|
|
|
return -1;
|
2008-03-07 10:12:28 +08:00
|
|
|
return 0;
|
|
|
|
}
|
"reset --merge": fix unmerged case
Commit 9e8ecea (Add 'merge' mode to 'git reset', 2008-12-01) disallowed
"git reset --merge" when there was unmerged entries. But it wished if
unmerged entries were reset as if --hard (instead of --merge) has been
used. This makes sense because all "mergy" operations makes sure that
any path involved in the merge does not have local modifications before
starting, so resetting such a path away won't lose any information.
The previous commit changed the behavior of --merge to accept resetting
unmerged entries if they are reset to a different state than HEAD, but it
did not reset the changes in the work tree, leaving the conflict markers
in the resulting file in the work tree.
Fix it by doing three things:
- Update the documentation to match the wish of original "reset --merge"
better, namely, "An unmerged entry is a sign that the path didn't have
any local modification and can be safely resetted to whatever the new
HEAD records";
- Update read_index_unmerged(), which reads the index file into the cache
while dropping any higher-stage entries down to stage #0, not to copy
the object name from the higher stage entry. The code used to take the
object name from the a stage entry ("base" if you happened to have
stage #1, or "ours" if both sides added, etc.), which essentially meant
that you are getting random results depending on what the merge did.
The _only_ reason we want to keep a previously unmerged entry in the
index at stage #0 is so that we don't forget the fact that we have
corresponding file in the work tree in order to be able to remove it
when the tree we are resetting to does not have the path. In order to
differentiate such an entry from ordinary cache entry, the cache entry
added by read_index_unmerged() is marked as CE_CONFLICTED.
- Update merged_entry() and deleted_entry() so that they pay attention to
cache entries marked as CE_CONFLICTED. They are previously unmerged
entries, and the files in the work tree that correspond to them are
resetted away by oneway_merge() to the version from the tree we are
resetting to.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2010-01-01 15:04:04 +08:00
|
|
|
if (!(old->ce_flags & CE_CONFLICTED) && verify_uptodate(old, o))
|
2008-03-07 10:12:28 +08:00
|
|
|
return -1;
|
|
|
|
add_entry(o, ce, CE_REMOVE, 0);
|
2008-03-07 04:26:14 +08:00
|
|
|
invalidate_ce_path(ce, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2007-04-02 15:06:12 +08:00
|
|
|
/*
 * Carry "ce" over into the result unchanged: it is handed to
 * add_entry() with both flag arguments zero.  Always returns 1 so
 * callers can use the return value to count kept entries.
 */
static int keep_entry(struct cache_entry *ce, struct unpack_trees_options *o)
{
	const int kept = 1;

	add_entry(o, ce, 0, 0);
	return kept;
}
|
|
|
|
|
|
|
|
#if DBRT_DEBUG
/*
 * Debug helper: print one line describing "ce" to stream "o", prefixed
 * with "label".  A NULL entry is reported as "(missing)"; otherwise the
 * mode (octal), object name (hex), stage and path are printed.
 */
static void show_stage_entry(FILE *o,
			     const char *label, const struct cache_entry *ce)
{
	if (!ce) {
		fprintf(o, "%s (missing)\n", label);
		return;
	}

	fprintf(o, "%s%06o %s %d\t%s\n",
		label,
		ce->ce_mode,
		sha1_to_hex(ce->sha1),
		ce_stage(ce),
		ce->name);
}
#endif
|
|
|
|
|
2008-03-07 10:12:28 +08:00
|
|
|
int threeway_merge(struct cache_entry **stages, struct unpack_trees_options *o)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
|
|
|
struct cache_entry *index;
|
|
|
|
struct cache_entry *head;
|
|
|
|
struct cache_entry *remote = stages[o->head_idx + 1];
|
|
|
|
int count;
|
|
|
|
int head_match = 0;
|
|
|
|
int remote_match = 0;
|
|
|
|
|
|
|
|
int df_conflict_head = 0;
|
|
|
|
int df_conflict_remote = 0;
|
|
|
|
|
|
|
|
int any_anc_missing = 0;
|
|
|
|
int no_anc_exists = 1;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
2007-04-07 20:49:19 +08:00
|
|
|
if (!stages[i] || stages[i] == o->df_conflict_entry)
|
2006-07-31 02:26:15 +08:00
|
|
|
any_anc_missing = 1;
|
2007-04-07 20:49:19 +08:00
|
|
|
else
|
2006-07-31 02:26:15 +08:00
|
|
|
no_anc_exists = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
index = stages[0];
|
|
|
|
head = stages[o->head_idx];
|
|
|
|
|
|
|
|
if (head == o->df_conflict_entry) {
|
|
|
|
df_conflict_head = 1;
|
|
|
|
head = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (remote == o->df_conflict_entry) {
|
|
|
|
df_conflict_remote = 1;
|
|
|
|
remote = NULL;
|
|
|
|
}
|
|
|
|
|
2009-10-12 04:38:11 +08:00
|
|
|
/*
|
|
|
|
* First, if there's a #16 situation, note that to prevent #13
|
2006-07-31 02:26:15 +08:00
|
|
|
* and #14.
|
|
|
|
*/
|
|
|
|
if (!same(remote, head)) {
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
|
|
|
if (same(stages[i], head)) {
|
|
|
|
head_match = i;
|
|
|
|
}
|
|
|
|
if (same(stages[i], remote)) {
|
|
|
|
remote_match = i;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-10-12 04:38:11 +08:00
|
|
|
/*
|
|
|
|
* We start with cases where the index is allowed to match
|
2006-07-31 02:26:15 +08:00
|
|
|
* something other than the head: #14(ALT) and #2ALT, where it
|
|
|
|
* is permitted to match the result instead.
|
|
|
|
*/
|
|
|
|
/* #14, #14ALT, #2ALT */
|
|
|
|
if (remote && !df_conflict_head && head_match && !remote_match) {
|
|
|
|
if (index && !same(index, remote) && !same(index, head))
|
2008-05-18 03:03:49 +08:00
|
|
|
return o->gently ? -1 : reject_merge(index, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
return merged_entry(remote, index, o);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* If we have an entry in the index cache, then we want to
|
|
|
|
* make sure that it matches head.
|
|
|
|
*/
|
2008-02-08 00:40:02 +08:00
|
|
|
if (index && !same(index, head))
|
2008-05-18 03:03:49 +08:00
|
|
|
return o->gently ? -1 : reject_merge(index, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
|
|
|
|
if (head) {
|
|
|
|
/* #5ALT, #15 */
|
|
|
|
if (same(head, remote))
|
|
|
|
return merged_entry(head, index, o);
|
|
|
|
/* #13, #3ALT */
|
|
|
|
if (!df_conflict_remote && remote_match && !head_match)
|
|
|
|
return merged_entry(head, index, o);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* #1 */
|
2008-03-07 10:12:28 +08:00
|
|
|
if (!head && !remote && any_anc_missing)
|
2006-07-31 02:26:15 +08:00
|
|
|
return 0;
|
|
|
|
|
2009-10-12 04:38:11 +08:00
|
|
|
/*
|
|
|
|
* Under the "aggressive" rule, we resolve mostly trivial
|
2006-07-31 02:26:15 +08:00
|
|
|
* cases that we historically had git-merge-one-file resolve.
|
|
|
|
*/
|
|
|
|
if (o->aggressive) {
|
2009-10-12 04:38:11 +08:00
|
|
|
int head_deleted = !head;
|
|
|
|
int remote_deleted = !remote;
|
2007-07-18 02:28:28 +08:00
|
|
|
struct cache_entry *ce = NULL;
|
2007-04-07 20:49:19 +08:00
|
|
|
|
|
|
|
if (index)
|
2007-07-18 02:28:28 +08:00
|
|
|
ce = index;
|
2007-04-07 20:49:19 +08:00
|
|
|
else if (head)
|
2007-07-18 02:28:28 +08:00
|
|
|
ce = head;
|
2007-04-07 20:49:19 +08:00
|
|
|
else if (remote)
|
2007-07-18 02:28:28 +08:00
|
|
|
ce = remote;
|
2007-04-07 20:49:19 +08:00
|
|
|
else {
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
|
|
|
if (stages[i] && stages[i] != o->df_conflict_entry) {
|
2007-07-18 02:28:28 +08:00
|
|
|
ce = stages[i];
|
2007-04-07 20:49:19 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2006-07-31 02:26:15 +08:00
|
|
|
/*
|
|
|
|
* Deleted in both.
|
|
|
|
* Deleted in one and unchanged in the other.
|
|
|
|
*/
|
|
|
|
if ((head_deleted && remote_deleted) ||
|
|
|
|
(head_deleted && remote && remote_match) ||
|
|
|
|
(remote_deleted && head && head_match)) {
|
|
|
|
if (index)
|
|
|
|
return deleted_entry(index, index, o);
|
2008-03-07 10:12:28 +08:00
|
|
|
if (ce && !head_deleted) {
|
2010-08-11 16:38:06 +08:00
|
|
|
if (verify_absent(ce, ERROR_WOULD_LOSE_UNTRACKED_REMOVED, o))
|
2008-02-08 00:39:48 +08:00
|
|
|
return -1;
|
|
|
|
}
|
2006-07-31 02:26:15 +08:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Added in both, identically.
|
|
|
|
*/
|
|
|
|
if (no_anc_exists && head && remote && same(head, remote))
|
|
|
|
return merged_entry(head, index, o);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Below are "no merge" cases, which require that the index be
|
|
|
|
* up-to-date to avoid the files getting overwritten with
|
|
|
|
* conflict resolution files.
|
|
|
|
*/
|
|
|
|
if (index) {
|
2008-02-08 00:39:48 +08:00
|
|
|
if (verify_uptodate(index, o))
|
|
|
|
return -1;
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
o->nontrivial_merge = 1;
|
|
|
|
|
2007-04-07 20:42:01 +08:00
|
|
|
/* #2, #3, #4, #6, #7, #9, #10, #11. */
|
2006-07-31 02:26:15 +08:00
|
|
|
count = 0;
|
|
|
|
if (!head_match || !remote_match) {
|
|
|
|
for (i = 1; i < o->head_idx; i++) {
|
2007-04-07 20:49:19 +08:00
|
|
|
if (stages[i] && stages[i] != o->df_conflict_entry) {
|
2007-04-02 15:06:12 +08:00
|
|
|
keep_entry(stages[i], o);
|
2006-07-31 02:26:15 +08:00
|
|
|
count++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
#if DBRT_DEBUG
|
|
|
|
else {
|
|
|
|
fprintf(stderr, "read-tree: warning #16 detected\n");
|
|
|
|
show_stage_entry(stderr, "head ", stages[head_match]);
|
|
|
|
show_stage_entry(stderr, "remote ", stages[remote_match]);
|
|
|
|
}
|
|
|
|
#endif
|
2007-04-02 15:06:12 +08:00
|
|
|
if (head) { count += keep_entry(head, o); }
|
|
|
|
if (remote) { count += keep_entry(remote, o); }
|
2006-07-31 02:26:15 +08:00
|
|
|
return count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Two-way merge.
|
|
|
|
*
|
|
|
|
* The rule is to "carry forward" what is in the index without losing
|
2009-10-24 16:31:32 +08:00
|
|
|
* information across a "fast-forward", favoring a successful merge
|
2006-07-31 02:26:15 +08:00
|
|
|
* over a merge failure when it makes sense. For details of the
|
|
|
|
* "carry forward" rule, please see <Documentation/git-read-tree.txt>.
|
|
|
|
*
|
|
|
|
*/
|
2008-03-07 10:12:28 +08:00
|
|
|
int twoway_merge(struct cache_entry **src, struct unpack_trees_options *o)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
|
|
|
struct cache_entry *current = src[0];
|
2007-04-03 07:29:56 +08:00
|
|
|
struct cache_entry *oldtree = src[1];
|
|
|
|
struct cache_entry *newtree = src[2];
|
2006-07-31 02:26:15 +08:00
|
|
|
|
|
|
|
if (o->merge_size != 2)
|
|
|
|
return error("Cannot do a twoway merge of %d trees",
|
|
|
|
o->merge_size);
|
|
|
|
|
2007-04-03 07:29:56 +08:00
|
|
|
if (oldtree == o->df_conflict_entry)
|
|
|
|
oldtree = NULL;
|
|
|
|
if (newtree == o->df_conflict_entry)
|
|
|
|
newtree = NULL;
|
|
|
|
|
2006-07-31 02:26:15 +08:00
|
|
|
if (current) {
|
|
|
|
if ((!oldtree && !newtree) || /* 4 and 5 */
|
|
|
|
(!oldtree && newtree &&
|
|
|
|
same(current, newtree)) || /* 6 and 7 */
|
|
|
|
(oldtree && newtree &&
|
|
|
|
same(oldtree, newtree)) || /* 14 and 15 */
|
|
|
|
(oldtree && newtree &&
|
2007-04-03 07:29:56 +08:00
|
|
|
!same(oldtree, newtree) && /* 18 and 19 */
|
2006-07-31 02:26:15 +08:00
|
|
|
same(current, newtree))) {
|
2007-04-02 15:06:12 +08:00
|
|
|
return keep_entry(current, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
else if (oldtree && !newtree && same(current, oldtree)) {
|
|
|
|
/* 10 or 11 */
|
|
|
|
return deleted_entry(oldtree, current, o);
|
|
|
|
}
|
|
|
|
else if (oldtree && newtree &&
|
|
|
|
same(current, oldtree) && !same(current, newtree)) {
|
|
|
|
/* 20 or 21 */
|
|
|
|
return merged_entry(newtree, current, o);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* all other failures */
|
|
|
|
if (oldtree)
|
2008-05-18 03:03:49 +08:00
|
|
|
return o->gently ? -1 : reject_merge(oldtree, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
if (current)
|
2008-05-18 03:03:49 +08:00
|
|
|
return o->gently ? -1 : reject_merge(current, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
if (newtree)
|
2008-05-18 03:03:49 +08:00
|
|
|
return o->gently ? -1 : reject_merge(newtree, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
checkout: do not lose staged removal
The logic to checkout a different commit implements the safety to never
lose user's local changes. For example, switching from a commit to
another commit, when you have changed a path that is different between
them, need to merge your changes to the version from the switched-to
commit, which you may not necessarily be able to resolve easily. By
default, "git checkout" refused to switch branches, to give you a chance
to stash your local changes (or use "-m" to merge, accepting the risks of
getting conflicts).
This safety, however, had one deliberate hole since early June 2005. When
your local change was to remove a path (and optionally to stage that
removal), the command checked out the path from the switched-to commit
nevertheless.
This was to allow an initial checkout to happen smoothly (e.g. an initial
checkout is done by starting with an empty index and switching from the
commit at the HEAD to the same commit). We can tighten the rule slightly
to allow this special case to pass, without losing sight of removal
explicitly done by the user, by noticing if the index is truly empty when
the operation begins.
For historical background, see:
http://thread.gmane.org/gmane.comp.version-control.git/4641/focus=4646
This case is marked as *0* in the message, which both Linus and I said "it
feels somewhat wrong but otherwise we cannot start from an empty index".
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-08 10:49:25 +08:00
|
|
|
else if (newtree) {
|
|
|
|
if (oldtree && !o->initial_checkout) {
|
|
|
|
/*
|
|
|
|
* deletion of the path was staged;
|
|
|
|
*/
|
|
|
|
if (same(oldtree, newtree))
|
|
|
|
return 1;
|
|
|
|
return reject_merge(oldtree, o);
|
|
|
|
}
|
2006-07-31 02:26:15 +08:00
|
|
|
return merged_entry(newtree, current, o);
|
checkout: do not lose staged removal
The logic to checkout a different commit implements the safety to never
lose user's local changes. For example, switching from a commit to
another commit, when you have changed a path that is different between
them, need to merge your changes to the version from the switched-to
commit, which you may not necessarily be able to resolve easily. By
default, "git checkout" refused to switch branches, to give you a chance
to stash your local changes (or use "-m" to merge, accepting the risks of
getting conflicts).
This safety, however, had one deliberate hole since early June 2005. When
your local change was to remove a path (and optionally to stage that
removal), the command checked out the path from the switched-to commit
nevertheless.
This was to allow an initial checkout to happen smoothly (e.g. an initial
checkout is done by starting with an empty index and switching from the
commit at the HEAD to the same commit). We can tighten the rule slightly
to allow this special case to pass, without losing sight of removal
explicitly done by the user, by noticing if the index is truly empty when
the operation begins.
For historical background, see:
http://thread.gmane.org/gmane.comp.version-control.git/4641/focus=4646
This case is marked as *0* in the message, which both Linus and I said "it
feels somewhat wrong but otherwise we cannot start from an empty index".
Signed-off-by: Junio C Hamano <gitster@pobox.com>
2008-09-08 10:49:25 +08:00
|
|
|
}
|
2007-08-11 03:31:20 +08:00
|
|
|
return deleted_entry(oldtree, current, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Bind merge.
|
|
|
|
*
|
|
|
|
* Keep the index entries at stage0, collapse stage1 but make sure
|
|
|
|
* stage0 does not have anything there.
|
|
|
|
*/
|
|
|
|
int bind_merge(struct cache_entry **src,
|
2008-03-07 10:12:28 +08:00
|
|
|
struct unpack_trees_options *o)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
|
|
|
struct cache_entry *old = src[0];
|
|
|
|
struct cache_entry *a = src[1];
|
|
|
|
|
|
|
|
if (o->merge_size != 1)
|
|
|
|
return error("Cannot do a bind merge of %d trees\n",
|
|
|
|
o->merge_size);
|
|
|
|
if (a && old)
|
2008-02-08 00:39:52 +08:00
|
|
|
return o->gently ? -1 :
|
2010-08-11 16:38:04 +08:00
|
|
|
error(ERRORMSG(o, ERROR_BIND_OVERLAP), a->name, old->name);
|
2006-07-31 02:26:15 +08:00
|
|
|
if (!a)
|
2007-04-02 15:06:12 +08:00
|
|
|
return keep_entry(old, o);
|
2006-07-31 02:26:15 +08:00
|
|
|
else
|
|
|
|
return merged_entry(a, NULL, o);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* One-way merge.
|
|
|
|
*
|
|
|
|
* The rule is:
|
|
|
|
* - take the stat information from stage0, take the data from stage1
|
|
|
|
*/
|
2008-03-07 10:12:28 +08:00
|
|
|
int oneway_merge(struct cache_entry **src, struct unpack_trees_options *o)
|
2006-07-31 02:26:15 +08:00
|
|
|
{
|
|
|
|
struct cache_entry *old = src[0];
|
|
|
|
struct cache_entry *a = src[1];
|
|
|
|
|
|
|
|
if (o->merge_size != 1)
|
|
|
|
return error("Cannot do a oneway merge of %d trees",
|
|
|
|
o->merge_size);
|
|
|
|
|
checkout -f: deal with a D/F conflict entry correctly
When we switch branches with "checkout -f", unpack_trees() feeds two
cache_entries to oneway_merge() function in its src[] array argument. The
zeroth entry comes from the current index, and the first entry represents
what the merge result should be, taken from the tree recorded in the
commit we are switching to.
When we have a blob (either regular file or a symlink) in the index and in
the work tree at path "foo", and the switched-to tree has "foo/bar",
i.e. "foo" becomes a directory, src[0] is obviously that blob currently
registered at "foo". Even though we do not have anything at "foo" in the
switched-to tree, src[1] is _not_ NULL in this case.
The unpack_trees() machinery places a special marker df_conflict_entry
to signal that no blob exists at "foo", but it will become a directory
that may have somthing underneath it (namely "foo/bar"), so a usual 3-way
merge can notice the situation.
But oneway_merge() codepath failed to notice this and passed the special
marker directly to merged_entry(). This happens to remove the "foo" in
the end because the df_conflict_entry does not have any name (hence the
"error" message) and its addition in add_index_entry() is rejected, but it
is wrong.
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-07-19 03:26:38 +08:00
|
|
|
if (!a || a == o->df_conflict_entry)
|
2006-07-31 02:26:15 +08:00
|
|
|
return deleted_entry(old, old, o);
|
2008-03-07 10:12:28 +08:00
|
|
|
|
2006-07-31 02:26:15 +08:00
|
|
|
if (old && same(old, a)) {
|
2008-03-07 10:12:28 +08:00
|
|
|
int update = 0;
|
2009-08-20 21:46:59 +08:00
|
|
|
if (o->reset && !ce_uptodate(old) && !ce_skip_worktree(old)) {
|
2006-07-31 02:26:15 +08:00
|
|
|
struct stat st;
|
|
|
|
if (lstat(old->name, &st) ||
|
2009-12-14 19:43:58 +08:00
|
|
|
ie_match_stat(o->src_index, old, &st, CE_MATCH_IGNORE_VALID|CE_MATCH_IGNORE_SKIP_WORKTREE))
|
2008-03-07 10:12:28 +08:00
|
|
|
update |= CE_UPDATE;
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
2008-03-07 10:12:28 +08:00
|
|
|
add_entry(o, old, update, 0);
|
|
|
|
return 0;
|
2006-07-31 02:26:15 +08:00
|
|
|
}
|
|
|
|
return merged_entry(a, old, o);
|
|
|
|
}
|