git/resolve-undo.c

200 lines
4.5 KiB
C
Raw Normal View History

#include "cache.h"
#include "dir.h"
#include "resolve-undo.h"
#include "string-list.h"
/* The only error case is to run out of memory in string-list */
void record_resolve_undo(struct index_state *istate, struct cache_entry *ce)
{
struct string_list_item *lost;
struct resolve_undo_info *ui;
struct string_list *resolve_undo;
int stage = ce_stage(ce);
if (!stage)
return;
if (!istate->resolve_undo) {
CALLOC_ARRAY(resolve_undo, 1);
resolve_undo->strdup_strings = 1;
istate->resolve_undo = resolve_undo;
}
resolve_undo = istate->resolve_undo;
lost = string_list_insert(resolve_undo, ce->name);
if (!lost->util)
lost->util = xcalloc(1, sizeof(*ui));
ui = lost->util;
oidcpy(&ui->oid[stage - 1], &ce->oid);
ui->mode[stage - 1] = ce->ce_mode;
}
void resolve_undo_write(struct strbuf *sb, struct string_list *resolve_undo)
{
struct string_list_item *item;
for_each_string_list_item(item, resolve_undo) {
struct resolve_undo_info *ui = item->util;
int i;
if (!ui)
continue;
strbuf_addstr(sb, item->string);
strbuf_addch(sb, 0);
for (i = 0; i < 3; i++)
strbuf_addf(sb, "%o%c", ui->mode[i], 0);
for (i = 0; i < 3; i++) {
if (!ui->mode[i])
continue;
strbuf_add(sb, ui->oid[i].hash, the_hash_algo->rawsz);
}
}
}
struct string_list *resolve_undo_read(const char *data, unsigned long size)
{
struct string_list *resolve_undo;
size_t len;
char *endptr;
int i;
const unsigned rawsz = the_hash_algo->rawsz;
CALLOC_ARRAY(resolve_undo, 1);
resolve_undo->strdup_strings = 1;
while (size) {
struct string_list_item *lost;
struct resolve_undo_info *ui;
len = strlen(data) + 1;
if (size <= len)
goto error;
lost = string_list_insert(resolve_undo, data);
if (!lost->util)
lost->util = xcalloc(1, sizeof(*ui));
ui = lost->util;
size -= len;
data += len;
for (i = 0; i < 3; i++) {
ui->mode[i] = strtoul(data, &endptr, 8);
if (!endptr || endptr == data || *endptr)
goto error;
len = (endptr + 1) - (char*)data;
if (size <= len)
goto error;
size -= len;
data += len;
}
for (i = 0; i < 3; i++) {
if (!ui->mode[i])
continue;
if (size < rawsz)
goto error;
oidread(&ui->oid[i], (const unsigned char *)data);
size -= rawsz;
data += rawsz;
}
}
return resolve_undo;
error:
string_list_clear(resolve_undo, 1);
error("Index records invalid resolve-undo information");
return NULL;
}
void resolve_undo_clear_index(struct index_state *istate)
{
struct string_list *resolve_undo = istate->resolve_undo;
if (!resolve_undo)
return;
string_list_clear(resolve_undo, 1);
free(resolve_undo);
istate->resolve_undo = NULL;
istate->cache_changed |= RESOLVE_UNDO_CHANGED;
}
int unmerge_index_entry_at(struct index_state *istate, int pos)
{
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 23:29:00 +08:00
const struct cache_entry *ce;
struct string_list_item *item;
struct resolve_undo_info *ru;
checkout: avoid unnecessary match_pathspec calls In checkout_paths() we do this - for all updated items, call match_pathspec - for all items, call match_pathspec (inside unmerge_cache) - for all items, call match_pathspec (for showing "path .. is unmerged) - for updated items, call match_pathspec and update paths That's a lot of duplicate match_pathspec(s) and the function is not exactly cheap to be called so many times, especially on large indexes. This patch makes it call match_pathspec once per updated index entry, save the result in ce_flags and reuse the results in the following loops. The changes in 0a1283b (checkout $tree $path: do not clobber local changes in $path not in $tree - 2011-09-30) limit the affected paths to ones we read from $tree. We do not do anything to other modified entries in this case, so the "for all items" above could be modified to "for all updated items". But.. The command's behavior now is modified slightly: unmerged entries that match $path, but not updated by $tree, are now NOT touched. Although this should be considered a bug fix, not a regression. A new test is added for this change. And while at there, free ps_matched after use. The following command is tested on webkit, 215k entries. The pattern is chosen mainly to make match_pathspec sweat: git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*" before after real 0m3.493s 0m2.737s user 0m2.239s 0m1.586s sys 0m1.252s 0m1.151s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 13:58:21 +08:00
int i, err = 0, matched;
char *name;
if (!istate->resolve_undo)
return pos;
ce = istate->cache[pos];
if (ce_stage(ce)) {
/* already unmerged */
while ((pos < istate->cache_nr) &&
! strcmp(istate->cache[pos]->name, ce->name))
pos++;
return pos - 1; /* return the last entry processed */
}
item = string_list_lookup(istate->resolve_undo, ce->name);
if (!item)
return pos;
ru = item->util;
if (!ru)
return pos;
checkout: avoid unnecessary match_pathspec calls In checkout_paths() we do this - for all updated items, call match_pathspec - for all items, call match_pathspec (inside unmerge_cache) - for all items, call match_pathspec (for showing "path .. is unmerged) - for updated items, call match_pathspec and update paths That's a lot of duplicate match_pathspec(s) and the function is not exactly cheap to be called so many times, especially on large indexes. This patch makes it call match_pathspec once per updated index entry, save the result in ce_flags and reuse the results in the following loops. The changes in 0a1283b (checkout $tree $path: do not clobber local changes in $path not in $tree - 2011-09-30) limit the affected paths to ones we read from $tree. We do not do anything to other modified entries in this case, so the "for all items" above could be modified to "for all updated items". But.. The command's behavior now is modified slightly: unmerged entries that match $path, but not updated by $tree, are now NOT touched. Although this should be considered a bug fix, not a regression. A new test is added for this change. And while at there, free ps_matched after use. The following command is tested on webkit, 215k entries. The pattern is chosen mainly to make match_pathspec sweat: git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*" before after real 0m3.493s 0m2.737s user 0m2.239s 0m1.586s sys 0m1.252s 0m1.151s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 13:58:21 +08:00
matched = ce->ce_flags & CE_MATCHED;
name = xstrdup(ce->name);
remove_index_entry_at(istate, pos);
for (i = 0; i < 3; i++) {
struct cache_entry *nce;
if (!ru->mode[i])
continue;
block alloc: add lifecycle APIs for cache_entry structs It has been observed that the time spent loading an index with a large number of entries is partly dominated by malloc() calls. This change is in preparation for using memory pools to reduce the number of malloc() calls made to allocate cahce entries when loading an index. Add an API to allocate and discard cache entries, abstracting the details of managing the memory backing the cache entries. This commit does actually change how memory is managed - this will be done in a later commit in the series. This change makes the distinction between cache entries that are associated with an index and cache entries that are not associated with an index. A main use of cache entries is with an index, and we can optimize the memory management around this. We still have other cases where a cache entry is not persisted with an index, and so we need to handle the "transient" use case as well. To keep the congnitive overhead of managing the cache entries, there will only be a single discard function. This means there must be enough information kept with the cache entry so that we know how to discard them. A summary of the main functions in the API is: make_cache_entry: create cache entry for use in an index. Uses specified parameters to populate cache_entry fields. make_empty_cache_entry: Create an empty cache entry for use in an index. Returns cache entry with empty fields. make_transient_cache_entry: create cache entry that is not used in an index. Uses specified parameters to populate cache_entry fields. make_empty_transient_cache_entry: create cache entry that is not used in an index. Returns cache entry with empty fields. discard_cache_entry: A single function that knows how to discard a cache entry regardless of how it was allocated. Signed-off-by: Jameson Miller <jamill@microsoft.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2018-07-03 03:49:31 +08:00
nce = make_cache_entry(istate,
ru->mode[i],
&ru->oid[i],
name, i + 1, 0);
checkout: avoid unnecessary match_pathspec calls In checkout_paths() we do this - for all updated items, call match_pathspec - for all items, call match_pathspec (inside unmerge_cache) - for all items, call match_pathspec (for showing "path .. is unmerged) - for updated items, call match_pathspec and update paths That's a lot of duplicate match_pathspec(s) and the function is not exactly cheap to be called so many times, especially on large indexes. This patch makes it call match_pathspec once per updated index entry, save the result in ce_flags and reuse the results in the following loops. The changes in 0a1283b (checkout $tree $path: do not clobber local changes in $path not in $tree - 2011-09-30) limit the affected paths to ones we read from $tree. We do not do anything to other modified entries in this case, so the "for all items" above could be modified to "for all updated items". But.. The command's behavior now is modified slightly: unmerged entries that match $path, but not updated by $tree, are now NOT touched. Although this should be considered a bug fix, not a regression. A new test is added for this change. And while at there, free ps_matched after use. The following command is tested on webkit, 215k entries. The pattern is chosen mainly to make match_pathspec sweat: git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*" before after real 0m3.493s 0m2.737s user 0m2.239s 0m1.586s sys 0m1.252s 0m1.151s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 13:58:21 +08:00
if (matched)
nce->ce_flags |= CE_MATCHED;
if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
err = 1;
error("cannot unmerge '%s'", name);
}
}
free(name);
if (err)
return pos;
free(ru);
item->util = NULL;
return unmerge_index_entry_at(istate, pos);
}
checkout: avoid unnecessary match_pathspec calls In checkout_paths() we do this - for all updated items, call match_pathspec - for all items, call match_pathspec (inside unmerge_cache) - for all items, call match_pathspec (for showing "path .. is unmerged) - for updated items, call match_pathspec and update paths That's a lot of duplicate match_pathspec(s) and the function is not exactly cheap to be called so many times, especially on large indexes. This patch makes it call match_pathspec once per updated index entry, save the result in ce_flags and reuse the results in the following loops. The changes in 0a1283b (checkout $tree $path: do not clobber local changes in $path not in $tree - 2011-09-30) limit the affected paths to ones we read from $tree. We do not do anything to other modified entries in this case, so the "for all items" above could be modified to "for all updated items". But.. The command's behavior now is modified slightly: unmerged entries that match $path, but not updated by $tree, are now NOT touched. Although this should be considered a bug fix, not a regression. A new test is added for this change. And while at there, free ps_matched after use. The following command is tested on webkit, 215k entries. The pattern is chosen mainly to make match_pathspec sweat: git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*" before after real 0m3.493s 0m2.737s user 0m2.239s 0m1.586s sys 0m1.252s 0m1.151s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 13:58:21 +08:00
void unmerge_marked_index(struct index_state *istate)
{
int i;
if (!istate->resolve_undo)
return;
/* TODO: audit for interaction with sparse-index. */
ensure_full_index(istate);
checkout: avoid unnecessary match_pathspec calls In checkout_paths() we do this - for all updated items, call match_pathspec - for all items, call match_pathspec (inside unmerge_cache) - for all items, call match_pathspec (for showing "path .. is unmerged) - for updated items, call match_pathspec and update paths That's a lot of duplicate match_pathspec(s) and the function is not exactly cheap to be called so many times, especially on large indexes. This patch makes it call match_pathspec once per updated index entry, save the result in ce_flags and reuse the results in the following loops. The changes in 0a1283b (checkout $tree $path: do not clobber local changes in $path not in $tree - 2011-09-30) limit the affected paths to ones we read from $tree. We do not do anything to other modified entries in this case, so the "for all items" above could be modified to "for all updated items". But.. The command's behavior now is modified slightly: unmerged entries that match $path, but not updated by $tree, are now NOT touched. Although this should be considered a bug fix, not a regression. A new test is added for this change. And while at there, free ps_matched after use. The following command is tested on webkit, 215k entries. The pattern is chosen mainly to make match_pathspec sweat: git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*" before after real 0m3.493s 0m2.737s user 0m2.239s 0m1.586s sys 0m1.252s 0m1.151s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 13:58:21 +08:00
for (i = 0; i < istate->cache_nr; i++) {
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 23:29:00 +08:00
const struct cache_entry *ce = istate->cache[i];
checkout: avoid unnecessary match_pathspec calls In checkout_paths() we do this - for all updated items, call match_pathspec - for all items, call match_pathspec (inside unmerge_cache) - for all items, call match_pathspec (for showing "path .. is unmerged) - for updated items, call match_pathspec and update paths That's a lot of duplicate match_pathspec(s) and the function is not exactly cheap to be called so many times, especially on large indexes. This patch makes it call match_pathspec once per updated index entry, save the result in ce_flags and reuse the results in the following loops. The changes in 0a1283b (checkout $tree $path: do not clobber local changes in $path not in $tree - 2011-09-30) limit the affected paths to ones we read from $tree. We do not do anything to other modified entries in this case, so the "for all items" above could be modified to "for all updated items". But.. The command's behavior now is modified slightly: unmerged entries that match $path, but not updated by $tree, are now NOT touched. Although this should be considered a bug fix, not a regression. A new test is added for this change. And while at there, free ps_matched after use. The following command is tested on webkit, 215k entries. The pattern is chosen mainly to make match_pathspec sweat: git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*" before after real 0m3.493s 0m2.737s user 0m2.239s 0m1.586s sys 0m1.252s 0m1.151s Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-03-27 13:58:21 +08:00
if (ce->ce_flags & CE_MATCHED)
i = unmerge_index_entry_at(istate, i);
}
}
void unmerge_index(struct index_state *istate, const struct pathspec *pathspec)
{
int i;
if (!istate->resolve_undo)
return;
/* TODO: audit for interaction with sparse-index. */
ensure_full_index(istate);
for (i = 0; i < istate->cache_nr; i++) {
Convert "struct cache_entry *" to "const ..." wherever possible I attempted to make index_state->cache[] a "const struct cache_entry **" to find out how existing entries in index are modified and where. The question I have is what do we do if we really need to keep track of on-disk changes in the index. The result is - diff-lib.c: setting CE_UPTODATE - name-hash.c: setting CE_HASHED - preload-index.c, read-cache.c, unpack-trees.c and builtin/update-index: obvious - entry.c: write_entry() may refresh the checked out entry via fill_stat_cache_info(). This causes "non-const struct cache_entry *" in builtin/apply.c, builtin/checkout-index.c and builtin/checkout.c - builtin/ls-files.c: --with-tree changes stagemask and may set CE_UPDATE Of these, write_entry() and its call sites are probably most interesting because it modifies on-disk info. But this is stat info and can be retrieved via refresh, at least for porcelain commands. Other just uses ce_flags for local purposes. So, keeping track of "dirty" entries is just a matter of setting a flag in index modification functions exposed by read-cache.c. Except unpack-trees, the rest of the code base does not do anything funny behind read-cache's back. The actual patch is less valueable than the summary above. But if anyone wants to re-identify the above sites. Applying this patch, then this: diff --git a/cache.h b/cache.h index 430d021..1692891 100644 --- a/cache.h +++ b/cache.h @@ -267,7 +267,7 @@ static inline unsigned int canon_mode(unsigned int mode) #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1) struct index_state { - struct cache_entry **cache; + const struct cache_entry **cache; unsigned int version; unsigned int cache_nr, cache_alloc, cache_changed; struct string_list *resolve_undo; will help quickly identify them without bogus warnings. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
2013-07-09 23:29:00 +08:00
const struct cache_entry *ce = istate->cache[i];
if (!ce_path_match(istate, ce, pathspec, NULL))
continue;
i = unmerge_index_entry_at(istate, i);
}
}