Merge branch 'jh/fsmonitor-prework'

Preliminary changes to fsmonitor integration.

* jh/fsmonitor-prework:
  fsmonitor: refactor initialization of fsmonitor_last_update token
  fsmonitor: allow all entries for a folder to be invalidated
  fsmonitor: log FSMN token when reading and writing the index
  fsmonitor: log invocation of FSMonitor hook to trace2
  read-cache: log the number of scanned files to trace2
  read-cache: log the number of lstat calls to trace2
  preload-index: log the number of lstat calls to trace2
  p7519: add trace logging during perf test
  p7519: move watchman cleanup earlier in the test
  p7519: fix watchman watch-list test on Windows
  p7519: do not rely on "xargs -d" in test
This commit is contained in:
Junio C Hamano 2021-03-01 14:02:56 -08:00
commit 700696bcfc
7 changed files with 196 additions and 26 deletions

View File

@ -90,7 +90,11 @@ int read_fsmonitor_extension(struct index_state *istate, const void *data,
if (!istate->split_index)
assert_index_minimum(istate, istate->fsmonitor_dirty->bit_size);
trace_printf_key(&trace_fsmonitor, "read fsmonitor extension successful");
trace2_data_string("index", NULL, "extension/fsmn/read/token",
istate->fsmonitor_last_update);
trace_printf_key(&trace_fsmonitor,
"read fsmonitor extension successful '%s'",
istate->fsmonitor_last_update);
return 0;
}
@ -134,7 +138,11 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
put_be32(&ewah_size, sb->len - ewah_start);
memcpy(sb->buf + fixup, &ewah_size, sizeof(uint32_t));
trace_printf_key(&trace_fsmonitor, "write fsmonitor extension successful");
trace2_data_string("index", NULL, "extension/fsmn/write/token",
istate->fsmonitor_last_update);
trace_printf_key(&trace_fsmonitor,
"write fsmonitor extension successful '%s'",
istate->fsmonitor_last_update);
}
/*
@ -143,6 +151,7 @@ void write_fsmonitor_extension(struct strbuf *sb, struct index_state *istate)
static int query_fsmonitor(int version, const char *last_update, struct strbuf *query_result)
{
struct child_process cp = CHILD_PROCESS_INIT;
int result;
if (!core_fsmonitor)
return -1;
@ -153,16 +162,63 @@ static int query_fsmonitor(int version, const char *last_update, struct strbuf *
cp.use_shell = 1;
cp.dir = get_git_work_tree();
return capture_command(&cp, query_result, 1024);
trace2_region_enter("fsm_hook", "query", NULL);
result = capture_command(&cp, query_result, 1024);
if (result)
trace2_data_intmax("fsm_hook", NULL, "query/failed", result);
else {
trace2_data_intmax("fsm_hook", NULL, "query/response-length",
query_result->len);
if (fsmonitor_is_trivial_response(query_result))
trace2_data_intmax("fsm_hook", NULL,
"query/trivial-response", 1);
}
trace2_region_leave("fsm_hook", "query", NULL);
return result;
}
static void fsmonitor_refresh_callback(struct index_state *istate, const char *name)
int fsmonitor_is_trivial_response(const struct strbuf *query_result)
{
int pos = index_name_pos(istate, name, strlen(name));
static char trivial_response[3] = { '\0', '/', '\0' };
int is_trivial = !memcmp(trivial_response,
&query_result->buf[query_result->len - 3], 3);
if (pos >= 0) {
struct cache_entry *ce = istate->cache[pos];
ce->ce_flags &= ~CE_FSMONITOR_VALID;
return is_trivial;
}
static void fsmonitor_refresh_callback(struct index_state *istate, char *name)
{
int i, len = strlen(name);
if (name[len - 1] == '/') {
/*
* TODO We should binary search to find the first path with
* TODO this directory prefix. Then linearly update entries
* TODO while the prefix matches. Taking care to search without
* TODO the trailing slash -- because '/' sorts after a few
* TODO interesting special chars, like '.' and ' '.
*/
/* Mark all entries for the folder invalid */
for (i = 0; i < istate->cache_nr; i++) {
if (istate->cache[i]->ce_flags & CE_FSMONITOR_VALID &&
starts_with(istate->cache[i]->name, name))
istate->cache[i]->ce_flags &= ~CE_FSMONITOR_VALID;
}
/* Need to remove the / from the path for the untracked cache */
name[len - 1] = '\0';
} else {
int pos = index_name_pos(istate, name, strlen(name));
if (pos >= 0) {
struct cache_entry *ce = istate->cache[pos];
ce->ce_flags &= ~CE_FSMONITOR_VALID;
}
}
/*
@ -288,16 +344,45 @@ void refresh_fsmonitor(struct index_state *istate)
istate->fsmonitor_last_update = strbuf_detach(&last_update_token, NULL);
}
/*
* The caller wants to turn on FSMonitor. And when the caller writes
* the index to disk, a FSMonitor extension should be included. This
* requires that `istate->fsmonitor_last_update` not be NULL. But we
* have not actually talked to a FSMonitor process yet, so we don't
* have an initial value for this field.
*
* For a protocol V1 FSMonitor process, this field is a formatted
* "nanoseconds since epoch" field. However, for a protocol V2
* FSMonitor process, this field is an opaque token.
*
* Historically, `add_fsmonitor()` has initialized this field to the
* current time for protocol V1 processes. There are lots of race
* conditions here, but that code has shipped...
*
* The only true solution is to use a V2 FSMonitor and get a current
* or default token value (that it understands), but we cannot do that
* until we have actually talked to an instance of the FSMonitor process
* (but the protocol requires that we send a token first...).
*
* For simplicity, just initialize like we have a V1 process and require
* that V2 processes adapt.
*/
static void initialize_fsmonitor_last_update(struct index_state *istate)
{
struct strbuf last_update = STRBUF_INIT;
strbuf_addf(&last_update, "%"PRIu64"", getnanotime());
istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
}
void add_fsmonitor(struct index_state *istate)
{
unsigned int i;
struct strbuf last_update = STRBUF_INIT;
if (!istate->fsmonitor_last_update) {
trace_printf_key(&trace_fsmonitor, "add fsmonitor");
istate->cache_changed |= FSMONITOR_CHANGED;
strbuf_addf(&last_update, "%"PRIu64"", getnanotime());
istate->fsmonitor_last_update = strbuf_detach(&last_update, NULL);
initialize_fsmonitor_last_update(istate);
/* reset the fsmonitor state */
for (i = 0; i < istate->cache_nr; i++)

View File

@ -44,6 +44,11 @@ void tweak_fsmonitor(struct index_state *istate);
*/
void refresh_fsmonitor(struct index_state *istate);
/*
* Does the received result contain the "trivial" response?
*/
int fsmonitor_is_trivial_response(const struct strbuf *query_result);
/*
* Set the given cache entries CE_FSMONITOR_VALID bit. This should be
* called any time the cache entry has been updated to reflect the

View File

@ -31,6 +31,7 @@ struct thread_data {
struct pathspec pathspec;
struct progress_data *progress;
int offset, nr;
int t2_nr_lstat;
};
static void *preload_thread(void *_data)
@ -73,6 +74,7 @@ static void *preload_thread(void *_data)
continue;
if (threaded_has_symlink_leading_path(&cache, ce->name, ce_namelen(ce)))
continue;
p->t2_nr_lstat++;
if (lstat(ce->name, &st))
continue;
if (ie_match_stat(index, ce, &st, CE_MATCH_RACY_IS_DIRTY|CE_MATCH_IGNORE_FSMONITOR))
@ -98,6 +100,7 @@ void preload_index(struct index_state *index,
int threads, i, work, offset;
struct thread_data data[MAX_PARALLEL];
struct progress_data pd;
int t2_sum_lstat = 0;
if (!HAVE_THREADS || !core_preload_index)
return;
@ -107,6 +110,9 @@ void preload_index(struct index_state *index,
threads = 2;
if (threads < 2)
return;
trace2_region_enter("index", "preload", NULL);
trace_performance_enter();
if (threads > MAX_PARALLEL)
threads = MAX_PARALLEL;
@ -141,10 +147,14 @@ void preload_index(struct index_state *index,
struct thread_data *p = data+i;
if (pthread_join(p->pthread, NULL))
die("unable to join threaded lstat");
t2_sum_lstat += p->t2_nr_lstat;
}
stop_progress(&pd.progress);
trace_performance_leave("preload index");
trace2_data_intmax("index", NULL, "preload/sum_lstat", t2_sum_lstat);
trace2_region_leave("index", "preload", NULL);
}
int repo_read_index_preload(struct repository *repo,

View File

@ -1364,7 +1364,9 @@ int add_index_entry(struct index_state *istate, struct cache_entry *ce, int opti
static struct cache_entry *refresh_cache_ent(struct index_state *istate,
struct cache_entry *ce,
unsigned int options, int *err,
int *changed_ret)
int *changed_ret,
int *t2_did_lstat,
int *t2_did_scan)
{
struct stat st;
struct cache_entry *updated;
@ -1406,6 +1408,8 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate,
return NULL;
}
if (t2_did_lstat)
*t2_did_lstat = 1;
if (lstat(ce->name, &st) < 0) {
if (ignore_missing && errno == ENOENT)
return ce;
@ -1442,6 +1446,8 @@ static struct cache_entry *refresh_cache_ent(struct index_state *istate,
}
}
if (t2_did_scan)
*t2_did_scan = 1;
if (ie_modified(istate, ce, &st, options)) {
if (err)
*err = EINVAL;
@ -1519,6 +1525,8 @@ int refresh_index(struct index_state *istate, unsigned int flags,
const char *added_fmt;
const char *unmerged_fmt;
struct progress *progress = NULL;
int t2_sum_lstat = 0;
int t2_sum_scan = 0;
if (flags & REFRESH_PROGRESS && isatty(2))
progress = start_delayed_progress(_("Refresh index"),
@ -1536,11 +1544,14 @@ int refresh_index(struct index_state *istate, unsigned int flags,
* we only have to do the special cases that are left.
*/
preload_index(istate, pathspec, 0);
trace2_region_enter("index", "refresh", NULL);
for (i = 0; i < istate->cache_nr; i++) {
struct cache_entry *ce, *new_entry;
int cache_errno = 0;
int changed = 0;
int filtered = 0;
int t2_did_lstat = 0;
int t2_did_scan = 0;
ce = istate->cache[i];
if (ignore_submodules && S_ISGITLINK(ce->ce_mode))
@ -1566,7 +1577,11 @@ int refresh_index(struct index_state *istate, unsigned int flags,
if (filtered)
continue;
new_entry = refresh_cache_ent(istate, ce, options, &cache_errno, &changed);
new_entry = refresh_cache_ent(istate, ce, options,
&cache_errno, &changed,
&t2_did_lstat, &t2_did_scan);
t2_sum_lstat += t2_did_lstat;
t2_sum_scan += t2_did_scan;
if (new_entry == ce)
continue;
if (progress)
@ -1602,6 +1617,9 @@ int refresh_index(struct index_state *istate, unsigned int flags,
replace_index_entry(istate, i, new_entry);
}
trace2_data_intmax("index", NULL, "refresh/sum_lstat", t2_sum_lstat);
trace2_data_intmax("index", NULL, "refresh/sum_scan", t2_sum_scan);
trace2_region_leave("index", "refresh", NULL);
if (progress) {
display_progress(progress, istate->cache_nr);
stop_progress(&progress);
@ -1614,7 +1632,7 @@ struct cache_entry *refresh_cache_entry(struct index_state *istate,
struct cache_entry *ce,
unsigned int options)
{
return refresh_cache_ent(istate, ce, options, NULL, NULL);
return refresh_cache_ent(istate, ce, options, NULL, NULL, NULL, NULL);
}

1
t/perf/.gitignore vendored
View File

@ -1,3 +1,4 @@
/build/
/test-results/
/test-trace/
/trash directory*/

View File

@ -7,10 +7,10 @@ perf: pre-clean
./run
pre-clean:
rm -rf test-results
rm -rf test-results test-trace
clean:
rm -rf build "trash directory".* test-results
rm -rf build "trash directory".* test-results test-trace
test-lint:
$(MAKE) -C .. test-lint

View File

@ -32,6 +32,8 @@ test_description="Test core.fsmonitor"
#
# GIT_PERF_7519_DROP_CACHE: if set, the OS caches are dropped between tests
#
# GIT_PERF_7519_TRACE: if set, enable trace logging during the test.
# Trace logs will be grouped by fsmonitor provider.
test_perf_large_repo
test_checkout_worktree
@ -70,6 +72,32 @@ then
fi
fi
trace_start() {
if test -n "$GIT_PERF_7519_TRACE"
then
name="$1"
TEST_TRACE_DIR="$TEST_OUTPUT_DIRECTORY/test-trace/p7519/"
echo "Writing trace logging to $TEST_TRACE_DIR"
mkdir -p "$TEST_TRACE_DIR"
# Start Trace2 logging and any other GIT_TRACE_* logs that you
# want for this named test case.
GIT_TRACE2_PERF="$TEST_TRACE_DIR/$name.trace2perf"
export GIT_TRACE2_PERF
>"$GIT_TRACE2_PERF"
fi
}
trace_stop() {
if test -n "$GIT_PERF_7519_TRACE"
then
unset GIT_TRACE2_PERF
fi
}
test_expect_success "one time repo setup" '
# set untrackedCache depending on the environment
if test -n "$GIT_PERF_7519_UNTRACKED_CACHE"
@ -101,7 +129,7 @@ test_expect_success "one time repo setup" '
# If Watchman exists, watch the work tree and attempt a query.
if test_have_prereq WATCHMAN; then
watchman watch "$GIT_WORK_TREE" &&
watchman watch-list | grep -q -F "$GIT_WORK_TREE"
watchman watch-list | grep -q -F "p7519-fsmonitor"
fi
'
@ -169,8 +197,18 @@ test_fsmonitor_suite() {
git status -uall
'
# Update the mtimes on upto 100k files to make status think
# that they are dirty. For simplicity, omit any files with
# LFs (i.e. anything that ls-files thinks it needs to dquote).
# Then fully backslash-quote the paths to capture any
# whitespace so that they pass thru xargs properly.
#
test_perf_w_drop_caches "status (dirty) ($DESC)" '
git ls-files | head -100000 | xargs -d "\n" touch -h &&
git ls-files | \
head -100000 | \
grep -v \" | \
sed '\''s/\(.\)/\\\1/g'\'' | \
xargs test-tool chmtime -300 &&
git status
'
@ -203,6 +241,12 @@ test_fsmonitor_suite() {
'
}
#
# Run a full set of perf tests using each Hook-based fsmonitor provider,
# such as Watchman.
#
trace_start fsmonitor-watchman
if test -n "$GIT_PERF_7519_FSMONITOR"; then
for INTEGRATION_PATH in $GIT_PERF_7519_FSMONITOR; do
test_expect_success "setup for fsmonitor $INTEGRATION_PATH" 'setup_for_fsmonitor'
@ -213,14 +257,6 @@ else
test_fsmonitor_suite
fi
test_expect_success "setup without fsmonitor" '
unset INTEGRATION_SCRIPT &&
git config --unset core.fsmonitor &&
git update-index --no-fsmonitor
'
test_fsmonitor_suite
if test_have_prereq WATCHMAN
then
watchman watch-del "$GIT_WORK_TREE" >/dev/null 2>&1 &&
@ -229,5 +265,20 @@ then
# preventing the removal of the trash directory
watchman shutdown-server >/dev/null 2>&1
fi
trace_stop
#
# Run a full set of perf tests with the fsmonitor feature disabled.
#
trace_start fsmonitor-disabled
test_expect_success "setup without fsmonitor" '
unset INTEGRATION_SCRIPT &&
git config --unset core.fsmonitor &&
git update-index --no-fsmonitor
'
test_fsmonitor_suite
trace_stop
test_done