mirror of
https://github.com/coreutils/coreutils.git
synced 2024-11-24 10:23:31 +08:00
tail: fix inotify startup races
The previous fixes to races in the various tail tests, identified actual races in the tail inotify implementation. With --follow=descriptor, if the tailed file was replaced before the inotify watch was added, then any subsequent changes were ignored. Similarly in --follow=name mode, all changes to a new name were effectively ignored if that name was created after the original open() but before the inotify_add_watch(). * src/tail.c (tail_forever_inotify): Fix 3 cases. 1. With -f, don't stop tailing when file removed before watch. 2. With -f, watch right file when file replaced before watch. 3. With -F, inspect correct file when replaced before watch. Existing tests identify these when tail compiled with TAIL_TEST_SLEEP. * tests/tail-2/inotify-rotate-resources.sh: This test also identifies the issue with --follow=name when TAIL_TEST_SLEEP is used. Adjust so the test is immune to such races, and also fail quicker on remote file systems. * tests/tail-2/inotify-race2.sh: A new test using GDB, based on inotify-race.sh, which tests the -F race without needed recompilation with sleeps. * tests/local.mk: Reference the new test. * NEWS: Mention the bug.
This commit is contained in:
parent
9069f4ec85
commit
cf06a872e7
4
NEWS
4
NEWS
@ -45,6 +45,10 @@ GNU coreutils NEWS -*- outline -*-
|
||||
tail -f again follows changes to a file after it's renamed.
|
||||
[bug introduced in coreutils-7.5]
|
||||
|
||||
tail --follow no longer misses changes to files if those files were
|
||||
replaced before inotify watches were created.
|
||||
[bug introduced in coreutils-7.5]
|
||||
|
||||
** New features
|
||||
|
||||
chroot accepts the new --skip-chdir option to not change the working directory
|
||||
|
58
src/tail.c
58
src/tail.c
@ -925,12 +925,10 @@ fremote (int fd, const char *name)
|
||||
# define fremote(fd, name) false
|
||||
#endif
|
||||
|
||||
/* FIXME: describe */
|
||||
|
||||
/* open/fstat F->name and handle changes. */
|
||||
static void
|
||||
recheck (struct File_spec *f, bool blocking)
|
||||
{
|
||||
/* open/fstat the file and announce if dev/ino have changed */
|
||||
struct stat new_stats;
|
||||
bool ok = true;
|
||||
bool is_stdin = (STREQ (f->name, "-"));
|
||||
@ -1312,7 +1310,7 @@ wd_comparator (const void *e1, const void *e2)
|
||||
return spec1->wd == spec2->wd;
|
||||
}
|
||||
|
||||
/* Helper function used by 'tail_forever_inotify'. */
|
||||
/* Output (new) data for FSPEC->fd. */
|
||||
static void
|
||||
check_fspec (struct File_spec *fspec, int wd, int *prev_wd)
|
||||
{
|
||||
@ -1364,12 +1362,18 @@ static bool
|
||||
tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
|
||||
double sleep_interval)
|
||||
{
|
||||
# if TAIL_TEST_SLEEP
|
||||
/* Delay between open() and inotify_add_watch()
|
||||
to help trigger different cases. */
|
||||
xnanosleep (1000000);
|
||||
# endif
|
||||
unsigned int max_realloc = 3;
|
||||
|
||||
/* Map an inotify watch descriptor to the name of the file it's watching. */
|
||||
Hash_table *wd_to_name;
|
||||
|
||||
bool found_watchable_file = false;
|
||||
bool tailed_but_unwatchable = false;
|
||||
bool found_unwatchable_dir = false;
|
||||
bool no_inotify_resources = false;
|
||||
bool writer_is_dead = false;
|
||||
@ -1438,6 +1442,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
|
||||
|
||||
if (f[i].wd < 0)
|
||||
{
|
||||
if (f[i].fd != -1) /* already tailed. */
|
||||
tailed_but_unwatchable = true;
|
||||
if (errno == ENOSPC || errno == ENOMEM)
|
||||
{
|
||||
no_inotify_resources = true;
|
||||
@ -1459,8 +1465,10 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
|
||||
/* Linux kernel 2.6.24 at least has a bug where eventually, ENOSPC is always
|
||||
returned by inotify_add_watch. In any case we should revert to polling
|
||||
when there are no inotify resources. Also a specified directory may not
|
||||
be currently present or accessible, so revert to polling. */
|
||||
if (no_inotify_resources || found_unwatchable_dir)
|
||||
be currently present or accessible, so revert to polling. Also an already
|
||||
tailed but unwatchable due rename/unlink race, should also revert. */
|
||||
if (no_inotify_resources || found_unwatchable_dir
|
||||
|| (follow_mode == Follow_descriptor && tailed_but_unwatchable))
|
||||
{
|
||||
hash_free (wd_to_name);
|
||||
|
||||
@ -1472,12 +1480,38 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
|
||||
|
||||
prev_wd = f[n_files - 1].wd;
|
||||
|
||||
/* Check files again. New data can be available since last time we checked
|
||||
and before they are watched by inotify. */
|
||||
/* Check files again. New files or data can be available since last time we
|
||||
checked and before they are watched by inotify. */
|
||||
for (i = 0; i < n_files; i++)
|
||||
{
|
||||
if (!f[i].ignore)
|
||||
check_fspec (&f[i], f[i].wd, &prev_wd);
|
||||
if (! f[i].ignore)
|
||||
{
|
||||
/* check for new files. */
|
||||
if (follow_mode == Follow_name)
|
||||
recheck (&(f[i]), false);
|
||||
else if (f[i].fd != -1)
|
||||
{
|
||||
/* If the file was replaced in the small window since we tailed,
|
||||
then assume the watch is on the wrong item (different to
|
||||
that we've already produced output for), and so revert to
|
||||
polling the original descriptor. */
|
||||
struct stat stats;
|
||||
|
||||
if (stat (f[i].name, &stats) == 0
|
||||
&& (f[i].dev != stats.st_dev || f[i].ino != stats.st_ino))
|
||||
{
|
||||
error (0, errno, _("%s was replaced"),
|
||||
quote (pretty_name (&(f[i]))));
|
||||
hash_free (wd_to_name);
|
||||
|
||||
errno = 0;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/* check for new data. */
|
||||
check_fspec (&f[i], f[i].wd, &prev_wd);
|
||||
}
|
||||
}
|
||||
|
||||
evlen += sizeof (struct inotify_event) + 1;
|
||||
@ -1840,7 +1874,7 @@ tail_file (struct File_spec *f, uintmax_t n_units)
|
||||
{
|
||||
struct stat stats;
|
||||
|
||||
#if TEST_RACE_BETWEEN_FINAL_READ_AND_INITIAL_FSTAT
|
||||
#if TAIL_TEST_SLEEP
|
||||
/* Before the tail function provided 'read_pos', there was
|
||||
a race condition described in the URL below. This sleep
|
||||
call made the window big enough to exercise the problem. */
|
||||
@ -2286,7 +2320,7 @@ main (int argc, char **argv)
|
||||
if (fflush (stdout) != 0)
|
||||
error (EXIT_FAILURE, errno, _("write error"));
|
||||
|
||||
if (!tail_forever_inotify (wd, F, n_files, sleep_interval))
|
||||
if (! tail_forever_inotify (wd, F, n_files, sleep_interval))
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
error (0, errno, _("inotify cannot be used, reverting to polling"));
|
||||
|
@ -161,6 +161,7 @@ check-root:
|
||||
all_tests = \
|
||||
tests/misc/help-version.sh \
|
||||
tests/tail-2/inotify-race.sh \
|
||||
tests/tail-2/inotify-race2.sh \
|
||||
tests/misc/invalid-opt.pl \
|
||||
tests/rm/ext3-perf.sh \
|
||||
tests/rm/cycle.sh \
|
||||
|
103
tests/tail-2/inotify-race2.sh
Executable file
103
tests/tail-2/inotify-race2.sh
Executable file
@ -0,0 +1,103 @@
|
||||
#!/bin/sh
|
||||
# Ensure that tail does not ignore a tailed-forever file that has been
|
||||
# replaced between tail's initial read-to-EOF, and when the inotify watches
|
||||
# are established in tail_forever_inotify. That new file would be ignored
|
||||
# indefinitely.
|
||||
|
||||
# Copyright (C) 2015 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
|
||||
print_ver_ tail
|
||||
|
||||
# Terminate any background gdb/tail process
|
||||
cleanup_() {
|
||||
kill $pid 2>/dev/null && wait $pid
|
||||
kill $sleep 2>/dev/null && wait $sleep
|
||||
}
|
||||
|
||||
touch file || framework_failure_
|
||||
touch tail.out || framework_failure_
|
||||
|
||||
( timeout 10s gdb --version ) > gdb.out 2>&1
|
||||
case $(cat gdb.out) in
|
||||
*'GNU gdb'*) ;;
|
||||
*) skip_ "can't run gdb";;
|
||||
esac
|
||||
|
||||
# Break on a line rather than a symbol, to cater for inline functions
|
||||
break_src="$abs_top_srcdir/src/tail.c"
|
||||
break_line=$(grep -n ^tail_forever_inotify "$break_src") || framework_failure_
|
||||
break_line=$(echo "$break_line" | cut -d: -f1) || framework_failure_
|
||||
|
||||
|
||||
# Note we get tail to monitor a background sleep process
|
||||
# rather than using timeout(1), as timeout sends SIGCONT
|
||||
# signals to its monitored process, and gdb (7.9 at least)
|
||||
# has _intermittent_ issues with this.
|
||||
# Sending SIGCONT resulted in either delayed child termination,
|
||||
# or no child termination resulting in a hung test.
|
||||
# See https://sourceware.org/bugzilla/show_bug.cgi?id=18364
|
||||
|
||||
env sleep 10 & sleep=$!
|
||||
|
||||
# See if gdb works and
|
||||
# tail_forever_inotify is compiled and run
|
||||
gdb -nx --batch-silent \
|
||||
--eval-command="break $break_line" \
|
||||
--eval-command="run --pid=$sleep -f file" \
|
||||
--eval-command='quit' \
|
||||
tail < /dev/null > gdb.out 2>&1
|
||||
|
||||
kill $sleep || skip_ 'breakpoint not hit'
|
||||
wait $sleep
|
||||
|
||||
# FIXME: The above is seen to _intermittently_ fail with:
|
||||
# warning: .dynamic section for "/lib/libc.so.6" is not at the expected address
|
||||
# warning: difference appears to be caused by prelink, adjusting expectations
|
||||
compare /dev/null gdb.out || skip_ "can't set breakpoints in tail"
|
||||
|
||||
env sleep 10 & sleep=$!
|
||||
|
||||
echo never-seen-with-tail-8.23 > file.new || framework_failure_
|
||||
|
||||
# Run "tail -F file", stopping to replace with a new file before
|
||||
# inotify initialization, and then continue. Before the fix,
|
||||
# changes to the new file would effectively be ignored.
|
||||
gdb -nx --batch-silent \
|
||||
--eval-command="break $break_line" \
|
||||
--eval-command="run --pid=$sleep -F file 2>tail.err >>tail.out" \
|
||||
--eval-command='shell mv file.new file' \
|
||||
--eval-command='continue' \
|
||||
--eval-command='quit' \
|
||||
tail < /dev/null > /dev/null 2>&1 & pid=$!
|
||||
|
||||
# Note even updating the watched 'file' wouldn't have output
|
||||
# anything between coreutils 7.5 and 8.23 inclusive as
|
||||
# The old file descriptor (still held open by tail) was being fstat().
|
||||
|
||||
tail --pid=$pid -f tail.out | (read; kill $pid)
|
||||
|
||||
# gdb has a bug in Debian's gdb-6.8-3 at least that causes it to not
|
||||
# cleanup and exit correctly when it receives a SIGTERM, but
|
||||
# killing sleep, should cause the tail process and thus gdb to exit.
|
||||
kill $sleep
|
||||
wait $sleep
|
||||
|
||||
wait $pid
|
||||
|
||||
compare /dev/null tail.out && { cat tail.err; fail=1; }
|
||||
|
||||
Exit $fail
|
@ -22,7 +22,11 @@ print_ver_ tail
|
||||
grep '^#define HAVE_INOTIFY 1' "$CONFIG_HEADER" >/dev/null \
|
||||
|| skip_ 'inotify required'
|
||||
|
||||
require_strace_ inotify_rm_watch
|
||||
require_strace_ 'inotify_add_watch,inotify_rm_watch'
|
||||
|
||||
# Quickly skip on remote file systems
|
||||
df --local . >/dev/null 2>&1 ||
|
||||
skip_ 'inotify not used on remote file system'
|
||||
|
||||
check_tail_output()
|
||||
{
|
||||
@ -59,7 +63,7 @@ touch k || framework_failure_
|
||||
# however without it strace will ignore SIGTERM.
|
||||
# strace does always honor SIGTERM with the -I2 option,
|
||||
# though that's not available on RHEL6 for example.
|
||||
timeout 180 strace -e inotify_rm_watch -o strace.out \
|
||||
timeout 180 strace -e inotify_add_watch,inotify_rm_watch -o strace.out \
|
||||
tail -F $fastpoll k >> out 2>&1 & pid=$!
|
||||
|
||||
reverted_to_polling_=0
|
||||
@ -67,19 +71,24 @@ for i in $(seq 2); do
|
||||
echo $i
|
||||
|
||||
echo 'tailed' > k;
|
||||
# wait for 'tailed' in (after first iteration; new) file and then in 'out'
|
||||
grep_timeout 'tailed' || { cleanup_fail 'failed to find "tailed"'; break; }
|
||||
|
||||
# Wait for watch on (new) file
|
||||
strace_re='inotify_add_watch.*MODIFY' retry_delay_ check_strace .1 8 ||
|
||||
no_watch_=1
|
||||
|
||||
# Assume this is not because we're leaking
|
||||
# (resources may already be depleted)
|
||||
# The explicit check for inotify_rm_watch should confirm that.
|
||||
grep -F 'reverting to polling' out >/dev/null && skip_ 'inotify unused'
|
||||
|
||||
# Otherwise failure is unknown
|
||||
test "$no_watch_" && { cat out; framework_failure_ 'no inotify_add_watch'; }
|
||||
|
||||
mv k k.tmp
|
||||
# wait for tail to detect the rename
|
||||
grep_timeout 'inaccessible' ||
|
||||
{ cleanup_fail 'failed to detect rename'; break; }
|
||||
|
||||
# Assume this is not because we're leaking.
|
||||
# The explicit check for inotify_rm_watch should confirm that.
|
||||
grep -F 'reverting to polling' out >/dev/null &&
|
||||
{ reverted_to_polling_=1; break; }
|
||||
|
||||
# Note we strace here rather than consuming all available watches
|
||||
# to be more efficient, but more importantly avoid depleting resources.
|
||||
# Note also available resources can currently be tuned with:
|
||||
@ -95,8 +104,6 @@ for i in $(seq 2); do
|
||||
>out && >strace.out || framework_failure_ 'failed to reset output files'
|
||||
done
|
||||
|
||||
test "$reverted_to_polling_" = 1 && skip_ 'inotify resources already depleted'
|
||||
|
||||
cleanup_
|
||||
|
||||
Exit $fail
|
||||
|
Loading…
Reference in New Issue
Block a user