tail: fix inotify startup races

The previous fixes to races in the various tail tests,
identified actual races in the tail inotify implementation.
With --follow=descriptor, if the tailed file was replaced before
the inotify watch was added, then any subsequent changes were ignored.
Similarly in --follow=name mode, all changes to a new name were
effectively ignored if that name was created after the original open()
but before the inotify_add_watch().

* src/tail.c (tail_forever_inotify): Fix 3 cases.
1. With -f, don't stop tailing when file removed before watch.
2. With -f, watch right file when file replaced before watch.
3. With -F, inspect correct file when replaced before watch.
Existing tests identify these when tail compiled with TAIL_TEST_SLEEP.
* tests/tail-2/inotify-rotate-resources.sh:
This test also identifies the issue with --follow=name
when TAIL_TEST_SLEEP is used.  Adjust so the test is immune
to such races, and also fail quicker on remote file systems.
* tests/tail-2/inotify-race2.sh: A new test using GDB,
based on inotify-race.sh, which tests the -F race
without needed recompilation with sleeps.
* tests/local.mk: Reference the new test.
* NEWS: Mention the bug.
This commit is contained in:
Pádraig Brady 2015-05-05 22:11:24 +01:00
parent 9069f4ec85
commit cf06a872e7
5 changed files with 172 additions and 23 deletions

4
NEWS
View File

@ -45,6 +45,10 @@ GNU coreutils NEWS -*- outline -*-
tail -f again follows changes to a file after it's renamed.
[bug introduced in coreutils-7.5]
tail --follow no longer misses changes to files if those files were
replaced before inotify watches were created.
[bug introduced in coreutils-7.5]
** New features
chroot accepts the new --skip-chdir option to not change the working directory

View File

@ -925,12 +925,10 @@ fremote (int fd, const char *name)
# define fremote(fd, name) false
#endif
/* FIXME: describe */
/* open/fstat F->name and handle changes. */
static void
recheck (struct File_spec *f, bool blocking)
{
/* open/fstat the file and announce if dev/ino have changed */
struct stat new_stats;
bool ok = true;
bool is_stdin = (STREQ (f->name, "-"));
@ -1312,7 +1310,7 @@ wd_comparator (const void *e1, const void *e2)
return spec1->wd == spec2->wd;
}
/* Helper function used by 'tail_forever_inotify'. */
/* Output (new) data for FSPEC->fd. */
static void
check_fspec (struct File_spec *fspec, int wd, int *prev_wd)
{
@ -1364,12 +1362,18 @@ static bool
tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
double sleep_interval)
{
# if TAIL_TEST_SLEEP
/* Delay between open() and inotify_add_watch()
to help trigger different cases. */
xnanosleep (1000000);
# endif
unsigned int max_realloc = 3;
/* Map an inotify watch descriptor to the name of the file it's watching. */
Hash_table *wd_to_name;
bool found_watchable_file = false;
bool tailed_but_unwatchable = false;
bool found_unwatchable_dir = false;
bool no_inotify_resources = false;
bool writer_is_dead = false;
@ -1438,6 +1442,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
if (f[i].wd < 0)
{
if (f[i].fd != -1) /* already tailed. */
tailed_but_unwatchable = true;
if (errno == ENOSPC || errno == ENOMEM)
{
no_inotify_resources = true;
@ -1459,8 +1465,10 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
/* Linux kernel 2.6.24 at least has a bug where eventually, ENOSPC is always
returned by inotify_add_watch. In any case we should revert to polling
when there are no inotify resources. Also a specified directory may not
be currently present or accessible, so revert to polling. */
if (no_inotify_resources || found_unwatchable_dir)
be currently present or accessible, so revert to polling. Also an already
tailed but unwatchable due rename/unlink race, should also revert. */
if (no_inotify_resources || found_unwatchable_dir
|| (follow_mode == Follow_descriptor && tailed_but_unwatchable))
{
hash_free (wd_to_name);
@ -1472,12 +1480,38 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
prev_wd = f[n_files - 1].wd;
/* Check files again. New data can be available since last time we checked
and before they are watched by inotify. */
/* Check files again. New files or data can be available since last time we
checked and before they are watched by inotify. */
for (i = 0; i < n_files; i++)
{
if (!f[i].ignore)
check_fspec (&f[i], f[i].wd, &prev_wd);
if (! f[i].ignore)
{
/* check for new files. */
if (follow_mode == Follow_name)
recheck (&(f[i]), false);
else if (f[i].fd != -1)
{
/* If the file was replaced in the small window since we tailed,
then assume the watch is on the wrong item (different to
that we've already produced output for), and so revert to
polling the original descriptor. */
struct stat stats;
if (stat (f[i].name, &stats) == 0
&& (f[i].dev != stats.st_dev || f[i].ino != stats.st_ino))
{
error (0, errno, _("%s was replaced"),
quote (pretty_name (&(f[i]))));
hash_free (wd_to_name);
errno = 0;
return true;
}
}
/* check for new data. */
check_fspec (&f[i], f[i].wd, &prev_wd);
}
}
evlen += sizeof (struct inotify_event) + 1;
@ -1840,7 +1874,7 @@ tail_file (struct File_spec *f, uintmax_t n_units)
{
struct stat stats;
#if TEST_RACE_BETWEEN_FINAL_READ_AND_INITIAL_FSTAT
#if TAIL_TEST_SLEEP
/* Before the tail function provided 'read_pos', there was
a race condition described in the URL below. This sleep
call made the window big enough to exercise the problem. */
@ -2286,7 +2320,7 @@ main (int argc, char **argv)
if (fflush (stdout) != 0)
error (EXIT_FAILURE, errno, _("write error"));
if (!tail_forever_inotify (wd, F, n_files, sleep_interval))
if (! tail_forever_inotify (wd, F, n_files, sleep_interval))
return EXIT_FAILURE;
}
error (0, errno, _("inotify cannot be used, reverting to polling"));

View File

@ -161,6 +161,7 @@ check-root:
all_tests = \
tests/misc/help-version.sh \
tests/tail-2/inotify-race.sh \
tests/tail-2/inotify-race2.sh \
tests/misc/invalid-opt.pl \
tests/rm/ext3-perf.sh \
tests/rm/cycle.sh \

103
tests/tail-2/inotify-race2.sh Executable file
View File

@ -0,0 +1,103 @@
#!/bin/sh
# Ensure that tail does not ignore a tailed-forever file that has been
# replaced between tail's initial read-to-EOF, and when the inotify watches
# are established in tail_forever_inotify. That new file would be ignored
# indefinitely.
# Copyright (C) 2015 Free Software Foundation, Inc.
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
print_ver_ tail
# Terminate any background gdb/tail process
cleanup_() {
kill $pid 2>/dev/null && wait $pid
kill $sleep 2>/dev/null && wait $sleep
}
touch file || framework_failure_
touch tail.out || framework_failure_
( timeout 10s gdb --version ) > gdb.out 2>&1
case $(cat gdb.out) in
*'GNU gdb'*) ;;
*) skip_ "can't run gdb";;
esac
# Break on a line rather than a symbol, to cater for inline functions
break_src="$abs_top_srcdir/src/tail.c"
break_line=$(grep -n ^tail_forever_inotify "$break_src") || framework_failure_
break_line=$(echo "$break_line" | cut -d: -f1) || framework_failure_
# Note we get tail to monitor a background sleep process
# rather than using timeout(1), as timeout sends SIGCONT
# signals to its monitored process, and gdb (7.9 at least)
# has _intermittent_ issues with this.
# Sending SIGCONT resulted in either delayed child termination,
# or no child termination resulting in a hung test.
# See https://sourceware.org/bugzilla/show_bug.cgi?id=18364
env sleep 10 & sleep=$!
# See if gdb works and
# tail_forever_inotify is compiled and run
gdb -nx --batch-silent \
--eval-command="break $break_line" \
--eval-command="run --pid=$sleep -f file" \
--eval-command='quit' \
tail < /dev/null > gdb.out 2>&1
kill $sleep || skip_ 'breakpoint not hit'
wait $sleep
# FIXME: The above is seen to _intermittently_ fail with:
# warning: .dynamic section for "/lib/libc.so.6" is not at the expected address
# warning: difference appears to be caused by prelink, adjusting expectations
compare /dev/null gdb.out || skip_ "can't set breakpoints in tail"
env sleep 10 & sleep=$!
echo never-seen-with-tail-8.23 > file.new || framework_failure_
# Run "tail -F file", stopping to replace with a new file before
# inotify initialization, and then continue. Before the fix,
# changes to the new file would effectively be ignored.
gdb -nx --batch-silent \
--eval-command="break $break_line" \
--eval-command="run --pid=$sleep -F file 2>tail.err >>tail.out" \
--eval-command='shell mv file.new file' \
--eval-command='continue' \
--eval-command='quit' \
tail < /dev/null > /dev/null 2>&1 & pid=$!
# Note even updating the watched 'file' wouldn't have output
# anything between coreutils 7.5 and 8.23 inclusive as
# The old file descriptor (still held open by tail) was being fstat().
tail --pid=$pid -f tail.out | (read; kill $pid)
# gdb has a bug in Debian's gdb-6.8-3 at least that causes it to not
# cleanup and exit correctly when it receives a SIGTERM, but
# killing sleep, should cause the tail process and thus gdb to exit.
kill $sleep
wait $sleep
wait $pid
compare /dev/null tail.out && { cat tail.err; fail=1; }
Exit $fail

View File

@ -22,7 +22,11 @@ print_ver_ tail
grep '^#define HAVE_INOTIFY 1' "$CONFIG_HEADER" >/dev/null \
|| skip_ 'inotify required'
require_strace_ inotify_rm_watch
require_strace_ 'inotify_add_watch,inotify_rm_watch'
# Quickly skip on remote file systems
df --local . >/dev/null 2>&1 ||
skip_ 'inotify not used on remote file system'
check_tail_output()
{
@ -59,7 +63,7 @@ touch k || framework_failure_
# however without it strace will ignore SIGTERM.
# strace does always honor SIGTERM with the -I2 option,
# though that's not available on RHEL6 for example.
timeout 180 strace -e inotify_rm_watch -o strace.out \
timeout 180 strace -e inotify_add_watch,inotify_rm_watch -o strace.out \
tail -F $fastpoll k >> out 2>&1 & pid=$!
reverted_to_polling_=0
@ -67,19 +71,24 @@ for i in $(seq 2); do
echo $i
echo 'tailed' > k;
# wait for 'tailed' in (after first iteration; new) file and then in 'out'
grep_timeout 'tailed' || { cleanup_fail 'failed to find "tailed"'; break; }
# Wait for watch on (new) file
strace_re='inotify_add_watch.*MODIFY' retry_delay_ check_strace .1 8 ||
no_watch_=1
# Assume this is not because we're leaking
# (resources may already be depleted)
# The explicit check for inotify_rm_watch should confirm that.
grep -F 'reverting to polling' out >/dev/null && skip_ 'inotify unused'
# Otherwise failure is unknown
test "$no_watch_" && { cat out; framework_failure_ 'no inotify_add_watch'; }
mv k k.tmp
# wait for tail to detect the rename
grep_timeout 'inaccessible' ||
{ cleanup_fail 'failed to detect rename'; break; }
# Assume this is not because we're leaking.
# The explicit check for inotify_rm_watch should confirm that.
grep -F 'reverting to polling' out >/dev/null &&
{ reverted_to_polling_=1; break; }
# Note we strace here rather than consuming all available watches
# to be more efficient, but more importantly avoid depleting resources.
# Note also available resources can currently be tuned with:
@ -95,8 +104,6 @@ for i in $(seq 2); do
>out && >strace.out || framework_failure_ 'failed to reset output files'
done
test "$reverted_to_polling_" = 1 && skip_ 'inotify resources already depleted'
cleanup_
Exit $fail