tail: avoid a race where we could miss new data with --pid

* src/tail.c (tail_forever, tail_forever_inotify): Close a race in tail_forever_inotify where new data written after the file check by a now dead process, but before the pid check, is not output. We use the POSIX guarantee that read() and write() are serialized wrt each other even in separate processes, to assume full file consistency after exit() and so poll for new data _after_ the writer has exited. This also allows us to not redundantly _wait_ for new data if the process is dead. * tests/tail-2/pid: Remove the now partially invalid sub second sleep check as we now don't unconditionally wait, and replace it with a check for the redundant sleep. Also clarify some of the existing comments. * NEWS: Mention the fix.
2024-11-24 10:23:31 +08:00 · 2009-10-01 08:36:25 +01:00 · 2009-10-01 08:36:25 +01:00 · f8726e05c4
commit f8726e05c4
parent 95c01c656e
3 changed files with 39 additions and 30 deletions
--- a/6
+++ b/6
@ -22,6 +22,12 @@ GNU coreutils NEWS                                    -*- outline -*-
  from a failed stat/lstat.  For example ls -Lis now prints "?", not "0",
  for the inode number and allocated size of a dereferenced dangling symlink.

+  tail --follow --pid now avoids a race condition where data written
+  just before the process dies might not have been output by tail.
+  Also, tail no longer delays at all when the specified pid is not live.
+  [The race was introduced in coreutils-7.5,
+   and the unnecessary delay was present since textutils-1.22o]
+
 ** Portability

  On Solaris 9, many commands would mistakenly treat file/ the same as
--- a/src/tail.c
+++ b/src/tail.c
@ -1135,9 +1135,6 @@ tail_forever (struct File_spec *f, size_t n_files, double sleep_interval)
          if (writer_is_dead)
            break;

-          if (xnanosleep (sleep_interval))
-            error (EXIT_FAILURE, errno, _("cannot read realtime clock"));
-
          /* Once the writer is dead, read the files once more to
             avoid a race condition.  */
          writer_is_dead = (pid != 0
@ -1146,6 +1143,10 @@ tail_forever (struct File_spec *f, size_t n_files, double sleep_interval)
                               signal to the writer, so kill fails and sets
                               errno to EPERM.  */
                            && errno != EPERM);
+
+          if (!writer_is_dead && xnanosleep (sleep_interval))
+            error (EXIT_FAILURE, errno, _("cannot read realtime clock"));
+
        }
    }
 }
@ -1179,6 +1180,7 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
  Hash_table *wd_table;

  bool found_watchable = false;
+  bool writer_is_dead = false;
  int prev_wd;
  size_t evlen = 0;
  char *evbuf;
@ -1266,30 +1268,30 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files,
         indefinetely.  */
      if (pid)
        {
-          fd_set rfd;
-          struct timeval select_timeout;
-          int n_descriptors;
+          if (writer_is_dead)
+            exit (EXIT_SUCCESS);

-          FD_ZERO (&rfd);
-          FD_SET (wd, &rfd);
+          writer_is_dead = (kill (pid, 0) != 0 && errno != EPERM);

-          select_timeout.tv_sec = (time_t) sleep_interval;
-          select_timeout.tv_usec = 1000000 * (sleep_interval
-                                              - select_timeout.tv_sec);
-
-          n_descriptors = select (wd + 1, &rfd, NULL, NULL, &select_timeout);
-
-          if (n_descriptors == -1)
-            error (EXIT_FAILURE, errno, _("error monitoring inotify event"));
-
-          if (n_descriptors == 0)
+          struct timeval delay; /* how long to wait for file changes.  */
+          if (writer_is_dead)
+            delay.tv_sec = delay.tv_usec = 0;
+          else
            {
-              /* See if the process we are monitoring is still alive.  */
-              if (kill (pid, 0) != 0 && errno != EPERM)
-                exit (EXIT_SUCCESS);
-
-              continue;
+              delay.tv_sec = (time_t) sleep_interval;
+              delay.tv_usec = 1000000 * (sleep_interval - delay.tv_sec);
            }
+
+           fd_set rfd;
+           FD_ZERO (&rfd);
+           FD_SET (wd, &rfd);
+
+           int file_change = select (wd + 1, &rfd, NULL, NULL, &delay);
+
+           if (file_change == 0)
+             continue;
+           else if (file_change == -1)
+             error (EXIT_FAILURE, errno, _("error monitoring inotify event"));
        }

      if (len <= evbuf_off)
--- a/tests/tail-2/pid
+++ b/tests/tail-2/pid
@ -29,26 +29,27 @@ touch here || framework_failure
 fail=0

 for inotify in ---disable-inotify ''; do
-  # Use tail itself to create a background process to monitor.
+  # Use tail itself to create a background process to monitor,
+  # which will auto exit when "here" is removed.
  tail -f $inotify here &
  bg_pid=$!

  # Ensure that tail --pid=PID does not exit when PID is alive.
-  timeout 1 tail -s.1 -f $inotify here --pid=$bg_pid
+  timeout 1 tail -f -s.1 --pid=$bg_pid $inotify here
  test $? = 124 || fail=1

  # Cleanup background process
  kill $bg_pid

-  # Ensure that tail --pid=PID exits successfully when PID is dead.
+  # Ensure that tail --pid=PID exits with success status when PID is dead.
  # Use an unlikely-to-be-live PID
-  timeout 3 tail -s.1 --pid=$PID_T_MAX -f $inotify /dev/null
+  timeout 3 tail -f -s.1 --pid=$PID_T_MAX $inotify /dev/null
  ret=$?
-  test $ret = 124 && skip_test_ "pid $PID_T_MAX present"
+  test $ret = 124 && skip_test_ "pid $PID_T_MAX present or tail too slow"
  test $ret = 0 || fail=1

-  # Ensure fractional sleep parameter is honored with --pid
-  timeout 3 tail -s.1 -f $inotify /dev/null --pid=$PID_T_MAX
+  # Ensure tail doesn't wait for data when PID is dead
+  timeout 3 tail -f -s10 --pid=$PID_T_MAX $inotify /dev/null
  test $? = 124 && fail=1
 done