wc: don't miscount /sys and similar file systems

Fix similar problems in head, od, split, tac, and tail.
Reported by George Shuklin in: http://bugs.gnu.org/18621
* NEWS: Document this.
* src/head.c (elseek): Move up.
(elide_tail_bytes_pipe, elide_tail_lines_pipe): New arg CURRENT_POS.
All uses changed.
(elide_tail_bytes_file, elide_tail_lines_file): New arg ST and
remove arg SIZE. All uses changed.
* src/head.c (elide_tail_bytes_file):
* src/od.c (skip): Avoid optimization for /sys files, where
st_size is bogus and st_size == st_blksize.
Don't report error at EOF when not optimizing.
* src/head.c, src/od.c, src/tail.c: Include "stat-size.h".
* src/split.c (input_file_size): New function.
(bytes_split, lines_chunk_split, bytes_chunk_extract):
New arg INITIAL_READ. All uses changed.
Use it to double-check st_size.
* src/tac.c (tac_seekable): New arg FILE_POS. All uses changed.
(copy_to_temp): Return size of temp file. All uses changed.
* src/tac.c (tac_seekable):
* src/tail.c (tail_bytes):
* src/wc.c (wc): Don't trust st_size; double-check by reading.
* src/wc.c (wc): New arg CURRENT_POS. All uses changed.
* tests/local.mk (all_tests): Add tests/misc/wc-proc.sh,
tests/misc/od-j.sh, tests/tail-2/tail-c.sh.
* tests/misc/head-c.sh:
* tests/misc/tac-2-nonseekable.sh:
* tests/split/b-chunk.sh: Add tests for problems with /proc
and /sys files.
* tests/misc/od-j.sh, tests/misc/wc-proc.sh, tests/tail-2/tail-c.sh:
New files.
2024-11-24 10:23:31 +08:00 · 2014-10-07 16:46:08 -07:00 · 2014-10-07 16:46:08 -07:00 · 2662702b9e
commit 2662702b9e
parent b020002b4b
14 changed files with 465 additions and 190 deletions
--- a/3
+++ b/3
@ -12,6 +12,9 @@ GNU coreutils NEWS                                    -*- outline -*-
  file types, a warning is issued for source directories with duplicate names,
  or with -H the directory is copied again using the symlink name.

+  head, od, split, tac, tail, and wc no longer mishandle input from files in
+  /proc and /sys file systems that report somewhat-incorrect file sizes.
+
 ** New features

  chroot accepts the new --skip-chdir option to not change the working directory
--- a/src/head.c
+++ b/src/head.c
@ -36,6 +36,7 @@
 #include "quote.h"
 #include "quotearg.h"
 #include "safe-read.h"
+#include "stat-size.h"
 #include "xfreopen.h"
 #include "xstrtol.h"

@ -206,13 +207,42 @@ copy_fd (int src_fd, uintmax_t n_bytes)
  return COPY_FD_OK;
 }

-/* Print all but the last N_ELIDE bytes from the input available via
-   the non-seekable file descriptor FD.  Return true upon success.
+/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD
+   corresponds to the file FILENAME.  WHENCE must be SEEK_SET or
+   SEEK_CUR.  Return the resulting offset.  Give a diagnostic and
+   return -1 if lseek fails.  */
+
+static off_t
+elseek (int fd, off_t offset, int whence, char const *filename)
+{
+  off_t new_offset = lseek (fd, offset, whence);
+  char buf[INT_BUFSIZE_BOUND (offset)];
+
+  if (new_offset < 0)
+    error (0, errno,
+           _(whence == SEEK_SET
+             ? N_("%s: cannot seek to offset %s")
+             : N_("%s: cannot seek to relative offset %s")),
+           quotearg_colon (filename),
+           offtostr (offset, buf));
+
+  return new_offset;
+}
+
+/* For an input file with name FILENAME and descriptor FD,
+   output all but the last N_ELIDE_0 bytes.
+   If CURRENT_POS is nonnegative, assume that the input file is
+   positioned at CURRENT_POS and that it should be repositioned to
+   just before the elided bytes before returning.
+   Return true upon success.
   Give a diagnostic and return false upon error.  */
 static bool
-elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
+elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0,
+                       off_t current_pos)
 {
  size_t n_elide = n_elide_0;
+  uintmax_t desired_pos = current_pos;
+  bool ok = true;

 #ifndef HEAD_TAIL_PIPE_READ_BUFSIZE
 # define HEAD_TAIL_PIPE_READ_BUFSIZE BUFSIZ
@ -251,7 +281,6 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)

  if (n_elide <= HEAD_TAIL_PIPE_BYTECOUNT_THRESHOLD)
    {
-      bool ok = true;
      bool first = true;
      bool eof = false;
      size_t n_to_read = READ_BUFSIZE + n_elide;
@ -293,22 +322,26 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
          /* Output any (but maybe just part of the) elided data from
             the previous round.  */
          if (! first)
-            xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta);
+            {
+              desired_pos += n_elide - delta;
+              xwrite_stdout (b[!i] + READ_BUFSIZE, n_elide - delta);
+            }
          first = false;

          if (n_elide < n_read)
-            xwrite_stdout (b[i], n_read - n_elide);
+            {
+              desired_pos += n_read - n_elide;
+              xwrite_stdout (b[i], n_read - n_elide);
+            }
        }

      free (b[0]);
-      return ok;
    }
  else
    {
      /* Read blocks of size READ_BUFSIZE, until we've read at least n_elide
         bytes.  Then, for each new buffer we read, also write an old one.  */

-      bool ok = true;
      bool eof = false;
      size_t n_read;
      bool buffered_enough;
@ -357,7 +390,10 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
            buffered_enough = true;

          if (buffered_enough)
-            xwrite_stdout (b[i_next], n_read);
+            {
+              desired_pos += n_read;
+              xwrite_stdout (b[i_next], n_read);
+            }
        }

      /* Output any remainder: rem bytes from b[i] + n_read.  */
@ -366,6 +402,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
          if (buffered_enough)
            {
              size_t n_bytes_left_in_b_i = READ_BUFSIZE - n_read;
+              desired_pos += rem;
              if (rem < n_bytes_left_in_b_i)
                {
                  xwrite_stdout (b[i] + n_read, rem);
@ -392,6 +429,7 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
               */
              size_t y = READ_BUFSIZE - rem;
              size_t x = n_read - y;
+              desired_pos += x;
              xwrite_stdout (b[i_next], x);
            }
        }
@ -400,36 +438,16 @@ elide_tail_bytes_pipe (const char *filename, int fd, uintmax_t n_elide_0)
      for (i = 0; i < n_alloc; i++)
        free (b[i]);
      free (b);
-
-      return ok;
    }
-}

-/* Call lseek (FD, OFFSET, WHENCE), where file descriptor FD
-   corresponds to the file FILENAME.  WHENCE must be SEEK_SET or
-   SEEK_CUR.  Return the resulting offset.  Give a diagnostic and
-   return -1 if lseek fails.  */
-
-static off_t
-elseek (int fd, off_t offset, int whence, char const *filename)
-{
-  off_t new_offset = lseek (fd, offset, whence);
-  char buf[INT_BUFSIZE_BOUND (offset)];
-
-  if (new_offset < 0)
-    error (0, errno,
-           _(whence == SEEK_SET
-             ? N_("%s: cannot seek to offset %s")
-             : N_("%s: cannot seek to relative offset %s")),
-           quotearg_colon (filename),
-           offtostr (offset, buf));
-
-  return new_offset;
+  if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0)
+    ok = false;
+  return ok;
 }

 /* For the file FILENAME with descriptor FD, output all but the last N_ELIDE
   bytes.  If SIZE is nonnegative, this is a regular file positioned
-   at START_POS with SIZE bytes.  Return true on success.
+   at CURRENT_POS with SIZE bytes.  Return true on success.
   Give a diagnostic and return false upon error.  */

 /* NOTE: if the input file shrinks by more than N_ELIDE bytes between
@ -437,10 +455,11 @@ elseek (int fd, off_t offset, int whence, char const *filename)

 static bool
 elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide,
-                       off_t current_pos, off_t size)
+                       struct stat const *st, off_t current_pos)
 {
-  if (size < 0)
-    return elide_tail_bytes_pipe (filename, fd, n_elide);
+  off_t size = st->st_size;
+  if (size <= ST_BLKSIZE (*st))
+    return elide_tail_bytes_pipe (filename, fd, n_elide, current_pos);
  else
    {
      /* Be careful here.  The current position may actually be
@ -460,13 +479,16 @@ elide_tail_bytes_file (const char *filename, int fd, uintmax_t n_elide,
    }
 }

-/* Print all but the last N_ELIDE lines from the input stream
-   open for reading via file descriptor FD.
+/* For an input file with name FILENAME and descriptor FD,
+   output all but the last N_ELIDE lines.
+   If CURRENT_POS is nonnegative, the input file is positioned there
+   and should be repositioned to just before the elided bytes.
   Buffer the specified number of lines as a linked list of LBUFFERs,
   adding them as needed.  Return true if successful.  */

 static bool
-elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
+elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
+                       off_t current_pos)
 {
  struct linebuffer
  {
@ -475,6 +497,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
    size_t nlines;
    struct linebuffer *next;
  };
+  uintmax_t desired_pos = current_pos;
  typedef struct linebuffer LBUFFER;
  LBUFFER *first, *last, *tmp;
  size_t total_lines = 0;	/* Total number of newlines in all buffers.  */
@ -497,6 +520,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)

      if (! n_elide)
        {
+          desired_pos += n_read;
          xwrite_stdout (tmp->buffer, n_read);
          continue;
        }
@ -536,6 +560,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
          last = last->next = tmp;
          if (n_elide < total_lines - first->nlines)
            {
+              desired_pos += first->nbytes;
              xwrite_stdout (first->buffer, first->nbytes);
              tmp = first;
              total_lines -= first->nlines;
@ -565,6 +590,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)

  for (tmp = first; n_elide < total_lines - tmp->nlines; tmp = tmp->next)
    {
+      desired_pos += tmp->nbytes;
      xwrite_stdout (tmp->buffer, tmp->nbytes);
      total_lines -= tmp->nlines;
    }
@ -581,6 +607,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide)
          ++tmp->nlines;
          --n;
        }
+      desired_pos += p - tmp->buffer;
      xwrite_stdout (tmp->buffer, p - tmp->buffer);
    }

@ -591,6 +618,9 @@ free_lbuffers:
      free (first);
      first = tmp;
    }
+
+  if (0 <= current_pos && elseek (fd, desired_pos, SEEK_SET, filename) < 0)
+    ok = false;
  return ok;
 }

@ -714,10 +744,11 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,

 static bool
 elide_tail_lines_file (const char *filename, int fd, uintmax_t n_elide,
-                       off_t current_pos, off_t size)
+                       struct stat const *st, off_t current_pos)
 {
-  if (size < 0)
-    return elide_tail_lines_pipe (filename, fd, n_elide);
+  off_t size = st->st_size;
+  if (size <= ST_BLKSIZE (*st))
+    return elide_tail_lines_pipe (filename, fd, n_elide, current_pos);
  else
    {
      /* Find the offset, OFF, of the Nth newline from the end,
@ -802,28 +833,24 @@ head (const char *filename, int fd, uintmax_t n_units, bool count_lines,

  if (elide_from_end)
    {
-      off_t current_pos = -1, size = -1;
-      if (! presume_input_pipe)
+      off_t current_pos = -1;
+      struct stat st;
+      if (fstat (fd, &st) != 0)
        {
-          struct stat st;
-          if (fstat (fd, &st) != 0)
-            {
-              error (0, errno, _("cannot fstat %s"),
-                     quotearg_colon (filename));
-              return false;
-            }
-          if (S_ISREG (st.st_mode))
-            {
-              size = st.st_size;
-              current_pos = elseek (fd, 0, SEEK_CUR, filename);
-              if (current_pos < 0)
-                return false;
-            }
+          error (0, errno, _("cannot fstat %s"),
+                 quotearg_colon (filename));
+          return false;
+        }
+      if (! presume_input_pipe && usable_st_size (&st))
+        {
+          current_pos = elseek (fd, 0, SEEK_CUR, filename);
+          if (current_pos < 0)
+            return false;
        }
      if (count_lines)
-        return elide_tail_lines_file (filename, fd, n_units, current_pos, size);
+        return elide_tail_lines_file (filename, fd, n_units, &st, current_pos);
      else
-        return elide_tail_bytes_file (filename, fd, n_units, current_pos, size);
+        return elide_tail_bytes_file (filename, fd, n_units, &st, current_pos);
    }
  if (count_lines)
    return head_lines (filename, fd, n_units);
--- a/src/od.c
+++ b/src/od.c
@ -27,6 +27,7 @@
 #include "error.h"
 #include "ftoastr.h"
 #include "quote.h"
+#include "stat-size.h"
 #include "xfreopen.h"
 #include "xprintf.h"
 #include "xstrtol.h"
@ -1034,9 +1035,11 @@ skip (uintmax_t n_skip)
             If the number of bytes left to skip is larger than
             the size of the current file, we can decrement n_skip
             and go on to the next file.  Skip this optimization also
-             when st_size is 0, because some kernels report that
-             nonempty files in /proc have st_size == 0.  */
-          if (S_ISREG (file_stats.st_mode) && 0 < file_stats.st_size)
+             when st_size is no greater than the block size, because
+             some kernels report nonsense small file sizes for
+             proc-like file systems.  */
+          if (usable_st_size (&file_stats)
+              && ST_BLKSIZE (file_stats) < file_stats.st_size)
            {
              if ((uintmax_t) file_stats.st_size < n_skip)
                n_skip -= file_stats.st_size;
@ -1052,6 +1055,7 @@ skip (uintmax_t n_skip)
            }

          /* If it's not a regular file with nonnegative size,
+             or if it's so small that it might be in a proc-like file system,
             position the file pointer by reading.  */

          else
@ -1067,10 +1071,15 @@ skip (uintmax_t n_skip)
                  n_skip -= n_bytes_read;
                  if (n_bytes_read != n_bytes_to_read)
                    {
-                      in_errno = errno;
-                      ok = false;
-                      n_skip = 0;
-                      break;
+                      if (ferror (in_stream))
+                        {
+                          in_errno = errno;
+                          ok = false;
+                          n_skip = 0;
+                          break;
+                        }
+                      if (feof (in_stream))
+                        break;
                    }
                }
            }
--- a/src/split.c
+++ b/src/split.c
@ -246,6 +246,37 @@ r/K/N   likewise but only output Kth of N to stdout\n\
  exit (status);
 }

+/* Return the number of bytes that can be read from FD, a file with
+   apparent size SIZE.  Actually read the data into BUF (of size
+   BUFSIZE) if the file appears to be smaller than BUFSIZE, as this
+   works better on proc-like file systems.  If the returned value is
+   less than BUFSIZE, store all the file's data into BUF; otherwise,
+   restore the input file's position so that the file can be reread if
+   needed.  */
+
+static off_t
+input_file_size (int fd, off_t size, char *buf, size_t bufsize)
+{
+  if (size < bufsize)
+    {
+      size = 0;
+      while (true)
+        {
+          size_t save = size < bufsize ? size : 0;
+          size_t n_read = safe_read (fd, buf + save, bufsize - save);
+          if (n_read == 0)
+            break;
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+          size += n_read;
+        }
+      if (bufsize <= size && lseek (fd, - size, SEEK_CUR) < 0)
+        error (EXIT_FAILURE, errno, "%s", infile);
+    }
+
+  return size;
+}
+
 /* Compute the next sequential output file name and store it into the
   string 'outfile'.  */

@ -511,10 +542,13 @@ cwrite (bool new_file_flag, const char *bp, size_t bytes)
 }

 /* Split into pieces of exactly N_BYTES bytes.
-   Use buffer BUF, whose size is BUFSIZE.  */
+   Use buffer BUF, whose size is BUFSIZE.
+   If INITIAL_READ != SIZE_MAX, the entire input file has already been
+   read into BUF, which contains INITIAL_READ input bytes.  */

 static void
-bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)
+bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, size_t initial_read,
+             uintmax_t max_files)
 {
  size_t n_read;
  bool new_file_flag = true;
@ -525,9 +559,17 @@ bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize, uintmax_t max_files)

  do
    {
-      n_read = safe_read (STDIN_FILENO, buf, bufsize);
-      if (n_read == SAFE_READ_ERROR)
-        error (EXIT_FAILURE, errno, "%s", infile);
+      if (initial_read != SIZE_MAX)
+        {
+          n_read = initial_read;
+          initial_read = SIZE_MAX;
+        }
+      else
+        {
+          n_read = safe_read (STDIN_FILENO, buf, bufsize);
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+        }
      bp_out = buf;
      to_read = n_read;
      while (true)
@ -736,7 +778,7 @@ line_bytes_split (uintmax_t n_bytes, char *buf, size_t bufsize)

 static void
 lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
-                   off_t file_size)
+                   size_t initial_read, off_t file_size)
 {
  assert (n && k <= n && n <= file_size);

@ -751,7 +793,12 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
    {
      /* Start reading 1 byte before kth chunk of file.  */
      off_t start = (k - 1) * chunk_size - 1;
-      if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+      if (initial_read != SIZE_MAX)
+        {
+          memmove (buf, buf + start, initial_read - start);
+          initial_read -= start;
+        }
+      else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
        error (EXIT_FAILURE, errno, "%s", infile);
      n_written = start;
      chunk_no = k - 1;
@ -761,10 +808,19 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
  while (n_written < file_size)
    {
      char *bp = buf, *eob;
-      size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
-      if (n_read == SAFE_READ_ERROR)
-        error (EXIT_FAILURE, errno, "%s", infile);
-      else if (n_read == 0)
+      size_t n_read;
+      if (initial_read != SIZE_MAX)
+        {
+          n_read = initial_read;
+          initial_read = SIZE_MAX;
+        }
+      else
+        {
+          n_read = safe_read (STDIN_FILENO, buf, bufsize);
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+        }
+      if (n_read == 0)
        break; /* eof.  */
      n_read = MIN (n_read, file_size - n_written);
      chunk_truncated = false;
@ -841,7 +897,7 @@ lines_chunk_split (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,

 static void
 bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
-                     off_t file_size)
+                     size_t initial_read, off_t file_size)
 {
  off_t start;
  off_t end;
@ -851,15 +907,29 @@ bytes_chunk_extract (uintmax_t k, uintmax_t n, char *buf, size_t bufsize,
  start = (k - 1) * (file_size / n);
  end = (k == n) ? file_size : k * (file_size / n);

-  if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
+  if (initial_read != SIZE_MAX)
+    {
+      memmove (buf, buf + start, initial_read - start);
+      initial_read -= start;
+    }
+  else if (lseek (STDIN_FILENO, start, SEEK_CUR) < 0)
    error (EXIT_FAILURE, errno, "%s", infile);

  while (start < end)
    {
-      size_t n_read = safe_read (STDIN_FILENO, buf, bufsize);
-      if (n_read == SAFE_READ_ERROR)
-        error (EXIT_FAILURE, errno, "%s", infile);
-      else if (n_read == 0)
+      size_t n_read;
+      if (initial_read != SIZE_MAX)
+        {
+          n_read = initial_read;
+          initial_read = SIZE_MAX;
+        }
+      else
+        {
+          n_read = safe_read (STDIN_FILENO, buf, bufsize);
+          if (n_read == SAFE_READ_ERROR)
+            error (EXIT_FAILURE, errno, "%s", infile);
+        }
+      if (n_read == 0)
        break; /* eof.  */
      n_read = MIN (n_read, end - start);
      if (full_write (STDOUT_FILENO, buf, n_read) != n_read
@ -1403,22 +1473,34 @@ main (int argc, char **argv)
  if (in_blk_size == 0)
    in_blk_size = io_blksize (in_stat_buf);

+  void *b = xmalloc (in_blk_size + 1 + page_size - 1);
+  char *buf = ptr_align (b, page_size);
+  size_t initial_read = SIZE_MAX;
+
  if (split_type == type_chunk_bytes || split_type == type_chunk_lines)
    {
      off_t input_offset = lseek (STDIN_FILENO, 0, SEEK_CUR);
-      if (usable_st_size (&in_stat_buf))
-        file_size = in_stat_buf.st_size;
-      else if (0 <= input_offset)
+      if (0 <= input_offset)
        {
-          file_size = lseek (STDIN_FILENO, 0, SEEK_END);
-          input_offset = (file_size < 0
-                          ? file_size
-                          : lseek (STDIN_FILENO, input_offset, SEEK_SET));
+          if (usable_st_size (&in_stat_buf))
+            {
+              file_size = input_file_size (STDIN_FILENO, in_stat_buf.st_size,
+                                           buf, in_blk_size);
+              if (file_size < in_blk_size)
+                initial_read = file_size;
+            }
+          else
+            {
+              file_size = lseek (STDIN_FILENO, 0, SEEK_END);
+              input_offset = (file_size < 0
+                              ? file_size
+                              : lseek (STDIN_FILENO, input_offset, SEEK_SET));
+              file_size -= input_offset;
+            }
        }
      if (input_offset < 0)
        error (EXIT_FAILURE, 0, _("%s: cannot determine file size"),
               quote (infile));
-      file_size -= input_offset;
      /* Overflow, and sanity checking.  */
      if (OFF_T_MAX < n_units)
        {
@ -1431,9 +1513,6 @@ main (int argc, char **argv)
      file_size = MAX (file_size, n_units);
    }

-  void *b = xmalloc (in_blk_size + 1 + page_size - 1);
-  char *buf = ptr_align (b, page_size);
-
  /* When filtering, closure of one pipe must not terminate the process,
     as there may still be other streams expecting input from us.  */
  if (filter_command)
@ -1454,7 +1533,7 @@ main (int argc, char **argv)
      break;

    case type_bytes:
-      bytes_split (n_units, buf, in_blk_size, 0);
+      bytes_split (n_units, buf, in_blk_size, SIZE_MAX, 0);
      break;

    case type_byteslines:
@ -1463,13 +1542,16 @@ main (int argc, char **argv)

    case type_chunk_bytes:
      if (k_units == 0)
-        bytes_split (file_size / n_units, buf, in_blk_size, n_units);
+        bytes_split (file_size / n_units, buf, in_blk_size, initial_read,
+                     n_units);
      else
-        bytes_chunk_extract (k_units, n_units, buf, in_blk_size, file_size);
+        bytes_chunk_extract (k_units, n_units, buf, in_blk_size, initial_read,
+                             file_size);
      break;

    case type_chunk_lines:
-      lines_chunk_split (k_units, n_units, buf, in_blk_size, file_size);
+      lines_chunk_split (k_units, n_units, buf, in_blk_size, initial_read,
+                         file_size);
      break;

    case type_rr:
--- a/src/tac.c
+++ b/src/tac.c
@ -187,10 +187,11 @@ output (const char *start, const char *past_end)
 }

 /* Print in reverse the file open on descriptor FD for reading FILE.
+   The file is already positioned at FILE_POS, which should be near its end.
   Return true if successful.  */

 static bool
-tac_seekable (int input_fd, const char *file)
+tac_seekable (int input_fd, const char *file, off_t file_pos)
 {
  /* Pointer to the location in 'G_buffer' where the search for
     the next separator will begin. */
@ -203,9 +204,6 @@ tac_seekable (int input_fd, const char *file)
  /* Length of the record growing in 'G_buffer'. */
  size_t saved_record_size;

-  /* Offset in the file of the next read. */
-  off_t file_pos;
-
  /* True if 'output' has not been called yet for any file.
     Only used when the separator is attached to the preceding record. */
  bool first_time = true;
@ -213,27 +211,43 @@ tac_seekable (int input_fd, const char *file)
  char const *separator1 = separator + 1; /* Speed optimization, non-regexp. */
  size_t match_length1 = match_length - 1; /* Speed optimization, non-regexp. */

-  /* Find the size of the input file. */
-  file_pos = lseek (input_fd, 0, SEEK_END);
-  if (file_pos < 1)
-    return true;			/* It's an empty file. */
-
  /* Arrange for the first read to lop off enough to leave the rest of the
     file a multiple of 'read_size'.  Since 'read_size' can change, this may
     not always hold during the program run, but since it usually will, leave
     it here for i/o efficiency (page/sector boundaries and all that).
     Note: the efficiency gain has not been verified. */
-  saved_record_size = file_pos % read_size;
-  if (saved_record_size == 0)
-    saved_record_size = read_size;
-  file_pos -= saved_record_size;
-  /* 'file_pos' now points to the start of the last (probably partial) block
-     in the input file. */
+  size_t remainder = file_pos % read_size;
+  if (remainder != 0)
+    {
+      file_pos -= remainder;
+      if (lseek (input_fd, file_pos, SEEK_SET) < 0)
+        error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+    }

-  if (lseek (input_fd, file_pos, SEEK_SET) < 0)
-    error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+  /* Scan backward, looking for end of file.  This caters to proc-like
+     file systems where the file size is just an estimate.  */
+  while ((saved_record_size = safe_read (input_fd, G_buffer, read_size)) == 0
+         && file_pos != 0)
+    {
+      off_t rsize = read_size;
+      if (lseek (input_fd, -rsize, SEEK_CUR) < 0)
+        error (0, errno, _("%s: seek failed"), quotearg_colon (file));
+      file_pos -= read_size;
+    }

-  if (safe_read (input_fd, G_buffer, saved_record_size) != saved_record_size)
+  /* Now scan forward, looking for end of file.  */
+  while (saved_record_size == read_size)
+    {
+      size_t nread = safe_read (input_fd, G_buffer, read_size);
+      if (nread == 0)
+        break;
+      saved_record_size = nread;
+      if (saved_record_size == SAFE_READ_ERROR)
+        break;
+      file_pos += nread;
+    }
+
+  if (saved_record_size == SAFE_READ_ERROR)
    {
      error (0, errno, _("%s: read error"), quotearg_colon (file));
      return false;
@ -485,15 +499,16 @@ temp_stream (FILE **fp, char **file_name)

 /* Copy from file descriptor INPUT_FD (corresponding to the named FILE) to
   a temporary file, and set *G_TMP and *G_TEMPFILE to the resulting stream
-   and file name.  Return true if successful.  */
+   and file name.  Return the number of bytes copied, or -1 on error.  */

-static bool
+static off_t
 copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
 {
  FILE *fp;
  char *file_name;
+  off_t bytes_copied = 0;
  if (!temp_stream (&fp, &file_name))
-    return false;
+    return -1;

  while (1)
    {
@ -511,6 +526,8 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)
          error (0, errno, _("%s: write error"), quotearg_colon (file_name));
          goto Fail;
        }
+
+      bytes_copied += bytes_read;
    }

  if (fflush (fp) != 0)
@ -521,11 +538,11 @@ copy_to_temp (FILE **g_tmp, char **g_tempfile, int input_fd, char const *file)

  *g_tmp = fp;
  *g_tempfile = file_name;
-  return true;
+  return bytes_copied;

 Fail:
  fclose (fp);
-  return false;
+  return -1;
 }

 /* Copy INPUT_FD to a temporary, then tac that file.
@ -536,10 +553,11 @@ tac_nonseekable (int input_fd, const char *file)
 {
  FILE *tmp_stream;
  char *tmp_file;
-  if (!copy_to_temp (&tmp_stream, &tmp_file, input_fd, file))
+  off_t bytes_copied = copy_to_temp (&tmp_stream, &tmp_file, input_fd, file);
+  if (bytes_copied < 0)
    return false;

-  bool ok = tac_seekable (fileno (tmp_stream), tmp_file);
+  bool ok = tac_seekable (fileno (tmp_stream), tmp_file, bytes_copied);
  return ok;
 }

@ -578,7 +596,7 @@ tac_file (const char *filename)

  ok = (file_size < 0 || isatty (fd)
        ? tac_nonseekable (fd, filename)
-        : tac_seekable (fd, filename));
+        : tac_seekable (fd, filename, file_size));

  if (!is_stdin && close (fd) != 0)
    {
--- a/src/tail.c
+++ b/src/tail.c
@ -40,6 +40,7 @@
 #include "posixver.h"
 #include "quote.h"
 #include "safe-read.h"
+#include "stat-size.h"
 #include "stat-time.h"
 #include "xfreopen.h"
 #include "xnanosleep.h"
@ -1665,40 +1666,30 @@ tail_bytes (const char *pretty_filename, int fd, uintmax_t n_bytes,
          if (t)
            return t < 0;
        }
-      *read_pos += dump_remainder (pretty_filename, fd, COPY_TO_EOF);
+      n_bytes = COPY_TO_EOF;
    }
  else
    {
-      if ( ! presume_input_pipe
-           && S_ISREG (stats.st_mode) && n_bytes <= OFF_T_MAX)
-        {
-          off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename);
-          off_t end_pos = xlseek (fd, 0, SEEK_END, pretty_filename);
-          off_t diff = end_pos - current_pos;
-          /* Be careful here.  The current position may actually be
-             beyond the end of the file.  */
-          off_t bytes_remaining = diff < 0 ? 0 : diff;
-          off_t nb = n_bytes;
-
-          if (bytes_remaining <= nb)
-            {
-              /* From the current position to end of file, there are no
-                 more bytes than have been requested.  So reposition the
-                 file pointer to the incoming current position and print
-                 everything after that.  */
-              *read_pos = xlseek (fd, current_pos, SEEK_SET, pretty_filename);
-            }
-          else
-            {
-              /* There are more bytes remaining than were requested.
-                 Back up.  */
-              *read_pos = xlseek (fd, -nb, SEEK_END, pretty_filename);
-            }
-          *read_pos += dump_remainder (pretty_filename, fd, n_bytes);
-        }
-      else
+      off_t end_pos = ((! presume_input_pipe && usable_st_size (&stats)
+                        && n_bytes <= OFF_T_MAX)
+                       ? stats.st_size : -1);
+      if (end_pos <= ST_BLKSIZE (stats))
        return pipe_bytes (pretty_filename, fd, n_bytes, read_pos);
+      off_t current_pos = xlseek (fd, 0, SEEK_CUR, pretty_filename);
+      if (current_pos < end_pos)
+        {
+          off_t bytes_remaining = end_pos - current_pos;
+
+          if (n_bytes < bytes_remaining)
+            {
+              current_pos = end_pos - n_bytes;
+              xlseek (fd, current_pos, SEEK_SET, pretty_filename);
+            }
+        }
+      *read_pos = current_pos;
    }
+
+  *read_pos += dump_remainder (pretty_filename, fd, n_bytes);
  return true;
 }

--- a/src/wc.c
+++ b/src/wc.c
@ -184,9 +184,10 @@ write_counts (uintmax_t lines,

 /* Count words.  FILE_X is the name of the file (or NULL for standard
   input) that is open on descriptor FD.  *FSTATUS is its status.
+   CURRENT_POS is the current file offset if known, negative if unknown.
   Return true if successful.  */
 static bool
-wc (int fd, char const *file_x, struct fstatus *fstatus)
+wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos)
 {
  bool ok = true;
  char buf[BUFFER_SIZE + 1];
@ -229,32 +230,34 @@ wc (int fd, char const *file_x, struct fstatus *fstatus)

  if (count_bytes && !count_chars && !print_lines && !count_complicated)
    {
-      off_t current_pos, end_pos;
-
      if (0 < fstatus->failed)
        fstatus->failed = fstat (fd, &fstatus->st);

-      if (! fstatus->failed && S_ISREG (fstatus->st.st_mode)
-          && (current_pos = lseek (fd, 0, SEEK_CUR)) != -1
-          && (end_pos = lseek (fd, 0, SEEK_END)) != -1)
+      /* For sized files, seek to the last BUFFER_SIZE boundary at or before
+         the reported size (an absolute offset) and read the rest; this works
+         better for proc-like file systems where the size is approximate.  */
+      if (! fstatus->failed && usable_st_size (&fstatus->st)
+          && 0 <= fstatus->st.st_size)
        {
-          /* Be careful here.  The current position may actually be
-             beyond the end of the file.  As in the example above.  */
-          bytes = end_pos < current_pos ? 0 : end_pos - current_pos;
+          off_t end_pos = fstatus->st.st_size;
+          off_t hi_pos = end_pos - end_pos % BUFFER_SIZE;
+          if (current_pos < 0)
+            current_pos = lseek (fd, 0, SEEK_CUR);
+          if (0 <= current_pos && current_pos < hi_pos
+              && 0 <= lseek (fd, hi_pos, SEEK_SET))
+            bytes = hi_pos - current_pos;
        }
-      else
+
+      fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
+      while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
        {
-          fdadvise (fd, 0, 0, FADVISE_SEQUENTIAL);
-          while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
+          if (bytes_read == SAFE_READ_ERROR)
            {
-              if (bytes_read == SAFE_READ_ERROR)
-                {
-                  error (0, errno, "%s", file);
-                  ok = false;
-                  break;
-                }
-              bytes += bytes_read;
+              error (0, errno, "%s", file);
+              ok = false;
+              break;
            }
+          bytes += bytes_read;
        }
    }
  else if (!count_chars && !count_complicated)
@ -500,7 +503,7 @@ wc_file (char const *file, struct fstatus *fstatus)
      have_read_stdin = true;
      if (O_BINARY && ! isatty (STDIN_FILENO))
        xfreopen (NULL, "rb", stdin);
-      return wc (STDIN_FILENO, file, fstatus);
+      return wc (STDIN_FILENO, file, fstatus, -1);
    }
  else
    {
@ -512,7 +515,7 @@ wc_file (char const *file, struct fstatus *fstatus)
        }
      else
        {
-          bool ok = wc (fd, file, fstatus);
+          bool ok = wc (fd, file, fstatus, 0);
          if (close (fd) != 0)
            {
              error (0, errno, "%s", file);
--- a/tests/local.mk
+++ b/tests/local.mk
@ -256,6 +256,7 @@ all_tests =					\
  tests/misc/wc-files0-from.pl			\
  tests/misc/wc-files0.sh			\
  tests/misc/wc-parallel.sh			\
+  tests/misc/wc-proc.sh				\
  tests/misc/cat-proc.sh			\
  tests/misc/cat-buf.sh				\
  tests/misc/cat-self.sh			\
@ -295,6 +296,7 @@ all_tests =					\
  tests/misc/nproc-positive.sh			\
  tests/misc/numfmt.pl				\
  tests/misc/od-N.sh				\
+  tests/misc/od-j.sh				\
  tests/misc/od-multiple-t.sh			\
  tests/misc/od-x8.sh				\
  tests/misc/paste.pl				\
@ -399,6 +401,7 @@ all_tests =					\
  tests/tail-2/wait.sh				\
  tests/tail-2/retry.sh				\
  tests/tail-2/symlink.sh				\
+  tests/tail-2/tail-c.sh			\
  tests/chmod/c-option.sh			\
  tests/chmod/equal-x.sh			\
  tests/chmod/equals.sh				\
--- a/tests/misc/head-c.sh
+++ b/tests/misc/head-c.sh
@ -42,4 +42,16 @@ esac
 # based on the value passed to -c
 (ulimit -v 20000; head --bytes=-$SSIZE_MAX < /dev/null) || fail=1

+# Make sure it works on funny files in /proc and /sys.
+
+for file in /proc/cpuinfo /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    head -c -1 copy > exp1 || framework_failure_
+
+    head -c -1 $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
 Exit $fail
--- a/tests/misc/od-j.sh
+++ b/tests/misc/od-j.sh
@ -0,0 +1,39 @@
+#!/bin/sh
+# Verify that 'od -j N' skips N bytes of input.
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ od
+
+for file in ${srcdir=.}/tests/init.sh /proc/version /sys/kernel/profiling; do
+  test -r $file || continue
+
+  cp -f $file copy &&
+  bytes=$(wc -c < copy) || framework_failure_
+
+  od -An $file > exp || fail=1
+  od -An -j $bytes $file $file > out || fail=1
+  compare out exp || fail=1
+
+  od -An -j 4096 copy copy > exp1 2> experr1; expstatus=$?
+  od -An -j 4096 $file $file > out1 2> err1; status=$?
+  test $status -eq $expstatus || fail=1
+  compare out1 exp1 || fail=1
+  compare err1 experr1 || fail=1
+done
+
+Exit $fail
--- a/tests/misc/tac-2-nonseekable.sh
+++ b/tests/misc/tac-2-nonseekable.sh
@ -1,5 +1,5 @@
 #!/bin/sh
-# ensure that tac works with two or more non-seekable inputs
+# ensure that tac works with non-seekable or quasi-seekable inputs

 # Copyright (C) 2011-2014 Free Software Foundation, Inc.

@ -24,4 +24,16 @@ echo x > exp || fail=1
 compare exp out || fail=1
 compare /dev/null err || fail=1

+# Make sure it works on funny files in /proc and /sys.
+
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    tac copy > exp1 || framework_failure_
+
+    tac $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
 Exit $fail
--- a/tests/misc/wc-proc.sh
+++ b/tests/misc/wc-proc.sh
@ -0,0 +1,32 @@
+#!/bin/sh
+# Test wc on /proc and /sys files.
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ wc
+
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    wc -c < copy > exp1 || framework_failure_
+
+    wc -c < $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
+Exit $fail
--- a/tests/split/b-chunk.sh
+++ b/tests/split/b-chunk.sh
@ -31,20 +31,30 @@ stat x?? 2>/dev/null && fail=1

printf '1\n2\n3\n4\n5\n' > in || framework_failure_

-split -n 3 in > out || fail=1
-split -n 1/3 in > b1 || fail=1
-split -n 2/3 in > b2 || fail=1
-split -n 3/3 in > b3 || fail=1
-printf '1\n2' > exp-1
-printf '\n3\n' > exp-2
-printf '4\n5\n' > exp-3
+for file in in /proc/version /sys/kernel/profiling; do
+  test -r $file || continue  # /proc and /sys files may be absent
+  split -n 3 $file > out || fail=1
+  split -n 1/3 $file > b1 || fail=1
+  split -n 2/3 $file > b2 || fail=1
+  split -n 3/3 $file > b3 || fail=1

-compare exp-1 xaa || fail=1
-compare exp-2 xab || fail=1
-compare exp-3 xac || fail=1
-compare exp-1 b1 || fail=1
-compare exp-2 b2 || fail=1
-compare exp-3 b3 || fail=1
-test -f xad && fail=1
+  case $file in
+    in)
+      printf '1\n2' > exp-1
+      printf '\n3\n' > exp-2
+      printf '4\n5\n' > exp-3
+
+      compare exp-1 xaa || fail=1
+      compare exp-2 xab || fail=1
+      compare exp-3 xac || fail=1
+      ;;
+  esac
+
+  compare xaa b1 || fail=1
+  compare xab b2 || fail=1
+  compare xac b3 || fail=1
+  cat xaa xab xac | compare - $file || fail=1
+  test -f xad && fail=1
+done

 Exit $fail
--- a/tests/tail-2/tail-c.sh
+++ b/tests/tail-2/tail-c.sh
@ -0,0 +1,34 @@
+#!/bin/sh
+# exercise tail -c
+
+# Copyright 2014 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ tail
+
+# Make sure it works on funny files in /proc and /sys.
+
+for file in /proc/version /sys/kernel/profiling; do
+  if test -r $file; then
+    cp -f $file copy &&
+    tail -c -1 copy > exp1 || framework_failure_
+
+    tail -c -1 $file > out1 || fail=1
+    compare exp1 out1 || fail=1
+  fi
+done
+
+Exit $fail