dd: add count_bytes, skip_bytes and seek_bytes flags

dd now accepts the count_bytes and skip_bytes input flags and the seek_bytes output flag, to more easily allow processing portions of a file. * src/dd.c (scanargs): Compute skip_records and skip_bytes when 'skip_bytes' iflag is used. Compute max_records and max_bytes when 'count_bytes' iflag is used. Compute seek_records and seek_bytes when 'seek_bytes' oflag is used. (skip): Use new 'bytes' parameter and handle potential 'records' equal to zero. Update the bytes parameter when called with 'fdesc' equal to STDOUT_FILENO. Update the header comments. (dd_copy): Skip according to skip_records AND skip_bytes. Count according to max_records AND max_bytes. Seek on output according to seek_records AND seek_bytes. * NEWS (New features): Mention it. * doc/coreutils.texi (dd invocation): Detail new flags and behaviors. * tests/dd/bytes: New file. Tests for these new flags. * tests/Makefile.am (TESTS): Add it.
2024-11-28 04:24:45 +08:00 · 2012-02-04 15:25:54 +01:00 · 2012-02-04 15:25:54 +01:00 · 140eca15c4
commit 140eca15c4
parent da2b32e7af
5 changed files with 253 additions and 64 deletions
--- a/5
+++ b/5
@ -2,6 +2,11 @@ GNU coreutils NEWS                                    -*- outline -*-

 * Noteworthy changes in release ?.? (????-??-??) [?]

+** New features
+
+  dd now accepts the count_bytes and skip_bytes iflags and the seek_bytes
+  oflag, to more easily allow processing portions of a file.
+
 ** Bug fixes

  mv now lets you move a symlink onto a same-inode destination file that
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@ -8057,18 +8057,24 @@ When converting variable-length records to fixed-length ones
 (@option{conv=block}) or the reverse (@option{conv=unblock}),
 use @var{bytes} as the fixed record length.

-@item skip=@var{blocks}
+@item skip=@var{n}
@opindex skip
-Skip @var{blocks} @samp{ibs}-byte blocks in the input file before copying.
+Skip @var{n} @samp{ibs}-byte blocks in the input file before copying.
+If @samp{iflag=skip_bytes} is specified, @var{n} is interpreted
+as a byte count rather than a block count.

-@item seek=@var{blocks}
+@item seek=@var{n}
@opindex seek
-Skip @var{blocks} @samp{obs}-byte blocks in the output file before copying.
+Skip @var{n} @samp{obs}-byte blocks in the output file before copying.
+If @samp{oflag=seek_bytes} is specified, @var{n} is interpreted
+as a byte count rather than a block count.

-@item count=@var{blocks}
+@item count=@var{n}
@opindex count
-Copy @var{blocks} @samp{ibs}-byte blocks from the input file, instead
+Copy @var{n} @samp{ibs}-byte blocks from the input file, instead
 of everything until the end of the file.
+If @samp{iflag=count_bytes} is specified, @var{n} is interpreted
+as a byte count rather than a block count.

@item status=noxfer
@opindex status
@ -8321,6 +8327,27 @@ When that happens, continue calling @code{read} to fill the remainder
 of the block.
 This flag can be used only with @code{iflag}.

+@item count_bytes
+@opindex count_bytes
+Interpret the @samp{count=} operand as a byte count,
+rather than a block count, which allows specifying
+a length that is not a multiple of the I/O block size.
+This flag can be used only with @code{iflag}.
+
+@item skip_bytes
+@opindex skip_bytes
+Interpret the @samp{skip=} operand as a byte count,
+rather than a block count, which allows specifying
+an offset that is not a multiple of the I/O block size.
+This flag can be used only with @code{iflag}.
+
+@item seek_bytes
+@opindex seek_bytes
+Interpret the @samp{seek=} operand as a byte count,
+rather than a block count, which allows specifying
+an offset that is not a multiple of the I/O block size.
+This flag can be used only with @code{oflag}.
+
@end table

 These flags are not supported on all systems, and @samp{dd} rejects
@ -8343,10 +8370,13 @@ should not be too large---values larger than a few megabytes
 are generally wasteful or (as in the gigabyte..exabyte case) downright
 counterproductive or error-inducing.

-Use different @command{dd} invocations to use different block sizes for
-skipping and I/O@.  For example, the following shell commands copy data
-in 512 KiB blocks between a disk and a tape, but do not save or restore a
-4 KiB label at the start of the disk:
+To process data that is at an offset or size that is not a
+multiple of the I/O@ block size, you can use the @samp{skip_bytes},
+@samp{seek_bytes} and @samp{count_bytes} flags.  Alternatively
+the traditional method of separate @command{dd} invocations can be used.
+For example, the following shell commands copy data
+in 512 KiB blocks between a disk and a tape, but do not save
+or restore a 4 KiB label at the start of the disk:

@example
 disk=/dev/rdsk/c0t1d0s2
--- a/src/dd.c
+++ b/src/dd.c
@ -156,12 +156,23 @@ static size_t conversion_blocksize = 0;
 /* Skip this many records of 'input_blocksize' bytes before input. */
 static uintmax_t skip_records = 0;

+/* Skip this many bytes before input in addition to 'skip_records'
+   records.  */
+static size_t skip_bytes = 0;
+
 /* Skip this many records of 'output_blocksize' bytes before output. */
 static uintmax_t seek_records = 0;

+/* Skip this many bytes in addition to 'seek_records' records before
+   output.  */
+static uintmax_t seek_bytes = 0;
+
 /* Copy only this many records.  The default is effectively infinity.  */
 static uintmax_t max_records = (uintmax_t) -1;

+/* Copy this many bytes in addition to 'max_records' records.  */
+static size_t max_bytes = 0;
+
 /* Bit vector of conversions to apply. */
 static int conversions_mask = 0;

@ -241,7 +252,7 @@ static bool i_nocache, o_nocache;
 static ssize_t (*iread_fnc) (int fd, char *buf, size_t size);

 /* A longest symbol in the struct symbol_values tables below.  */
-#define LONGEST_SYMBOL "fdatasync"
+#define LONGEST_SYMBOL "count_bytes"

 /* A symbol and the corresponding integer value.  */
 struct symbol_value
@ -296,37 +307,55 @@ enum
    O_FULLBLOCK = FFS_MASK (v),
    v2 = v ^ O_FULLBLOCK,

-    O_NOCACHE = FFS_MASK (v2)
+    O_NOCACHE = FFS_MASK (v2),
+    v3 = v2 ^ O_NOCACHE,
+
+    O_COUNT_BYTES = FFS_MASK (v3),
+    v4 = v3 ^ O_COUNT_BYTES,
+
+    O_SKIP_BYTES = FFS_MASK (v4),
+    v5 = v4 ^ O_SKIP_BYTES,
+
+    O_SEEK_BYTES = FFS_MASK (v5)
  };

 /* Ensure that we got something.  */
 verify (O_FULLBLOCK != 0);
 verify (O_NOCACHE != 0);
+verify (O_COUNT_BYTES != 0);
+verify (O_SKIP_BYTES != 0);
+verify (O_SEEK_BYTES != 0);

 #define MULTIPLE_BITS_SET(i) (((i) & ((i) - 1)) != 0)

 /* Ensure that this is a single-bit value.  */
 verify ( ! MULTIPLE_BITS_SET (O_FULLBLOCK));
 verify ( ! MULTIPLE_BITS_SET (O_NOCACHE));
+verify ( ! MULTIPLE_BITS_SET (O_COUNT_BYTES));
+verify ( ! MULTIPLE_BITS_SET (O_SKIP_BYTES));
+verify ( ! MULTIPLE_BITS_SET (O_SEEK_BYTES));

 /* Flags, for iflag="..." and oflag="...".  */
 static struct symbol_value const flags[] =
 {
-  {"append",	O_APPEND},
-  {"binary",	O_BINARY},
-  {"cio",	O_CIO},
-  {"direct",	O_DIRECT},
-  {"directory",	O_DIRECTORY},
-  {"dsync",	O_DSYNC},
-  {"noatime",	O_NOATIME},
-  {"nocache",	O_NOCACHE},   /* Discard cache.  */
-  {"noctty",	O_NOCTTY},
-  {"nofollow",	HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0},
-  {"nolinks",	O_NOLINKS},
-  {"nonblock",	O_NONBLOCK},
-  {"sync",	O_SYNC},
-  {"text",	O_TEXT},
-  {"fullblock", O_FULLBLOCK}, /* Accumulate full blocks from input.  */
+  {"append",	  O_APPEND},
+  {"binary",	  O_BINARY},
+  {"cio",	  O_CIO},
+  {"direct",	  O_DIRECT},
+  {"directory",   O_DIRECTORY},
+  {"dsync",	  O_DSYNC},
+  {"noatime",	  O_NOATIME},
+  {"nocache",	  O_NOCACHE},   /* Discard cache.  */
+  {"noctty",	  O_NOCTTY},
+  {"nofollow",	  HAVE_WORKING_O_NOFOLLOW ? O_NOFOLLOW : 0},
+  {"nolinks",	  O_NOLINKS},
+  {"nonblock",	  O_NONBLOCK},
+  {"sync",	  O_SYNC},
+  {"text",	  O_TEXT},
+  {"fullblock",   O_FULLBLOCK}, /* Accumulate full blocks from input.  */
+  {"count_bytes", O_COUNT_BYTES},
+  {"skip_bytes",  O_SKIP_BYTES},
+  {"seek_bytes",  O_SEEK_BYTES},
  {"",		0}
 };

@ -489,7 +518,7 @@ Copy a file, converting and formatting according to the operands.\n\
  bs=BYTES        read and write up to BYTES bytes at a time\n\
  cbs=BYTES       convert BYTES bytes at a time\n\
  conv=CONVS      convert the file as per the comma separated symbol list\n\
-  count=BLOCKS    copy only BLOCKS input blocks\n\
+  count=N         copy only N input blocks\n\
  ibs=BYTES       read up to BYTES bytes at a time (default: 512)\n\
 "), stdout);
      fputs (_("\
@ -498,8 +527,8 @@ Copy a file, converting and formatting according to the operands.\n\
  obs=BYTES       write BYTES bytes at a time (default: 512)\n\
  of=FILE         write to FILE instead of stdout\n\
  oflag=FLAGS     write as per the comma separated symbol list\n\
-  seek=BLOCKS     skip BLOCKS obs-sized blocks at start of output\n\
-  skip=BLOCKS     skip BLOCKS ibs-sized blocks at start of input\n\
+  seek=N          skip N obs-sized blocks at start of output\n\
+  skip=N          skip N ibs-sized blocks at start of input\n\
  status=noxfer   suppress transfer statistics\n\
 "), stdout);
      fputs (_("\
@ -568,6 +597,15 @@ Each FLAG symbol may be:\n\
        fputs (_("  binary    use binary I/O for data\n"), stdout);
      if (O_TEXT)
        fputs (_("  text      use text I/O for data\n"), stdout);
+      if (O_COUNT_BYTES)
+        fputs (_("  count_bytes  treat 'count=N' as a byte count (iflag only)\n\
+"), stdout);
+      if (O_SKIP_BYTES)
+        fputs (_("  skip_bytes  treat 'skip=N' as a byte count (iflag only)\n\
+"), stdout);
+      if (O_SEEK_BYTES)
+        fputs (_("  seek_bytes  treat 'seek=N' as a byte count (oflag only)\n\
+"), stdout);

      {
        char const *siginfo_name = (SIGINFO == SIGUSR1 ? "USR1" : "INFO");
@ -1120,6 +1158,9 @@ scanargs (int argc, char *const *argv)
 {
  int i;
  size_t blocksize = 0;
+  uintmax_t count = (uintmax_t) -1;
+  uintmax_t skip = 0;
+  uintmax_t seek = 0;

  for (i = optind; i < argc; i++)
    {
@ -1175,11 +1216,11 @@ scanargs (int argc, char *const *argv)
              conversion_blocksize = n;
            }
          else if (operand_is (name, "skip"))
-            skip_records = n;
+            skip = n;
          else if (operand_is (name, "seek"))
-            seek_records = n;
+            seek = n;
          else if (operand_is (name, "count"))
-            max_records = n;
+            count = n;
          else
            {
              error (0, 0, _("unrecognized operand %s"), quote (name));
@ -1216,6 +1257,43 @@ scanargs (int argc, char *const *argv)
      usage (EXIT_FAILURE);
    }

+  if (input_flags & O_SEEK_BYTES)
+    {
+      error (0, 0, "%s: %s", _("invalid input flag"), "'seek_bytes'");
+      usage (EXIT_FAILURE);
+    }
+
+  if (output_flags & (O_COUNT_BYTES | O_SKIP_BYTES))
+    {
+      error (0, 0, "%s: %s", _("invalid output flag"),
+             output_flags & O_COUNT_BYTES ? "'count_bytes'" : "'skip_bytes'");
+      usage (EXIT_FAILURE);
+    }
+
+  if (input_flags & O_SKIP_BYTES && skip != 0)
+    {
+      skip_records = skip / input_blocksize;
+      skip_bytes = skip % input_blocksize;
+    }
+  else if (skip != 0)
+    skip_records = skip;
+
+  if (input_flags & O_COUNT_BYTES && count != (uintmax_t) -1)
+    {
+      max_records = count / input_blocksize;
+      max_bytes = count % input_blocksize;
+    }
+  else if (count != (uintmax_t) -1)
+    max_records = count;
+
+  if (output_flags & O_SEEK_BYTES && seek != 0)
+    {
+      seek_records = seek / output_blocksize;
+      seek_bytes = seek % output_blocksize;
+    }
+  else if (seek != 0)
+    seek_records = seek;
+
  /* Warn about partial reads if bs=SIZE is given and iflag=fullblock
     is not, and if counting or skipping bytes or using direct I/O.
     This helps to avoid confusion with miscounts, and to avoid issues
@ -1411,18 +1489,20 @@ skip_via_lseek (char const *filename, int fdesc, off_t offset, int whence)
 # define skip_via_lseek(Filename, Fd, Offset, Whence) lseek (Fd, Offset, Whence)
 #endif

-/* Throw away RECORDS blocks of BLOCKSIZE bytes on file descriptor FDESC,
-   which is open with read permission for FILE.  Store up to BLOCKSIZE
-   bytes of the data at a time in BUF, if necessary.  RECORDS must be
-   nonzero.  If fdesc is STDIN_FILENO, advance the input offset.
-   Return the number of records remaining, i.e., that were not skipped
-   because EOF was reached.  */
+/* Throw away RECORDS blocks of BLOCKSIZE bytes plus BYTES bytes on
+   file descriptor FDESC, which is open with read permission for FILE.
+   Store up to BLOCKSIZE bytes of the data at a time in BUF, if
+   necessary.  RECORDS or BYTES must be nonzero.  If FDESC is
+   STDIN_FILENO, advance the input offset.  Return the number of
+   records remaining, i.e., that were not skipped because EOF was
+   reached.  If FDESC is STDOUT_FILENO, on return, BYTES is the
+   remaining bytes in addition to the remaining records.  */

 static uintmax_t
 skip (int fdesc, char const *file, uintmax_t records, size_t blocksize,
-      char *buf)
+      size_t *bytes, char *buf)
 {
-  uintmax_t offset = records * blocksize;
+  uintmax_t offset = records * blocksize + *bytes;

  /* Try lseek and if an error indicates it was an inappropriate operation --
     or if the file offset is not representable as an off_t --
@ -1450,7 +1530,10 @@ skip (int fdesc, char const *file, uintmax_t records, size_t blocksize,
           advance_input_offset (offset);
        }
      else
-        records = 0;
+        {
+          records = 0;
+          *bytes = 0;
+        }
      return records;
    }
  else
@ -1491,29 +1574,30 @@ skip (int fdesc, char const *file, uintmax_t records, size_t blocksize,

      do
        {
-          ssize_t nread = iread_fnc (fdesc, buf, blocksize);
+          ssize_t nread = iread_fnc (fdesc, buf, records ? blocksize : *bytes);
          if (nread < 0)
            {
              if (fdesc == STDIN_FILENO)
                {
                  error (0, errno, _("reading %s"), quote (file));
                  if (conversions_mask & C_NOERROR)
-                    {
-                      print_stats ();
-                      continue;
-                    }
+                    print_stats ();
                }
              else
                error (0, lseek_errno, _("%s: cannot seek"), quote (file));
              quit (EXIT_FAILURE);
            }
-
-          if (nread == 0)
+          else if (nread == 0)
            break;
-          if (fdesc == STDIN_FILENO)
+          else if (fdesc == STDIN_FILENO)
            advance_input_offset (nread);
+
+          if (records != 0)
+            records--;
+          else
+            *bytes = 0;
        }
-      while (--records != 0);
+      while (records || *bytes);

      return records;
    }
@ -1777,11 +1861,13 @@ dd_copy (void)
      obuf = ibuf;
    }

-  if (skip_records != 0)
+  if (skip_records != 0 || skip_bytes != 0)
    {
-      uintmax_t us_bytes = input_offset + (skip_records * input_blocksize);
+      uintmax_t us_bytes = input_offset + (skip_records * input_blocksize)
+                           + skip_bytes;
      uintmax_t us_blocks = skip (STDIN_FILENO, input_file,
-                                  skip_records, input_blocksize, ibuf);
+                                  skip_records, input_blocksize, &skip_bytes,
+                                  ibuf);
      us_bytes -= input_offset;

      /* POSIX doesn't say what to do when dd detects it has been
@ -1797,34 +1883,41 @@ dd_copy (void)
        }
    }

-  if (seek_records != 0)
+  if (seek_records != 0 || seek_bytes != 0)
    {
+      size_t bytes = seek_bytes;
      uintmax_t write_records = skip (STDOUT_FILENO, output_file,
-                                      seek_records, output_blocksize, obuf);
+                                      seek_records, output_blocksize, &bytes,
+                                      obuf);

-      if (write_records != 0)
+      if (write_records != 0 || bytes != 0)
        {
-          memset (obuf, 0, output_blocksize);
+          memset (obuf, 0, write_records ? output_blocksize : bytes);

          do
            {
-              if (iwrite (STDOUT_FILENO, obuf, output_blocksize)
-                  != output_blocksize)
+              size_t size = write_records ? output_blocksize : bytes;
+              if (iwrite (STDOUT_FILENO, obuf, size) != size)
                {
                  error (0, errno, _("writing to %s"), quote (output_file));
                  quit (EXIT_FAILURE);
                }
+
+              if (write_records != 0)
+                write_records--;
+              else
+                bytes = 0;
            }
-          while (--write_records != 0);
+          while (write_records || bytes);
        }
    }

-  if (max_records == 0)
+  if (max_records == 0 && max_bytes == 0)
    return exit_status;

  while (1)
    {
-      if (r_partial + r_full >= max_records)
+      if (r_partial + r_full >= max_records + (max_bytes ? 1 : 0))
        break;

      /* Zero the buffer before reading, so that if we get a read error,
@ -1835,7 +1928,10 @@ dd_copy (void)
                (conversions_mask & (C_BLOCK | C_UNBLOCK)) ? ' ' : '\0',
                input_blocksize);

-      nread = iread_fnc (STDIN_FILENO, ibuf, input_blocksize);
+      if (r_partial + r_full >= max_records)
+        nread = iread_fnc (STDIN_FILENO, ibuf, max_bytes);
+      else
+        nread = iread_fnc (STDIN_FILENO, ibuf, input_blocksize);

      if (nread >= 0 && i_nocache)
        invalidate_cache (STDIN_FILENO, nread);
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@ -369,6 +369,7 @@ TESTS =						\
  dd/reblock					\
  dd/skip-seek					\
  dd/skip-seek2					\
+  dd/bytes					\
  dd/skip-seek-past-file			\
  dd/stderr					\
  dd/unblock					\
--- a/tests/dd/bytes
+++ b/tests/dd/bytes
@ -0,0 +1,57 @@
+#!/bin/sh
+
+# Copyright (C) 2012 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+print_ver_ dd
+
+# count_bytes
+echo 0123456789abcdefghijklm > in || framework_failure_
+dd count=14 conv=swab iflag=count_bytes < in > out 2> /dev/null || fail=1
+case `cat out` in
+ 1032547698badc) ;;
+ *) fail=1 ;;
+esac
+
+# skip_bytes
+echo 0123456789abcdefghijklm > in || framework_failure_
+dd skip=10 iflag=skip_bytes < in > out 2> /dev/null || fail=1
+case `cat out` in
+ abcdefghijklm) ;;
+ *) fail=1 ;;
+esac
+
+# skip records and bytes from pipe
+echo 0123456789abcdefghijklm |
+ dd skip=10 bs=2 iflag=skip_bytes > out 2> /dev/null || fail=1
+case `cat out` in
+ abcdefghijklm) ;;
+ *) fail=1 ;;
+esac
+
+# seek bytes
+echo abcdefghijklm |
+ dd bs=5 seek=8 oflag=seek_bytes > out 2> /dev/null || fail=1
+echo abcdefghijklm |
+ dd bs=4 seek=2 > expected 2> /dev/null || fail=1
+compare expected out || fail=1
+
+# seek bytes on empty file
+echo abcdefghijklm |
+ dd bs=5 seek=8 oflag=seek_bytes > out2 2> /dev/null || fail=1
+compare expected out2 || fail=1
+
+Exit $fail