md5sum,b2sum,sha*sum: support -z,--zero option

* doc/coreutils.texi (md5sum invocation): Describe the new option,
and how it's not supported by --check, and how it disables escaping.
* src/md5sum.c (delim): A new global to parameterize the output delimiter.
(main): Don't enable file name escaping with -z, and output '\0'.
* tests/misc/md5sum-newline.pl: Add a test case.
* NEWS: Mention the new feature.
This commit is contained in:
Pádraig Brady 2018-06-10 17:45:35 -07:00
parent 77517a9917
commit c744c65f50
4 changed files with 39 additions and 9 deletions

6
NEWS
View File

@ -46,6 +46,12 @@ GNU coreutils NEWS -*- outline -*-
'cp --force file symlink' now removes the symlink even if
it is self referential.
** New features
md5sum accepts a new option: --zero (-z) to delimit the output lines with a
NUL instead of a newline character. This also disables file name escaping.
This also applies to sha*sum and b2sum.
** Improvements
cut supports line lengths up to the max file size on 32 bit systems.

View File

@ -3865,8 +3865,8 @@ a space, a flag indicating binary or text input mode, and the file name.
Binary mode is indicated with @samp{*}, text mode with @samp{ } (space).
Binary mode is the default on systems where it's significant,
otherwise text mode is the default.
If @var{file} contains a backslash or newline, the
line is started with a backslash, and each problematic character in
Without @option{--zero}, if @var{file} contains a backslash or newline,
the line is started with a backslash, and each problematic character in
the file name is escaped with a backslash, making the output
unambiguous even in the presence of arbitrary file names.
If @var{file} is omitted or specified as @samp{-}, standard input is read.
@ -3899,6 +3899,7 @@ Three input formats are supported. Either the default output
format described above, the @option{--tag} output format,
or the BSD reversed mode format which is similar to the default mode,
but doesn't use a character to distinguish binary and text modes.
Output with @option{--zero} enabled is not supported by @option{--check}.
@sp 1
For each such line, @command{md5sum} reads the named file and computes its
MD5 checksum. Then, if the computed message digest does not match the
@ -3947,9 +3948,9 @@ indicating there was a failure.
@opindex --tag
@cindex BSD output
Output BSD style checksums, which indicate the checksum algorithm used.
As a GNU extension, file names with problematic characters
are escaped as described above, with the same escaping indicator of @samp{\}
at the start of the line, being used.
As a GNU extension, if @option{--zero} is not used, file names with problematic
characters are escaped as described above, using the same escaping indicator
of @samp{\} at the start of the line.
The @option{--tag} option implies binary mode, and is disallowed with
@option{--text} mode as supporting that would unnecessarily complicate
the output format, while providing little benefit.
@ -3982,6 +3983,8 @@ When verifying checksums,
if one or more input lines are invalid,
exit nonzero after all warnings have been issued.
@optZero
Also, file name escaping is not used.
@end table
@exitstatus

View File

@ -157,6 +157,9 @@ static bool strict = false;
/* Whether a BSD reversed format checksum is detected. */
static int bsd_reversed = -1;
/* Output line delimiter: '\n' by default, '\0' with --zero. */
static unsigned char delim = '\n';
#if HASH_ALGO_BLAKE2
static char const *const algorithm_in_string[] =
{
@ -210,6 +213,7 @@ static struct option const long_options[] =
{ "warn", no_argument, NULL, 'w' },
{ "strict", no_argument, NULL, STRICT_OPTION },
{ "tag", no_argument, NULL, TAG_OPTION },
{ "zero", no_argument, NULL, 'z' },
{ GETOPT_HELP_OPTION_DECL },
{ GETOPT_VERSION_OPTION_DECL },
{ NULL, 0, NULL, 0 }
@ -261,6 +265,10 @@ Print or check %s (%d-bit) checksums.\n\
else
fputs (_("\
-t, --text read in text mode (default)\n\
"), stdout);
fputs (_("\
-z, --zero end each output line with NUL, not newline,\n\
and disable file name escaping\n\
"), stdout);
fputs (_("\
\n\
@ -875,10 +883,10 @@ main (int argc, char **argv)
setvbuf (stdout, NULL, _IOLBF, 0);
#if HASH_ALGO_BLAKE2
const char* short_opts = "l:bctw";
const char* short_opts = "l:bctwz";
const char* b2_length_str = "";
#else
const char* short_opts = "bctw";
const char* short_opts = "bctwz";
#endif
while ((opt = getopt_long (argc, argv, short_opts, long_options, NULL)) != -1)
@ -930,6 +938,9 @@ main (int argc, char **argv)
prefix_tag = true;
binary = 1;
break;
case 'z':
delim = '\0';
break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
default:
@ -964,6 +975,13 @@ main (int argc, char **argv)
usage (EXIT_FAILURE);
}
if (delim != '\n' && do_check)
{
error (0, 0, _("the --zero option is not supported when "
"verifying checksums"));
usage (EXIT_FAILURE);
}
if (prefix_tag && do_check)
{
error (0, 0, _("the --tag option is meaningless when "
@ -1043,7 +1061,8 @@ main (int argc, char **argv)
against old (hashed) outputs, in the presence of files
containing '\\' characters, we decided to not simplify the
output in this case. */
bool needs_escape = strchr (file, '\\') || strchr (file, '\n');
bool needs_escape = (strchr (file, '\\') || strchr (file, '\n'))
&& delim == '\n';
if (prefix_tag)
{
@ -1079,7 +1098,7 @@ main (int argc, char **argv)
print_filename (file, needs_escape);
}
putchar ('\n');
putchar (delim);
}
}
}

View File

@ -30,10 +30,12 @@ system ('touch', "a\nb") == 0
# MD5 digest of empty input; every input file below is given '' as content.
my $degenerate = "d41d8cd98f00b204e9800998ecf8427e";
# Command-line options exercised by the test cases.
my $t = '--text';
my $z = '--zero';
# Each entry lists: test name, option passed, input spec (file name => content),
# and the expected output.  The input file name contains an embedded newline.
my @Tests =
(
# Default escaping: the line starts with a backslash and the newline in the
# file name is printed as the two characters '\' and 'n'.
['newline', $t, {IN=> {"a\nb"=> ''}}, {OUT=>"\\$degenerate a\\nb\n"}],
# With --zero the file name is printed verbatim (no escaping) and the line
# is terminated by NUL instead of a newline.
['zero', $z, {IN=> {"a\nb"=> ''}}, {OUT=>"$degenerate a\nb\0"}],
);
my $save_temps = $ENV{DEBUG};