From 04e8698fcca7d1e932bc54f5b60e1bbce2e87601 Mon Sep 17 00:00:00 2001 From: Avinal Kumar Date: Fri, 25 Oct 2024 15:48:27 +0530 Subject: [PATCH] stdio-common: Fix scanf parsing for NaN types [BZ #30647] The scanf family of functions like sscanf and fscanf currently ignore nan() and nan(n-char-sequence). This happens because __vfscanf_internal only checks for 'nan'. This commit adds support for all valid nan types i.e. nan, nan() and nan(n-char-sequence), where n-char-sequence can be [a-zA-Z0-9_]+, thus fixing the bug 30647. Any other representation of NaN should result in conversion error. New tests are also added to verify the correct parsing of NaN types for float, double and long double formats. Signed-off-by: Avinal Kumar Reviewed-by: Adhemerval Zanella --- stdio-common/Makefile | 1 + stdio-common/tst-scanf-nan.c | 83 +++++++++++++++++++++++++++++++++ stdio-common/vfscanf-internal.c | 46 +++++++++++++++++- 3 files changed, 129 insertions(+), 1 deletion(-) create mode 100644 stdio-common/tst-scanf-nan.c diff --git a/stdio-common/Makefile b/stdio-common/Makefile index 88105b3c1b..a166eb7cf8 100644 --- a/stdio-common/Makefile +++ b/stdio-common/Makefile @@ -261,6 +261,7 @@ tests := \ tst-scanf-binary-gnu89 \ tst-scanf-bz27650 \ tst-scanf-intn \ + tst-scanf-nan \ tst-scanf-round \ tst-scanf-to_inpunct \ tst-setvbuf1 \ diff --git a/stdio-common/tst-scanf-nan.c b/stdio-common/tst-scanf-nan.c new file mode 100644 index 0000000000..7450b37c4e --- /dev/null +++ b/stdio-common/tst-scanf-nan.c @@ -0,0 +1,83 @@ +/* Test scanf formats for nan, nan(), nan(n-char-sequence) types. + Copyright The GNU Toolchain Authors. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + +#include <stdio.h> +#include <stdlib.h> + +#include <support/check.h> + +#define CHECK_SCANF_RET(OK, STR, FMT, ...) \ + do \ + { \ + int ret = sscanf (STR, FMT, __VA_ARGS__); \ + TEST_VERIFY (ret == (OK)); \ + } \ + while (0) + +/* Valid nan types: + 1. nan + 2. nan() + 3. nan([a-zA-Z0-9_]+) + Any other nan format is invalid and should produce a conversion error. + The return value denotes the number of valid conversions. On conversion + error the rest of the input is discarded. */ +static int +do_test (void) +{ + int a; + float b; + double c; + long double d; + + /* All valid inputs. */ + CHECK_SCANF_RET (1, "nan", "%lf", &c); + CHECK_SCANF_RET (1, "nan()", "%lf", &c); + CHECK_SCANF_RET (1, "nan(12345)", "%lf", &c); + CHECK_SCANF_RET (2, "nan12", "%lf%d", &c, &a); + CHECK_SCANF_RET (2, "nan nan()", "%f%Lf", &b, &d); + CHECK_SCANF_RET (2, "nan nan(12345foo)", "%lf%Lf", &c, &d); + CHECK_SCANF_RET (3, "nan nan() 12.234", "%lf%Lf%f", &c, &d, &b); + CHECK_SCANF_RET (4, "nannan()nan(foo)1234", "%lf%f%Lf%d", &c, &b, &d, &a); + + /* Partially valid inputs. */ + CHECK_SCANF_RET (1, "nan( )", "%3lf", &c); + CHECK_SCANF_RET (1, "nan nan(", "%lf%f", &c, &b); + + /* Invalid inputs. */ + + /* Dangling parentheses. */ + CHECK_SCANF_RET (0, "nan(", "%lf", &c); + CHECK_SCANF_RET (0, "nan(123", "%lf", &c); + CHECK_SCANF_RET (0, "nan(12345", "%lf%d", &c, &a); + + /* Field width is not sufficient for valid conversion. */ + CHECK_SCANF_RET (0, "nan()", "%4Lf", &d); + CHECK_SCANF_RET (0, "nan(1", "%5lf", &c); + + /* Space is not a valid character. 
*/ + CHECK_SCANF_RET (0, "nan( )", "%lf", &c); + CHECK_SCANF_RET (0, "nan( )12.34", "%Lf%f", &d, &b); + CHECK_SCANF_RET (0, "nan(12 foo)", "%f", &b); + + /* Period '.' is not a valid character. */ + CHECK_SCANF_RET (0, "nan(12.34) nan(FooBar)", "%lf%Lf", &c, &d); + + return 0; +} + +#include <support/test-driver.c> diff --git a/stdio-common/vfscanf-internal.c b/stdio-common/vfscanf-internal.c index 1b82deffa7..5f38f991cc 100644 --- a/stdio-common/vfscanf-internal.c +++ b/stdio-common/vfscanf-internal.c @@ -2028,7 +2028,51 @@ digits_extended_fail: if (width > 0) --width; char_buffer_add (&charbuf, c); - /* It is "nan". */ + /* It is at least "nan". Now we check for nan() and + nan(n-char-sequence). */ + if (width != 0 && inchar () != EOF) + { + if (c == L_('(')) + { + if (width > 0) + --width; + char_buffer_add (&charbuf, c); + /* A '(' was observed, check for a closing ')', there + may or may not be a n-char-sequence in between. We + have to check the longest prefix until there is a + conversion error or closing parenthesis. */ + do + { + if (__glibc_unlikely (width == 0 + || inchar () == EOF)) + { + /* Conversion error because we ran out of + characters. */ + conv_error (); + break; + } + if (!((c >= L_('0') && c <= L_('9')) + || (c >= L_('A') && c <= L_('Z')) + || (c >= L_('a') && c <= L_('z')) + || c == L_('_') || c == L_(')'))) + { + /* Invalid character was observed. Only valid + characters are [a-zA-Z0-9_] and ')'. */ + conv_error (); + break; + } + if (width > 0) + --width; + char_buffer_add (&charbuf, c); + } + while (c != L_(')')); + /* The loop only exits successfully when ')' is the + last character. */ + } + else + /* It is only 'nan'. */ + ungetc (c, s); + } goto scan_float; } else if (TOLOWER (c) == L_('i'))