gdb: consider null terminator for length arguments of value_cstring calls

This patch started when I looked at this bit in cli/cli-cmds.c:

    if (*(char **) cmd->var)
      return value_cstring (*(char **) cmd->var, strlen (*(char **) cmd->var),
			    builtin_type (gdbarch)->builtin_char);
    else
      return value_cstring ("", 1,
			    builtin_type (gdbarch)->builtin_char);

I found it odd that the first value_cstring call passes a length that
does not consider the null terminator of the C string, but second does.
I want to clarify the documentation of value_cstring and fix the one
that is wrong.

Debugging a little bit, I found that the first call is the wrong one.
Doing:

  (gdb) set args foo
  (gdb) print $_gdb_setting("args")
  $1 = "foo"

creates a struct value of code TYPE_CODE_ARRAY of size 3, which doesn't
have a null terminator.  That does not create a valid C string.  It is
however printed correctly by GDB, because the printing code makes sure
not to read past the value's length.

A way to expose the bug is to print each element of the created string,
including the null terminator.  Before:

    (gdb) set args foo
    (gdb) p $_gdb_setting("args")[3]
    no such vector element

After:

    (gdb) set args foo
    (gdb) p $_gdb_setting("args")[3]
    $1 = 0 '\000'

Another perhaps more convincing way of showing the bug is if the string
value is passed to an inferior function call;

    (gdb) print an_inferior_function($_gdb_setting("args))

The space allocate for the string in the inferior will not take into
account a null terminator (with the string "foo", 3 bytes will be
allocated).  If the inferior tries to read the string until the null
terminator, it will read past the allocated buffer.  Compiling the
inferior with AddressSanitizer makes that bad access obvious.

I found a few calls to value_cstring that were wrong, I fixed them
all, all exercised by the test.

The change in guile/scm-math.c maybe requires a bit of explanation.
According to the doc of gdbscm_scm_to_string, if don't pass a length
pointer, we get back a null-terminated string.  If we pass a length
pointer, we get a non-null-terminated string, but we get the length
separately.  Trying to pass "len + 1" to value_cstring would lead to GDB
reading past the allocated buffer, that is exactly of length `len`.  So
instead, don't pass a length pointer and use strlen on the result.

gdb.base/settings.exp and gdb.python/py-mi.exp required changes in some
expected outputs, because the array type created by $_gdb_setting_str
is now one element larger, to account for the null terminator.  I don't
think this change in user-visible behavior is a problem.

Change-Id: If8dd13cce55c70521e34e7c360139064b4e87496
This commit is contained in:
Simon Marchi 2021-07-13 11:31:42 -04:00 committed by Pedro Alves
parent f9e5d80cf7
commit 67ea24cb99
10 changed files with 258 additions and 19 deletions

View File

@ -2146,7 +2146,8 @@ value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
case var_filename: case var_filename:
case var_enum: case var_enum:
if (*(char **) cmd->var) if (*(char **) cmd->var)
return value_cstring (*(char **) cmd->var, strlen (*(char **) cmd->var), return value_cstring (*(char **) cmd->var,
strlen (*(char **) cmd->var) + 1,
builtin_type (gdbarch)->builtin_char); builtin_type (gdbarch)->builtin_char);
else else
return value_cstring ("", 1, return value_cstring ("", 1,
@ -2198,7 +2199,7 @@ str_value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
{ {
std::string cmd_val = get_setshow_command_value_string (cmd); std::string cmd_val = get_setshow_command_value_string (cmd);
return value_cstring (cmd_val.c_str (), cmd_val.size (), return value_cstring (cmd_val.c_str (), cmd_val.size () + 1,
builtin_type (gdbarch)->builtin_char); builtin_type (gdbarch)->builtin_char);
} }
@ -2212,7 +2213,8 @@ str_value_from_setting (const cmd_list_element *cmd, struct gdbarch *gdbarch)
escaping quotes. So, we directly use the cmd->var string value, escaping quotes. So, we directly use the cmd->var string value,
similarly to the value_from_setting code for these cases. */ similarly to the value_from_setting code for these cases. */
if (*(char **) cmd->var) if (*(char **) cmd->var)
return value_cstring (*(char **) cmd->var, strlen (*(char **) cmd->var), return value_cstring (*(char **) cmd->var,
strlen (*(char **) cmd->var) + 1,
builtin_type (gdbarch)->builtin_char); builtin_type (gdbarch)->builtin_char);
else else
return value_cstring ("", 1, return value_cstring ("", 1,

View File

@ -776,8 +776,6 @@ vlscm_convert_typed_value_from_scheme (const char *func_name,
} }
else if (scm_is_string (obj)) else if (scm_is_string (obj))
{ {
size_t len;
if (type != NULL) if (type != NULL)
{ {
except_scm = gdbscm_make_misc_error (func_name, type_arg_pos, except_scm = gdbscm_make_misc_error (func_name, type_arg_pos,
@ -789,12 +787,12 @@ vlscm_convert_typed_value_from_scheme (const char *func_name,
{ {
/* TODO: Provide option to specify conversion strategy. */ /* TODO: Provide option to specify conversion strategy. */
gdb::unique_xmalloc_ptr<char> s gdb::unique_xmalloc_ptr<char> s
= gdbscm_scm_to_string (obj, &len, = gdbscm_scm_to_string (obj, nullptr,
target_charset (gdbarch), target_charset (gdbarch),
0 /*non-strict*/, 0 /*non-strict*/,
&except_scm); &except_scm);
if (s != NULL) if (s != NULL)
value = value_cstring (s.get (), len, value = value_cstring (s.get (), strlen (s.get ()) + 1,
language_string_char_type (language, language_string_char_type (language,
gdbarch)); gdbarch));
else else

View File

@ -1907,7 +1907,7 @@ convert_value_from_python (PyObject *obj)
gdb::unique_xmalloc_ptr<char> s gdb::unique_xmalloc_ptr<char> s
= python_string_to_target_string (obj); = python_string_to_target_string (obj);
if (s != NULL) if (s != NULL)
value = value_cstring (s.get (), strlen (s.get ()), value = value_cstring (s.get (), strlen (s.get ()) + 1,
builtin_type_pychar); builtin_type_pychar);
} }
else if (PyObject_TypeCheck (obj, &value_object_type)) else if (PyObject_TypeCheck (obj, &value_object_type))

View File

@ -0,0 +1,32 @@
/* This testcase is part of GDB, the GNU debugger.
Copyright 2021 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
static void
trace_me (void)
{}
static void
end (void)
{}
int
main (void)
{
trace_me ();
end ();
return 0;
}

View File

@ -0,0 +1,198 @@
# Copyright 2021 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Test that string values are correctly allocated inside GDB when doing
# various operations that yield strings.
#
# The issue that lead to this test was a missing NULL terminator in the C-string
# values. We verify that we can print the null terminator of these strings.
load_lib "trace-support.exp"
load_lib "gdb-guile.exp"
standard_testfile
if {[build_executable "failed to prepare" $testfile $srcfile ]} {
return
}
# Check that the string contained in the convenienced variable $v is
# EXPECTED_STR.
#
# In particular, check that the null terminator is there and that we can't
# access a character past the end of the string.
proc check_v_string { expected_str } {
set len [string length $expected_str]
for { set i 0 } { $i < $len } { incr i } {
set c [string index $expected_str $i]
gdb_test "print \$v\[$i\]" "= $::decimal '$c'"
}
# Check that the string ends with a null terminator.
gdb_test "print \$v\[$i\]" {= 0 '\\000'}
# Check that we can't access a character after the end of the string.
incr i
gdb_test "print \$v\[$i\]" "no such vector element"
}
# Test with string values made by $_gdb_setting & co.
proc_with_prefix test_setting { } {
clean_restart
# This is an internal GDB implementation detail, but the variable backing a
# string setting starts as nullptr (unless explicitly initialized at startup).
# When assigning an empty value, the variable then points to an empty string.
# Test both cases, as it triggers different code paths (in addition to a
# non-empty value).
#
# Use "set trace-user" and "maintenance set test-settings string" as they are
# both not initialized at startup.
with_test_prefix "user setting" {
with_test_prefix "not set" {
foreach_with_prefix conv_func {$_gdb_setting $_gdb_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"trace-user\")"
check_v_string ""
}
}
with_test_prefix "set to empty" {
gdb_test "set trace-user"
foreach_with_prefix conv_func {$_gdb_setting $_gdb_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"trace-user\")"
check_v_string ""
}
}
with_test_prefix "set" {
gdb_test "set trace-user poulet"
foreach_with_prefix conv_func {$_gdb_setting $_gdb_setting_str} {
gdb_test_no_output {set $v = $_gdb_setting("trace-user")}
check_v_string "poulet"
}
}
}
with_test_prefix "maintenance setting" {
with_test_prefix "not set" {
foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"test-settings string\")"
check_v_string ""
}
}
with_test_prefix "set to empty" {
gdb_test "maintenance set test-settings string"
foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"test-settings string\")"
check_v_string ""
}
}
with_test_prefix "set" {
gdb_test "maintenance set test-settings string perchaude"
foreach_with_prefix conv_func {$_gdb_maint_setting $_gdb_maint_setting_str} {
gdb_test_no_output "set \$v = ${conv_func}(\"test-settings string\")"
check_v_string "perchaude"
}
}
}
# Test with a non-string setting, this tests yet another code path.
with_test_prefix "integer setting" {
gdb_test_no_output {set $v = $_gdb_setting_str("remotetimeout")}
check_v_string "2"
}
}
# Test with a string value created by gdb.Value in Python.
proc_with_prefix test_python_value { } {
clean_restart
if {[skip_python_tests]} {
untested "skipping test_python_value"
return
}
gdb_test_no_output "python gdb.set_convenience_variable(\"v\", \"bar\")" \
"set convenience var"
check_v_string "bar"
}
# Test with a string value created by make-value in Guile.
proc_with_prefix test_guile_value { } {
clean_restart
if {[skip_guile_tests]} {
untested "skipping test_guile_value"
return
}
# We can't set a convenience var from Guile, but we can append to history.
# Do that, then transfer to a convenience var with a CLI command.
gdb_test_no_output "guile (use-modules (gdb))"
gdb_test_multiple "guile (history-append! (make-value \"foo\"))" "make value" {
-re -wrap "($::decimal)" {
set histnum $expect_out(1,string)
}
}
gdb_test_no_output "set \$v = \$$histnum"
check_v_string "foo"
}
# Test with a string value coming from a string internal var. The only internal
# vars of this type, at the time of writing, are $trace_func and $trace_file.
# They both require inspecting a trace frame. So if the target is capable start
# tracing, record one trace frame, and use $trace_func.
proc_with_prefix test_internal_var { } {
if {![gdb_trace_common_supports_arch]} {
unsupported "arch does not support trace"
return
}
clean_restart $::binfile
if {![runto_main]} {
fail "could not run to main"
return
}
if {![gdb_target_supports_trace]} {
unsupported "target does not support trace"
return
}
gdb_test "break end" "Breakpoint $::decimal at $::hex.*"
gdb_test "trace trace_me" "Tracepoint $::decimal at $::hex.*"
gdb_test_no_output "tstart"
gdb_test "continue" "Breakpoint $::decimal, end.*"
gdb_test_no_output "tstop"
gdb_test "tfind" "Found trace frame 0, tracepoint $::decimal.*"
gdb_test_no_output "set \$v = \$trace_func"
gdb_test "tfind none" "No longer looking at any trace frame.*"
check_v_string "trace_me"
}
test_setting
test_python_value
test_guile_value
test_internal_var

View File

@ -498,7 +498,7 @@ proc_with_prefix test-enum {} {
gdb_test_no_output "$set_cmd zzz" gdb_test_no_output "$set_cmd zzz"
show_setting "$show_cmd" "zzz" 0 "yyy" show_setting "$show_cmd" "zzz" 0 "yyy"
check_type "test-settings enum" "type = char \\\[3\\\]" check_type "test-settings enum" "type = char \\\[4\\\]"
test_gdb_complete_multiple "$set_cmd " "" "" { test_gdb_complete_multiple "$set_cmd " "" "" {
"xxx" "xxx"

View File

@ -289,7 +289,7 @@ mi_create_dynamic_varobj nstype2 nstype2 1 \
"create nstype2 varobj" "create nstype2 varobj"
mi_list_varobj_children nstype2 { mi_list_varobj_children nstype2 {
{ {nstype2.<error at 0>} {<error at 0>} 6 {char \[6\]} } { {nstype2.<error at 0>} {<error at 0>} 7 {char \[7\]} }
} "list children after setting exception flag" } "list children after setting exception flag"
mi_create_varobj me me \ mi_create_varobj me me \

View File

@ -1747,6 +1747,8 @@ value_array (int lowbound, int highbound, struct value **elemvec)
return val; return val;
} }
/* See value.h. */
struct value * struct value *
value_cstring (const char *ptr, ssize_t len, struct type *char_type) value_cstring (const char *ptr, ssize_t len, struct type *char_type)
{ {
@ -1761,14 +1763,7 @@ value_cstring (const char *ptr, ssize_t len, struct type *char_type)
return val; return val;
} }
/* Create a value for a string constant by allocating space in the /* See value.h. */
inferior, copying the data into that space, and returning the
address with type TYPE_CODE_STRING. PTR points to the string
constant data; LEN is number of characters.
Note that string types are like array of char types with a lower
bound of zero and an upper bound of LEN - 1. Also note that the
string may contain embedded null bytes. */
struct value * struct value *
value_string (const char *ptr, ssize_t len, struct type *char_type) value_string (const char *ptr, ssize_t len, struct type *char_type)

View File

@ -2188,7 +2188,7 @@ value_of_internalvar (struct gdbarch *gdbarch, struct internalvar *var)
break; break;
case INTERNALVAR_STRING: case INTERNALVAR_STRING:
val = value_cstring (var->u.string, strlen (var->u.string), val = value_cstring (var->u.string, strlen (var->u.string) + 1,
builtin_type (gdbarch)->builtin_char); builtin_type (gdbarch)->builtin_char);
break; break;

View File

@ -782,8 +782,22 @@ class scoped_value_mark
const struct value *m_value; const struct value *m_value;
}; };
/* Create a not_lval value with type TYPE_CODE_ARRAY.
PTR points to the string data; LEN is the length of the string including the
NULL terminator. */
extern struct value *value_cstring (const char *ptr, ssize_t len, extern struct value *value_cstring (const char *ptr, ssize_t len,
struct type *char_type); struct type *char_type);
/* Create a not_lval value with type TYPE_CODE_STRING.
PTR points to the string data; LEN is number of characters.
Note that string types are like array of char types with a lower
bound of zero and an upper bound of LEN - 1. Also note that the
string may contain embedded null bytes. */
extern struct value *value_string (const char *ptr, ssize_t len, extern struct value *value_string (const char *ptr, ssize_t len,
struct type *char_type); struct type *char_type);