mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-16 17:23:55 +08:00
perf symbols: Add Rust demangling
Rust demangling is another step after bfd demangling. Add a diagnosis to identify mangled Rust symbols based on the hash that the Rust mangler appends as the last path component, as well as other characteristics. Add a demangler to reconstruct the original symbol. Committer notes: How I tested it: Enabled COPR on Fedora 24 and then installed the 'rust-binary' package, with it: $ cat src/main.rs fn main() { println!("Hello, world!"); } $ cat Cargo.toml [package] name = "hello_world" version = "0.0.1" authors = [ "Arnaldo Carvalho de Melo <acme@kernel.org>" ] $ perf record cargo bench Compiling hello_world v0.0.1 (file:///home/acme/projects/hello_world) Running target/release/hello_world-d4b9dab4b2a47d75 running 0 tests test result: ok. 0 passed; 0 failed; 0 ignored; 0 measured [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.096 MB perf.data (1457 samples) ] $ Before this patch: $ perf report --stdio --dsos librbml-e8edd0fd.so # dso: librbml-e8edd0fd.so # # Total Lost Samples: 0 # # Samples: 1K of event 'cycles:u' # Event count (approx.): 979599126 # # Overhead Command Symbol # ........ ....... ............................................................................................................. # 1.78% rustc [.] rbml::reader::maybe_get_doc::hb9d387df6024b15b 1.50% rustc [.] _$LT$reader..DocsIterator$LT$$u27$a$GT$$u20$as$u20$std..iter..Iterator$GT$::next::hd9af9e60d79a35c8 1.20% rustc [.] rbml::reader::doc_at::hc88107fba445af31 0.46% rustc [.] _$LT$reader..TaggedDocsIterator$LT$$u27$a$GT$$u20$as$u20$std..iter..Iterator$GT$::next::h0cb40e696e4bb489 0.35% rustc [.] rbml::reader::Decoder::_next_int::h66eef7825a398bc3 0.29% rustc [.] rbml::reader::Decoder::_next_sub::h8e5266005580b836 0.15% rustc [.] rbml::reader::get_doc::h094521c645459139 0.14% rustc [.] _$LT$reader..Decoder$LT$$u27$doc$GT$$u20$as$u20$serialize..Decoder$GT$::read_u32::h0acea2fff9669327 0.07% rustc [.] 
rbml::reader::Decoder::next_doc::h6714d469c9dfaf91 0.07% rustc [.] _ZN4rbml6reader10doc_as_u6417h930b740aa94f1d3aE@plt 0.06% rustc [.] _fini $ After: $ perf report --stdio --dsos librbml-e8edd0fd.so # dso: librbml-e8edd0fd.so # # Total Lost Samples: 0 # # Samples: 1K of event 'cycles:u' # Event count (approx.): 979599126 # # Overhead Command Symbol # ........ ....... ................................................................. # 1.78% rustc [.] rbml::reader::maybe_get_doc 1.50% rustc [.] <reader::DocsIterator<'a> as std::iter::Iterator>::next 1.20% rustc [.] rbml::reader::doc_at 0.46% rustc [.] <reader::TaggedDocsIterator<'a> as std::iter::Iterator>::next 0.35% rustc [.] rbml::reader::Decoder::_next_int 0.29% rustc [.] rbml::reader::Decoder::_next_sub 0.15% rustc [.] rbml::reader::get_doc 0.14% rustc [.] <reader::Decoder<'doc> as serialize::Decoder>::read_u32 0.07% rustc [.] rbml::reader::Decoder::next_doc 0.07% rustc [.] _ZN4rbml6reader10doc_as_u6417h930b740aa94f1d3aE@plt 0.06% rustc [.] _fini $ Signed-off-by: David Tolnay <dtolnay@gmail.com> Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Link: http://lkml.kernel.org/r/5780B7FA.3030602@gmail.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
1c1a3a4729
commit
cae15db749
@ -113,6 +113,7 @@ libperf-y += scripting-engines/
|
||||
libperf-$(CONFIG_ZLIB) += zlib.o
|
||||
libperf-$(CONFIG_LZMA) += lzma.o
|
||||
libperf-y += demangle-java.o
|
||||
libperf-y += demangle-rust.o
|
||||
|
||||
ifdef CONFIG_JITDUMP
|
||||
libperf-$(CONFIG_LIBELF) += jitdump.o
|
||||
|
269
tools/perf/util/demangle-rust.c
Normal file
269
tools/perf/util/demangle-rust.c
Normal file
@ -0,0 +1,269 @@
|
||||
#include <string.h>
|
||||
#include "util.h"
|
||||
#include "debug.h"
|
||||
|
||||
#include "demangle-rust.h"
|
||||
|
||||
/*
|
||||
* Mangled Rust symbols look like this:
|
||||
*
|
||||
* _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
|
||||
*
|
||||
* The original symbol is:
|
||||
*
|
||||
* <std::sys::fd::FileDesc as core::ops::Drop>::drop
|
||||
*
|
||||
* The last component of the path is a 64-bit hash in lowercase hex, prefixed
|
||||
* with "h". Rust does not have a global namespace between crates, an illusion
|
||||
* which Rust maintains by using the hash to distinguish things that would
|
||||
* otherwise have the same symbol.
|
||||
*
|
||||
* Any path component not starting with an XID_Start character is prefixed with
|
||||
* "_".
|
||||
*
|
||||
* The following escape sequences are used:
|
||||
*
|
||||
* "," => $C$
|
||||
* "@" => $SP$
|
||||
* "*" => $BP$
|
||||
* "&" => $RF$
|
||||
* "<" => $LT$
|
||||
* ">" => $GT$
|
||||
* "(" => $LP$
|
||||
* ")" => $RP$
|
||||
* " " => $u20$
|
||||
* "'" => $u27$
|
||||
* "[" => $u5b$
|
||||
* "]" => $u5d$
|
||||
* "~" => $u7e$
|
||||
*
|
||||
* A double ".." means "::" and a single "." means "-".
|
||||
*
|
||||
* The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
|
||||
*/
|
||||
|
||||
/* Text that introduces the hash component at the very end of a mangled symbol */
static const char *hash_prefix = "::h";
|
||||
/* Number of characters in hash_prefix */
static const size_t hash_prefix_len = 3;
|
||||
/* Number of hex digits in the hash that follows hash_prefix */
static const size_t hash_len = 16;
|
||||
|
||||
/* Helpers defined further down in this file */
static bool is_prefixed_hash(const char *start);
|
||||
static bool looks_like_rust(const char *sym, size_t len);
|
||||
static bool unescape(const char **in, char **out, const char *seq, char value);
|
||||
|
||||
/*
|
||||
* INPUT:
|
||||
* sym: symbol that has been through BFD-demangling
|
||||
*
|
||||
* This function looks for the following indicators:
|
||||
*
|
||||
* 1. The hash must consist of "h" followed by 16 lowercase hex digits.
|
||||
*
|
||||
* 2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
|
||||
* hex digits. This is true of 99.9998% of hashes so once in your life you
|
||||
* may see a false negative. The point is to notice path components that
|
||||
* could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
|
||||
* this case a false positive (non-Rust symbol has an important path
|
||||
* component removed because it looks like a Rust hash) is worse than a
|
||||
* false negative (the rare Rust symbol is not demangled) so this sets the
|
||||
* balance in favor of false negatives.
|
||||
*
|
||||
* 3. There must be no characters other than a-zA-Z0-9 and _.:$
|
||||
*
|
||||
* 4. There must be no unrecognized $-sign sequences.
|
||||
*
|
||||
* 5. There must be no sequence of three or more dots in a row ("...").
|
||||
*/
|
||||
bool
|
||||
rust_is_mangled(const char *sym)
|
||||
{
|
||||
size_t len, len_without_hash;
|
||||
|
||||
if (!sym)
|
||||
return false;
|
||||
|
||||
len = strlen(sym);
|
||||
if (len <= hash_prefix_len + hash_len)
|
||||
/* Not long enough to contain "::h" + hash + something else */
|
||||
return false;
|
||||
|
||||
len_without_hash = len - (hash_prefix_len + hash_len);
|
||||
if (!is_prefixed_hash(sym + len_without_hash))
|
||||
return false;
|
||||
|
||||
return looks_like_rust(sym, len_without_hash);
|
||||
}
|
||||
|
||||
/*
|
||||
* A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
|
||||
* digits must comprise between 5 and 15 (inclusive) distinct digits.
|
||||
*/
|
||||
static bool is_prefixed_hash(const char *str)
|
||||
{
|
||||
const char *end;
|
||||
bool seen[16];
|
||||
size_t i;
|
||||
int count;
|
||||
|
||||
if (strncmp(str, hash_prefix, hash_prefix_len))
|
||||
return false;
|
||||
str += hash_prefix_len;
|
||||
|
||||
memset(seen, false, sizeof(seen));
|
||||
for (end = str + hash_len; str < end; str++)
|
||||
if (*str >= '0' && *str <= '9')
|
||||
seen[*str - '0'] = true;
|
||||
else if (*str >= 'a' && *str <= 'f')
|
||||
seen[*str - 'a' + 10] = true;
|
||||
else
|
||||
return false;
|
||||
|
||||
/* Count how many distinct digits seen */
|
||||
count = 0;
|
||||
for (i = 0; i < 16; i++)
|
||||
if (seen[i])
|
||||
count++;
|
||||
|
||||
return count >= 5 && count <= 15;
|
||||
}
|
||||
|
||||
/*
 * Scan the first @len bytes of @str and report whether they could be part of
 * a mangled Rust path: only a-zA-Z0-9 and _.:$ occur, every '$' starts one of
 * the recognized escape sequences, and no three dots appear in a row.
 */
static bool looks_like_rust(const char *str, size_t len)
{
	static const char * const escapes[] = {
		"$C$",
		"$SP$", "$BP$", "$RF$", "$LT$", "$GT$", "$LP$", "$RP$",
		"$u20$", "$u27$", "$u5b$", "$u5d$", "$u7e$",
	};
	const size_t nr_escapes = sizeof(escapes) / sizeof(escapes[0]);
	const char *cur = str;
	const char *limit = str + len;

	while (cur < limit) {
		char c = *cur;

		if (c == '$') {
			size_t matched = 0;
			size_t i;

			/* Find the escape sequence that starts here, if any */
			for (i = 0; i < nr_escapes && matched == 0; i++) {
				size_t seq_len = strlen(escapes[i]);

				if (!strncmp(cur, escapes[i], seq_len))
					matched = seq_len;
			}

			if (matched == 0)
				return false;

			cur += matched;
			continue;
		}

		if (c == '.') {
			/* Do not allow three or more consecutive dots */
			if (!strncmp(cur, "...", 3))
				return false;

			cur++;
			continue;
		}

		if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
		    (c >= '0' && c <= '9') || c == '_' || c == ':') {
			cur++;
			continue;
		}

		/* Anything else cannot occur in a mangled Rust symbol */
		return false;
	}

	return true;
}
|
||||
|
||||
/*
|
||||
* INPUT:
|
||||
* sym: symbol for which rust_is_mangled(sym) returns true
|
||||
*
|
||||
* The input is demangled in-place because the mangled name is always longer
|
||||
* than the demangled one.
|
||||
*/
|
||||
void
|
||||
rust_demangle_sym(char *sym)
|
||||
{
|
||||
const char *in;
|
||||
char *out;
|
||||
const char *end;
|
||||
|
||||
if (!sym)
|
||||
return;
|
||||
|
||||
in = sym;
|
||||
out = sym;
|
||||
end = sym + strlen(sym) - (hash_prefix_len + hash_len);
|
||||
|
||||
while (in < end)
|
||||
switch (*in) {
|
||||
case '$':
|
||||
if (!(unescape(&in, &out, "$C$", ',')
|
||||
|| unescape(&in, &out, "$SP$", '@')
|
||||
|| unescape(&in, &out, "$BP$", '*')
|
||||
|| unescape(&in, &out, "$RF$", '&')
|
||||
|| unescape(&in, &out, "$LT$", '<')
|
||||
|| unescape(&in, &out, "$GT$", '>')
|
||||
|| unescape(&in, &out, "$LP$", '(')
|
||||
|| unescape(&in, &out, "$RP$", ')')
|
||||
|| unescape(&in, &out, "$u20$", ' ')
|
||||
|| unescape(&in, &out, "$u27$", '\'')
|
||||
|| unescape(&in, &out, "$u5b$", '[')
|
||||
|| unescape(&in, &out, "$u5d$", ']')
|
||||
|| unescape(&in, &out, "$u7e$", '~'))) {
|
||||
pr_err("demangle-rust: unexpected escape sequence");
|
||||
goto done;
|
||||
}
|
||||
break;
|
||||
case '_':
|
||||
/*
|
||||
* If this is the start of a path component and the next
|
||||
* character is an escape sequence, ignore the
|
||||
* underscore. The mangler inserts an underscore to make
|
||||
* sure the path component begins with a XID_Start
|
||||
* character.
|
||||
*/
|
||||
if ((in == sym || in[-1] == ':') && in[1] == '$')
|
||||
in++;
|
||||
else
|
||||
*out++ = *in++;
|
||||
break;
|
||||
case '.':
|
||||
if (in[1] == '.') {
|
||||
/* ".." becomes "::" */
|
||||
*out++ = ':';
|
||||
*out++ = ':';
|
||||
in += 2;
|
||||
} else {
|
||||
/* "." becomes "-" */
|
||||
*out++ = '-';
|
||||
in++;
|
||||
}
|
||||
break;
|
||||
case 'a' ... 'z':
|
||||
case 'A' ... 'Z':
|
||||
case '0' ... '9':
|
||||
case ':':
|
||||
*out++ = *in++;
|
||||
break;
|
||||
default:
|
||||
pr_err("demangle-rust: unexpected character '%c' in symbol\n",
|
||||
*in);
|
||||
goto done;
|
||||
}
|
||||
|
||||
done:
|
||||
*out = '\0';
|
||||
}
|
||||
|
||||
/*
 * If the input at *in starts with the escape sequence @seq, store @value at
 * *out, advance *in past the sequence and *out by one character, and return
 * true. Otherwise leave both cursors alone and return false.
 */
static bool unescape(const char **in, char **out, const char *seq, char value)
{
	const size_t seq_len = strlen(seq);
	const bool matched = strncmp(*in, seq, seq_len) == 0;

	if (matched) {
		*(*out)++ = value;
		*in += seq_len;
	}

	return matched;
}
|
7
tools/perf/util/demangle-rust.h
Normal file
7
tools/perf/util/demangle-rust.h
Normal file
@ -0,0 +1,7 @@
|
||||
#ifndef __PERF_DEMANGLE_RUST
#define __PERF_DEMANGLE_RUST 1

/* The prototypes below use bool; include it here so the header stands alone */
#include <stdbool.h>

/*
 * Returns true when @str, a symbol that has been through BFD demangling,
 * looks like it is still Rust-mangled.
 */
bool rust_is_mangled(const char *str);

/*
 * Rust-demangles @str in place. @str must be a symbol for which
 * rust_is_mangled() returns true.
 */
void rust_demangle_sym(char *str);

#endif /* __PERF_DEMANGLE_RUST */
|
@ -7,6 +7,7 @@
|
||||
|
||||
#include "symbol.h"
|
||||
#include "demangle-java.h"
|
||||
#include "demangle-rust.h"
|
||||
#include "machine.h"
|
||||
#include "vdso.h"
|
||||
#include <symbol/kallsyms.h>
|
||||
@ -1081,6 +1082,13 @@ new_symbol:
|
||||
demangled = bfd_demangle(NULL, elf_name, demangle_flags);
|
||||
if (demangled == NULL)
|
||||
demangled = java_demangle_sym(elf_name, JAVA_DEMANGLE_NORET);
|
||||
else if (rust_is_mangled(demangled))
|
||||
/*
|
||||
* Input to Rust demangling is the BFD-demangled
|
||||
* name which it Rust-demangles in place.
|
||||
*/
|
||||
rust_demangle_sym(demangled);
|
||||
|
||||
if (demangled != NULL)
|
||||
elf_name = demangled;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user