mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-27 16:25:32 +08:00
fadump: Convert firmware-assisted cpu state dump data into elf notes.
When registered for firmware assisted dump on powerpc, firmware preserves the registers for the active CPUs during a system crash. This patch reads the cpu register data stored in Firmware-assisted dump format (except for crashing cpu) and converts it into elf notes and updates the PT_NOTE program header accordingly. The exact register state for crashing cpu is saved to fadump crash info structure in scratch area during crash_fadump() and read during second kernel boot. Signed-off-by: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
This commit is contained in:
parent
2df173d9e8
commit
ebaeb5ae24
@ -65,6 +65,18 @@
|
||||
/* Dump status flag */
|
||||
#define FADUMP_ERROR_FLAG 0x2000
|
||||
|
||||
/* Selects the lower 4 bytes of a register value, which carry the cpu id. */
#define FADUMP_CPU_ID_MASK	((1UL << 32) - 1)

/* Crashing-cpu id stored in the crash info header until a crash sets it. */
#define CPU_UNKNOWN		(~((u32)0))

/* Utility macros */

/*
 * Advance reg_entry past the register list of the current CPU, i.e. to the
 * entry that follows the next "CPUEND" entry.
 *
 * reg_entry must be a modifiable pointer to struct fadump_reg_entry. It is
 * evaluated more than once, so it must not have side effects.
 */
#define SKIP_TO_NEXT_CPU(reg_entry)				\
({								\
	while ((reg_entry)->reg_id != REG_ID("CPUEND"))		\
		(reg_entry)++;					\
	(reg_entry)++;						\
})
|
||||
|
||||
/* Kernel Dump section info */
|
||||
struct fadump_section {
|
||||
u32 request_flag;
|
||||
@ -119,6 +131,9 @@ struct fw_dump {
|
||||
unsigned long reserve_bootvar;
|
||||
|
||||
unsigned long fadumphdr_addr;
|
||||
unsigned long cpu_notes_buf;
|
||||
unsigned long cpu_notes_buf_size;
|
||||
|
||||
int ibm_configure_kernel_dump;
|
||||
|
||||
unsigned long fadump_enabled:1;
|
||||
@ -143,13 +158,40 @@ static inline u64 str_to_u64(const char *str)
|
||||
return val;
|
||||
}
|
||||
/* Turns an ASCII tag into a u64 by forwarding to str_to_u64(). */
#define STR_TO_HEX(x) str_to_u64(x)
|
||||
/* Same conversion, used for register entry identifiers such as "CPUEND". */
#define REG_ID(x) str_to_u64(x)
|
||||
|
||||
/* Magic number stored in the fadump crash info header. */
#define FADUMP_CRASH_INFO_MAGIC STR_TO_HEX("FADMPINF")
|
||||
/* Magic number expected at the start of the register save area. */
#define REGSAVE_AREA_MAGIC STR_TO_HEX("REGSAVE")
|
||||
|
||||
/* The firmware-assisted dump format.
|
||||
*
|
||||
* The register save area is an area in the partition's memory used to preserve
|
||||
* the register contents (CPU state data) for the active CPUs during a firmware
|
||||
* assisted dump. The dump format contains register save area header followed
|
||||
* by register entries. Each list of registers for a CPU starts with
|
||||
* "CPUSTRT" and ends with "CPUEND".
|
||||
*/
|
||||
|
||||
/*
 * Register save area header.
 *
 * Found at the start of the cpu state data. The register entries are not
 * at a fixed position: they are located through num_cpu_offset.
 */
|
||||
struct fadump_reg_save_area_header {
|
||||
/* Must equal REGSAVE_AREA_MAGIC, otherwise the area is rejected. */
u64 magic_number;
|
||||
/* Presumably the format version; not read by the code visible here. */
u32 version;
|
||||
/* Byte offset from the start of this header to the u32 cpu count. */
u32 num_cpu_offset;
|
||||
};
|
||||
|
||||
/*
 * Register entry: an 8 byte ASCII identifier followed by an 8 byte value.
 * A CPU's list of entries starts with "CPUSTRT" and ends with "CPUEND".
 */
|
||||
struct fadump_reg_entry {
|
||||
/* Identifier, compared against REG_ID("...") values. */
u64 reg_id;
|
||||
/* Register contents; for "CPUSTRT" the lower 4 bytes are the cpu id. */
u64 reg_value;
|
||||
};
|
||||
|
||||
/*
 * fadump crash info structure
 *
 * Filled in by the crashing kernel (see crash_fadump()) and read back while
 * the cpu notes are built during the next boot.
 */
|
||||
struct fadump_crash_info_header {
|
||||
/* Set to FADUMP_CRASH_INFO_MAGIC when the header is initialised. */
u64 magic_number;
|
||||
/* Address of the ELF core header whose PT_NOTE entry gets updated. */
u64 elfcorehdr_addr;
|
||||
/* CPU_UNKNOWN until crash_fadump() stores the crashing cpu's id. */
u32 crashing_cpu;
|
||||
/* Register state recorded by crash_fadump() on the crashing cpu. */
struct pt_regs regs;
|
||||
/* Copy of cpu_online_mask taken by crash_fadump(). */
struct cpumask cpu_online_mask;
|
||||
};
|
||||
|
||||
/* Crash memory ranges */
|
||||
@ -165,7 +207,9 @@ extern int early_init_dt_scan_fw_dump(unsigned long node,
|
||||
extern int fadump_reserve_mem(void);
|
||||
extern int setup_fadump(void);
|
||||
extern int is_fadump_active(void);
|
||||
extern void crash_fadump(struct pt_regs *, const char *);
|
||||
#else /* CONFIG_FA_DUMP */
|
||||
static inline int is_fadump_active(void) { return 0; }
|
||||
static inline void crash_fadump(struct pt_regs *regs, const char *str) { }
|
||||
#endif
|
||||
#endif
|
||||
|
@ -240,6 +240,7 @@ static unsigned long get_fadump_area_size(void)
|
||||
size += fw_dump.boot_memory_size;
|
||||
size += sizeof(struct fadump_crash_info_header);
|
||||
size += sizeof(struct elfhdr); /* ELF core header.*/
|
||||
size += sizeof(struct elf_phdr); /* place holder for cpu notes */
|
||||
/* Program headers for crash memory regions. */
|
||||
size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2);
|
||||
|
||||
@ -393,6 +394,285 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
|
||||
}
|
||||
}
|
||||
|
||||
void crash_fadump(struct pt_regs *regs, const char *str)
|
||||
{
|
||||
struct fadump_crash_info_header *fdh = NULL;
|
||||
|
||||
if (!fw_dump.dump_registered || !fw_dump.fadumphdr_addr)
|
||||
return;
|
||||
|
||||
fdh = __va(fw_dump.fadumphdr_addr);
|
||||
crashing_cpu = smp_processor_id();
|
||||
fdh->crashing_cpu = crashing_cpu;
|
||||
crash_save_vmcoreinfo();
|
||||
|
||||
if (regs)
|
||||
fdh->regs = *regs;
|
||||
else
|
||||
ppc_save_regs(&fdh->regs);
|
||||
|
||||
fdh->cpu_online_mask = *cpu_online_mask;
|
||||
|
||||
/* Call ibm,os-term rtas call to trigger firmware assisted dump */
|
||||
rtas_os_term((char *)str);
|
||||
}
|
||||
|
||||
#define GPR_MASK 0xffffff0000000000
|
||||
static inline int fadump_gpr_index(u64 id)
|
||||
{
|
||||
int i = -1;
|
||||
char str[3];
|
||||
|
||||
if ((id & GPR_MASK) == REG_ID("GPR")) {
|
||||
/* get the digits at the end */
|
||||
id &= ~GPR_MASK;
|
||||
id >>= 24;
|
||||
str[2] = '\0';
|
||||
str[1] = id & 0xff;
|
||||
str[0] = (id >> 8) & 0xff;
|
||||
sscanf(str, "%d", &i);
|
||||
if (i > 31)
|
||||
i = -1;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
/*
 * Store one register value from the dump into the pt_regs field selected
 * by its identifier. Identifiers that are not handled here are ignored.
 *
 * @regs:    register set being filled in.
 * @reg_id:  identifier of the register entry.
 * @reg_val: value of the register entry.
 */
static inline void fadump_set_regval(struct pt_regs *regs, u64 reg_id,
								u64 reg_val)
{
	const unsigned long val = (unsigned long)reg_val;
	const int gpr = fadump_gpr_index(reg_id);

	/* General purpose registers are matched first. */
	if (gpr >= 0) {
		regs->gpr[gpr] = val;
		return;
	}
	if (reg_id == REG_ID("NIA")) {
		regs->nip = val;
		return;
	}
	if (reg_id == REG_ID("MSR")) {
		regs->msr = val;
		return;
	}
	if (reg_id == REG_ID("CTR")) {
		regs->ctr = val;
		return;
	}
	if (reg_id == REG_ID("LR")) {
		regs->link = val;
		return;
	}
	if (reg_id == REG_ID("XER")) {
		regs->xer = val;
		return;
	}
	if (reg_id == REG_ID("CR")) {
		regs->ccr = val;
		return;
	}
	if (reg_id == REG_ID("DAR")) {
		regs->dar = val;
		return;
	}
	if (reg_id == REG_ID("DSISR"))
		regs->dsisr = val;
}
|
||||
|
||||
static struct fadump_reg_entry*
|
||||
fadump_read_registers(struct fadump_reg_entry *reg_entry, struct pt_regs *regs)
|
||||
{
|
||||
memset(regs, 0, sizeof(struct pt_regs));
|
||||
|
||||
while (reg_entry->reg_id != REG_ID("CPUEND")) {
|
||||
fadump_set_regval(regs, reg_entry->reg_id,
|
||||
reg_entry->reg_value);
|
||||
reg_entry++;
|
||||
}
|
||||
reg_entry++;
|
||||
return reg_entry;
|
||||
}
|
||||
|
||||
/*
 * Append one ELF note (header, name, descriptor) at buf.
 *
 * @buf:      position in the notes buffer to write at.
 * @name:     NUL-terminated note name; the terminator is stored as well.
 * @type:     value stored in n_type.
 * @data:     descriptor payload.
 * @data_len: size of the payload in bytes.
 *
 * Each of the three parts is stepped over in whole 32-bit words. The pad
 * bytes are skipped rather than written, so the buffer should already be
 * zeroed.
 *
 * Returns the position right after the note, where the next one goes.
 */
static u32 *fadump_append_elf_note(u32 *buf, char *name, unsigned type,
						void *data, size_t data_len)
{
	struct elf_note note;

	note.n_namesz = strlen(name) + 1;
	note.n_descsz = data_len;
	note.n_type   = type;

	memcpy(buf, &note, sizeof(note));
	buf += (sizeof(note) + 3)/4;
	memcpy(buf, name, note.n_namesz);
	buf += (note.n_namesz + 3)/4;
	memcpy(buf, data, note.n_descsz);
	buf += (note.n_descsz + 3)/4;

	return buf;
}
|
||||
|
||||
/*
 * Write the terminating note at buf: a note header whose name size,
 * descriptor size and type are all zero.
 */
static void fadump_final_note(u32 *buf)
{
	struct elf_note note;

	note.n_namesz = 0;
	note.n_descsz = 0;
	note.n_type   = 0;
	memcpy(buf, &note, sizeof(note));
}
|
||||
|
||||
/*
 * Wrap a register set in an NT_PRSTATUS note and append it at buf.
 *
 * Only pr_reg is filled in; the rest of the prstatus stays zero, pr_pid
 * included (see the FIXME below).
 *
 * Returns the buffer position following the new note.
 */
static u32 *fadump_regs_to_elf_notes(u32 *buf, struct pt_regs *regs)
{
	struct elf_prstatus status;

	memset(&status, 0, sizeof(status));
	/*
	 * FIXME: How do i get PID? Do I really need it?
	 * prstatus.pr_pid = ????
	 */
	elf_core_copy_kernel_regs(&status.pr_reg, regs);

	return fadump_append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS,
				      &status, sizeof(status));
}
|
||||
|
||||
static void fadump_update_elfcore_header(char *bufp)
|
||||
{
|
||||
struct elfhdr *elf;
|
||||
struct elf_phdr *phdr;
|
||||
|
||||
elf = (struct elfhdr *)bufp;
|
||||
bufp += sizeof(struct elfhdr);
|
||||
|
||||
/* First note is a place holder for cpu notes info. */
|
||||
phdr = (struct elf_phdr *)bufp;
|
||||
|
||||
if (phdr->p_type == PT_NOTE) {
|
||||
phdr->p_paddr = fw_dump.cpu_notes_buf;
|
||||
phdr->p_offset = phdr->p_paddr;
|
||||
phdr->p_filesz = fw_dump.cpu_notes_buf_size;
|
||||
phdr->p_memsz = fw_dump.cpu_notes_buf_size;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
static void *fadump_cpu_notes_buf_alloc(unsigned long size)
|
||||
{
|
||||
void *vaddr;
|
||||
struct page *page;
|
||||
unsigned long order, count, i;
|
||||
|
||||
order = get_order(size);
|
||||
vaddr = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
|
||||
if (!vaddr)
|
||||
return NULL;
|
||||
|
||||
count = 1 << order;
|
||||
page = virt_to_page(vaddr);
|
||||
for (i = 0; i < count; i++)
|
||||
SetPageReserved(page + i);
|
||||
return vaddr;
|
||||
}
|
||||
|
||||
static void fadump_cpu_notes_buf_free(unsigned long vaddr, unsigned long size)
|
||||
{
|
||||
struct page *page;
|
||||
unsigned long order, count, i;
|
||||
|
||||
order = get_order(size);
|
||||
count = 1 << order;
|
||||
page = virt_to_page(vaddr);
|
||||
for (i = 0; i < count; i++)
|
||||
ClearPageReserved(page + i);
|
||||
__free_pages(page, order);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read CPU state dump data and convert it into ELF notes.
|
||||
* The CPU dump starts with magic number "REGSAVE". NumCpusOffset should be
|
||||
* used to access the data to allow for additional fields to be added without
|
||||
* affecting compatibility. Each list of registers for a CPU starts with
|
||||
* "CPUSTRT" and ends with "CPUEND". Each register entry is of 16 bytes,
|
||||
* 8 Byte ASCII identifier and 8 Byte register value. The register entry
|
||||
* with identifier "CPUSTRT" and "CPUEND" contains 4 byte cpu id as part
|
||||
* of register value. For more details refer to PAPR document.
|
||||
*
|
||||
* Only for the crashing cpu we ignore the CPU dump data and get exact
|
||||
* state from fadump crash info structure populated by first kernel at the
|
||||
* time of crash.
|
||||
*/
|
||||
static int __init fadump_build_cpu_notes(const struct fadump_mem_struct *fdm)
|
||||
{
|
||||
struct fadump_reg_save_area_header *reg_header;
|
||||
struct fadump_reg_entry *reg_entry;
|
||||
struct fadump_crash_info_header *fdh = NULL;
|
||||
void *vaddr;
|
||||
unsigned long addr;
|
||||
u32 num_cpus, *note_buf;
|
||||
struct pt_regs regs;
|
||||
int i, rc = 0, cpu = 0;
|
||||
|
||||
if (!fdm->cpu_state_data.bytes_dumped)
|
||||
return -EINVAL;
|
||||
|
||||
addr = fdm->cpu_state_data.destination_address;
|
||||
vaddr = __va(addr);
|
||||
|
||||
reg_header = vaddr;
|
||||
if (reg_header->magic_number != REGSAVE_AREA_MAGIC) {
|
||||
printk(KERN_ERR "Unable to read register save area.\n");
|
||||
return -ENOENT;
|
||||
}
|
||||
pr_debug("--------CPU State Data------------\n");
|
||||
pr_debug("Magic Number: %llx\n", reg_header->magic_number);
|
||||
pr_debug("NumCpuOffset: %x\n", reg_header->num_cpu_offset);
|
||||
|
||||
vaddr += reg_header->num_cpu_offset;
|
||||
num_cpus = *((u32 *)(vaddr));
|
||||
pr_debug("NumCpus : %u\n", num_cpus);
|
||||
vaddr += sizeof(u32);
|
||||
reg_entry = (struct fadump_reg_entry *)vaddr;
|
||||
|
||||
/* Allocate buffer to hold cpu crash notes. */
|
||||
fw_dump.cpu_notes_buf_size = num_cpus * sizeof(note_buf_t);
|
||||
fw_dump.cpu_notes_buf_size = PAGE_ALIGN(fw_dump.cpu_notes_buf_size);
|
||||
note_buf = fadump_cpu_notes_buf_alloc(fw_dump.cpu_notes_buf_size);
|
||||
if (!note_buf) {
|
||||
printk(KERN_ERR "Failed to allocate 0x%lx bytes for "
|
||||
"cpu notes buffer\n", fw_dump.cpu_notes_buf_size);
|
||||
return -ENOMEM;
|
||||
}
|
||||
fw_dump.cpu_notes_buf = __pa(note_buf);
|
||||
|
||||
pr_debug("Allocated buffer for cpu notes of size %ld at %p\n",
|
||||
(num_cpus * sizeof(note_buf_t)), note_buf);
|
||||
|
||||
if (fw_dump.fadumphdr_addr)
|
||||
fdh = __va(fw_dump.fadumphdr_addr);
|
||||
|
||||
for (i = 0; i < num_cpus; i++) {
|
||||
if (reg_entry->reg_id != REG_ID("CPUSTRT")) {
|
||||
printk(KERN_ERR "Unable to read CPU state data\n");
|
||||
rc = -ENOENT;
|
||||
goto error_out;
|
||||
}
|
||||
/* Lower 4 bytes of reg_value contains logical cpu id */
|
||||
cpu = reg_entry->reg_value & FADUMP_CPU_ID_MASK;
|
||||
if (!cpumask_test_cpu(cpu, &fdh->cpu_online_mask)) {
|
||||
SKIP_TO_NEXT_CPU(reg_entry);
|
||||
continue;
|
||||
}
|
||||
pr_debug("Reading register data for cpu %d...\n", cpu);
|
||||
if (fdh && fdh->crashing_cpu == cpu) {
|
||||
regs = fdh->regs;
|
||||
note_buf = fadump_regs_to_elf_notes(note_buf, ®s);
|
||||
SKIP_TO_NEXT_CPU(reg_entry);
|
||||
} else {
|
||||
reg_entry++;
|
||||
reg_entry = fadump_read_registers(reg_entry, ®s);
|
||||
note_buf = fadump_regs_to_elf_notes(note_buf, ®s);
|
||||
}
|
||||
}
|
||||
fadump_final_note(note_buf);
|
||||
|
||||
pr_debug("Updating elfcore header (%llx) with cpu notes\n",
|
||||
fdh->elfcorehdr_addr);
|
||||
fadump_update_elfcore_header((char *)__va(fdh->elfcorehdr_addr));
|
||||
return 0;
|
||||
|
||||
error_out:
|
||||
fadump_cpu_notes_buf_free((unsigned long)__va(fw_dump.cpu_notes_buf),
|
||||
fw_dump.cpu_notes_buf_size);
|
||||
fw_dump.cpu_notes_buf = 0;
|
||||
fw_dump.cpu_notes_buf_size = 0;
|
||||
return rc;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate and process the dump data stored by firmware before exporting
|
||||
* it through '/proc/vmcore'.
|
||||
@ -400,18 +680,21 @@ static void register_fw_dump(struct fadump_mem_struct *fdm)
|
||||
static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
|
||||
{
|
||||
struct fadump_crash_info_header *fdh;
|
||||
int rc = 0;
|
||||
|
||||
if (!fdm_active || !fw_dump.fadumphdr_addr)
|
||||
return -EINVAL;
|
||||
|
||||
/* Check if the dump data is valid. */
|
||||
if ((fdm_active->header.dump_status_flag == FADUMP_ERROR_FLAG) ||
|
||||
(fdm_active->cpu_state_data.error_flags != 0) ||
|
||||
(fdm_active->rmr_region.error_flags != 0)) {
|
||||
printk(KERN_ERR "Dump taken by platform is not valid\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (fdm_active->rmr_region.bytes_dumped !=
|
||||
fdm_active->rmr_region.source_len) {
|
||||
if ((fdm_active->rmr_region.bytes_dumped !=
|
||||
fdm_active->rmr_region.source_len) ||
|
||||
!fdm_active->cpu_state_data.bytes_dumped) {
|
||||
printk(KERN_ERR "Dump taken by platform is incomplete\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
@ -423,6 +706,10 @@ static int __init process_fadump(const struct fadump_mem_struct *fdm_active)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rc = fadump_build_cpu_notes(fdm_active);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
/*
|
||||
* We are done validating dump info and elfcore header is now ready
|
||||
* to be exported. set elfcorehdr_addr so that vmcore module will
|
||||
@ -537,6 +824,27 @@ static int fadump_create_elfcore_headers(char *bufp)
|
||||
elf = (struct elfhdr *)bufp;
|
||||
bufp += sizeof(struct elfhdr);
|
||||
|
||||
/*
|
||||
* setup ELF PT_NOTE, place holder for cpu notes info. The notes info
|
||||
* will be populated during second kernel boot after crash. Hence
|
||||
* this PT_NOTE will always be the first elf note.
|
||||
*
|
||||
* NOTE: Any new ELF note addition should be placed after this note.
|
||||
*/
|
||||
phdr = (struct elf_phdr *)bufp;
|
||||
bufp += sizeof(struct elf_phdr);
|
||||
phdr->p_type = PT_NOTE;
|
||||
phdr->p_flags = 0;
|
||||
phdr->p_vaddr = 0;
|
||||
phdr->p_align = 0;
|
||||
|
||||
phdr->p_offset = 0;
|
||||
phdr->p_paddr = 0;
|
||||
phdr->p_filesz = 0;
|
||||
phdr->p_memsz = 0;
|
||||
|
||||
(elf->e_phnum)++;
|
||||
|
||||
/* setup PT_LOAD sections. */
|
||||
|
||||
for (i = 0; i < crash_mem_ranges; i++) {
|
||||
@ -588,6 +896,8 @@ static unsigned long init_fadump_header(unsigned long addr)
|
||||
memset(fdh, 0, sizeof(struct fadump_crash_info_header));
|
||||
fdh->magic_number = FADUMP_CRASH_INFO_MAGIC;
|
||||
fdh->elfcorehdr_addr = addr;
|
||||
/* We will set the crashing cpu id in crash_fadump() during crash. */
|
||||
fdh->crashing_cpu = CPU_UNKNOWN;
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
@ -61,6 +61,7 @@
|
||||
#include <asm/xmon.h>
|
||||
#include <asm/cputhreads.h>
|
||||
#include <mm/mmu_decl.h>
|
||||
#include <asm/fadump.h>
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
@ -639,6 +640,11 @@ EXPORT_SYMBOL(check_legacy_ioport);
|
||||
/*
 * Panic notifier callback. Firmware-assisted dump gets the first chance to
 * handle the panic; after that the platform panic hook is called.
 *
 * ptr is handed to both crash_fadump() and ppc_md.panic(); this and event
 * are not used. Returns NOTIFY_DONE if ppc_md.panic() returns at all.
 */
static int ppc_panic_event(struct notifier_block *this,
|
||||
unsigned long event, void *ptr)
|
||||
{
|
||||
/*
 * If firmware-assisted dump has been registered then trigger
 * firmware-assisted dump and let firmware handle everything else.
 * No register state is passed (NULL), so crash_fadump() captures the
 * current registers itself.
 */
|
||||
crash_fadump(NULL, ptr);
|
||||
ppc_md.panic(ptr); /* May not return */
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
@ -57,6 +57,7 @@
|
||||
#include <asm/kexec.h>
|
||||
#include <asm/ppc-opcode.h>
|
||||
#include <asm/rio.h>
|
||||
#include <asm/fadump.h>
|
||||
|
||||
#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
|
||||
int (*__debugger)(struct pt_regs *regs) __read_mostly;
|
||||
@ -145,6 +146,8 @@ static void __kprobes oops_end(unsigned long flags, struct pt_regs *regs,
|
||||
arch_spin_unlock(&die_lock);
|
||||
raw_local_irq_restore(flags);
|
||||
|
||||
crash_fadump(regs, "die oops");
|
||||
|
||||
/*
|
||||
* A system reset (0x100) is a request to dump, so we always send
|
||||
* it through the crashdump code.
|
||||
|
Loading…
Reference in New Issue
Block a user