linux/arch/powerpc/kernel/machine_kexec_file_64.c

321 lines
8.8 KiB
C
Raw Normal View History

/*
* ppc64 code to implement the kexec_file_load syscall
*
* Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
* Copyright (C) 2004 IBM Corp.
* Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
* Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
* Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
* Copyright (C) 2016 IBM Corporation
*
* Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
* Heavily modified for the kernel by
* Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation (version 2 of the License).
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/slab.h>
#include <linux/kexec.h>
#include <linux/memblock.h>
#include <linux/of_fdt.h>
#include <linux/libfdt.h>
powerpc: ima: get the kexec buffer passed by the previous kernel Patch series "ima: carry the measurement list across kexec", v8. The TPM PCRs are only reset on a hard reboot. In order to validate a TPM's quote after a soft reboot (eg. kexec -e), the IMA measurement list of the running kernel must be saved and then restored on the subsequent boot, possibly of a different architecture. The existing securityfs binary_runtime_measurements file conveniently provides a serialized format of the IMA measurement list. This patch set serializes the measurement list in this format and restores it. Up to now, the binary_runtime_measurements was defined as architecture native format. The assumption being that userspace could and would handle any architecture conversions. With the ability of carrying the measurement list across kexec, possibly from one architecture to a different one, the per boot architecture information is lost and with it the ability of recalculating the template digest hash. To resolve this problem, without breaking the existing ABI, this patch set introduces the boot command line option "ima_canonical_fmt", which is arbitrarily defined as little endian. The need for this boot command line option will be limited to the existing version 1 format of the binary_runtime_measurements. Subsequent formats will be defined as canonical format (eg. TPM 2.0 support for larger digests). A simplified method of Thiago Bauermann's "kexec buffer handover" patch series for carrying the IMA measurement list across kexec is included in this patch set. The simplified method requires all file measurements be taken prior to executing the kexec load, as subsequent measurements will not be carried across the kexec and restored. This patch (of 10): The IMA kexec buffer allows the currently running kernel to pass the measurement list via a kexec segment to the kernel that will be kexec'd. The second kernel can check whether the previous kernel sent the buffer and retrieve it. This is the architecture-specific part which enables IMA to receive the measurement list passed by the previous kernel. It will be used in the next patch. The change in machine_kexec_64.c is to factor out the logic of removing an FDT memory reservation so that it can be used by remove_ima_buffer. Link: http://lkml.kernel.org/r/1480554346-29071-2-git-send-email-zohar@linux.vnet.ibm.com Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Andreas Steffen <andreas.steffen@strongswan.org> Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com> Cc: Josh Sklar <sklar@linux.vnet.ibm.com> Cc: Dave Young <dyoung@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Baoquan He <bhe@redhat.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Stewart Smith <stewart@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-12-20 08:22:32 +08:00
#include <asm/ima.h>
#define SLAVE_CODE_SIZE 256
const struct kexec_file_ops * const kexec_file_loaders[] = {
&kexec_elf64_ops,
NULL
};
int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
unsigned long buf_len)
{
/* We don't support crash kernels yet. */
if (image->type == KEXEC_TYPE_CRASH)
return -EOPNOTSUPP;
return kexec_image_probe_default(image, buf, buf_len);
}
/**
* arch_kexec_walk_mem - call func(data) for each unreserved memory block
* @kbuf: Context info for the search. Also passed to @func.
* @func: Function to call for each memory block.
*
* This function is used by kexec_add_buffer and kexec_locate_mem_hole
* to find unreserved memory to load kexec segments into.
*
* Return: The memory walk will stop when func returns a non-zero value
* and that value will be returned. If all free regions are visited without
* func returning non-zero, then zero will be returned.
*/
int arch_kexec_walk_mem(struct kexec_buf *kbuf,
int (*func)(struct resource *, void *))
{
int ret = 0;
u64 i;
phys_addr_t mstart, mend;
struct resource res = { };
if (kbuf->top_down) {
for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
&mstart, &mend, NULL) {
/*
* In memblock, end points to the first byte after the
* range while in kexec, end points to the last byte
* in the range.
*/
res.start = mstart;
res.end = mend - 1;
ret = func(&res, kbuf);
if (ret)
break;
}
} else {
for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend,
NULL) {
/*
* In memblock, end points to the first byte after the
* range while in kexec, end points to the last byte
* in the range.
*/
res.start = mstart;
res.end = mend - 1;
ret = func(&res, kbuf);
if (ret)
break;
}
}
return ret;
}
/**
* setup_purgatory - initialize the purgatory's global variables
* @image: kexec image.
* @slave_code: Slave code for the purgatory.
* @fdt: Flattened device tree for the next kernel.
* @kernel_load_addr: Address where the kernel is loaded.
* @fdt_load_addr: Address where the flattened device tree is loaded.
*
* Return: 0 on success, or negative errno on error.
*/
int setup_purgatory(struct kimage *image, const void *slave_code,
const void *fdt, unsigned long kernel_load_addr,
unsigned long fdt_load_addr)
{
unsigned int *slave_code_buf, master_entry;
int ret;
slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
if (!slave_code_buf)
return -ENOMEM;
/* Get the slave code from the new kernel and put it in purgatory. */
ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
slave_code_buf, SLAVE_CODE_SIZE,
true);
if (ret) {
kfree(slave_code_buf);
return ret;
}
master_entry = slave_code_buf[0];
memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
slave_code_buf[0] = master_entry;
ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
slave_code_buf, SLAVE_CODE_SIZE,
false);
kfree(slave_code_buf);
ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
sizeof(kernel_load_addr), false);
if (ret)
return ret;
ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
sizeof(fdt_load_addr), false);
if (ret)
return ret;
return 0;
}
/**
* delete_fdt_mem_rsv - delete memory reservation with given address and size
*
* Return: 0 on success, or negative errno on error.
*/
powerpc: ima: get the kexec buffer passed by the previous kernel Patch series "ima: carry the measurement list across kexec", v8. The TPM PCRs are only reset on a hard reboot. In order to validate a TPM's quote after a soft reboot (eg. kexec -e), the IMA measurement list of the running kernel must be saved and then restored on the subsequent boot, possibly of a different architecture. The existing securityfs binary_runtime_measurements file conveniently provides a serialized format of the IMA measurement list. This patch set serializes the measurement list in this format and restores it. Up to now, the binary_runtime_measurements was defined as architecture native format. The assumption being that userspace could and would handle any architecture conversions. With the ability of carrying the measurement list across kexec, possibly from one architecture to a different one, the per boot architecture information is lost and with it the ability of recalculating the template digest hash. To resolve this problem, without breaking the existing ABI, this patch set introduces the boot command line option "ima_canonical_fmt", which is arbitrarily defined as little endian. The need for this boot command line option will be limited to the existing version 1 format of the binary_runtime_measurements. Subsequent formats will be defined as canonical format (eg. TPM 2.0 support for larger digests). A simplified method of Thiago Bauermann's "kexec buffer handover" patch series for carrying the IMA measurement list across kexec is included in this patch set. The simplified method requires all file measurements be taken prior to executing the kexec load, as subsequent measurements will not be carried across the kexec and restored. This patch (of 10): The IMA kexec buffer allows the currently running kernel to pass the measurement list via a kexec segment to the kernel that will be kexec'd. The second kernel can check whether the previous kernel sent the buffer and retrieve it. This is the architecture-specific part which enables IMA to receive the measurement list passed by the previous kernel. It will be used in the next patch. The change in machine_kexec_64.c is to factor out the logic of removing an FDT memory reservation so that it can be used by remove_ima_buffer. Link: http://lkml.kernel.org/r/1480554346-29071-2-git-send-email-zohar@linux.vnet.ibm.com Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Andreas Steffen <andreas.steffen@strongswan.org> Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com> Cc: Josh Sklar <sklar@linux.vnet.ibm.com> Cc: Dave Young <dyoung@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Baoquan He <bhe@redhat.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Stewart Smith <stewart@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-12-20 08:22:32 +08:00
int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
{
int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
for (i = 0; i < num_rsvs; i++) {
uint64_t rsv_start, rsv_size;
ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
if (ret) {
pr_err("Malformed device tree.\n");
return -EINVAL;
}
if (rsv_start == start && rsv_size == size) {
ret = fdt_del_mem_rsv(fdt, i);
if (ret) {
pr_err("Error deleting device tree reservation.\n");
return -EINVAL;
}
return 0;
}
}
return -ENOENT;
}
/*
* setup_new_fdt - modify /chosen and memory reservation for the next kernel
* @image: kexec image being loaded.
* @fdt: Flattened device tree for the next kernel.
* @initrd_load_addr: Address where the next initrd will be loaded.
* @initrd_len: Size of the next initrd, or 0 if there will be none.
* @cmdline: Command line for the next kernel, or NULL if there will
* be none.
*
* Return: 0 on success, or negative errno on error.
*/
int setup_new_fdt(const struct kimage *image, void *fdt,
unsigned long initrd_load_addr, unsigned long initrd_len,
const char *cmdline)
{
int ret, chosen_node;
const void *prop;
/* Remove memory reservation for the current device tree. */
ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
fdt_totalsize(initial_boot_params));
if (ret == 0)
pr_debug("Removed old device tree reservation.\n");
else if (ret != -ENOENT)
return ret;
chosen_node = fdt_path_offset(fdt, "/chosen");
if (chosen_node == -FDT_ERR_NOTFOUND) {
chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
"chosen");
if (chosen_node < 0) {
pr_err("Error creating /chosen.\n");
return -EINVAL;
}
} else if (chosen_node < 0) {
pr_err("Malformed device tree: error reading /chosen.\n");
return -EINVAL;
}
/* Did we boot using an initrd? */
prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
if (prop) {
uint64_t tmp_start, tmp_end, tmp_size;
tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
if (!prop) {
pr_err("Malformed device tree.\n");
return -EINVAL;
}
tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
/*
* kexec reserves exact initrd size, while firmware may
* reserve a multiple of PAGE_SIZE, so check for both.
*/
tmp_size = tmp_end - tmp_start;
ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
if (ret == -ENOENT)
ret = delete_fdt_mem_rsv(fdt, tmp_start,
round_up(tmp_size, PAGE_SIZE));
if (ret == 0)
pr_debug("Removed old initrd reservation.\n");
else if (ret != -ENOENT)
return ret;
/* If there's no new initrd, delete the old initrd's info. */
if (initrd_len == 0) {
ret = fdt_delprop(fdt, chosen_node,
"linux,initrd-start");
if (ret) {
pr_err("Error deleting linux,initrd-start.\n");
return -EINVAL;
}
ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
if (ret) {
pr_err("Error deleting linux,initrd-end.\n");
return -EINVAL;
}
}
}
if (initrd_len) {
ret = fdt_setprop_u64(fdt, chosen_node,
"linux,initrd-start",
initrd_load_addr);
if (ret < 0) {
pr_err("Error setting up the new device tree.\n");
return -EINVAL;
}
/* initrd-end is the first address after the initrd image. */
ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
initrd_load_addr + initrd_len);
if (ret < 0) {
pr_err("Error setting up the new device tree.\n");
return -EINVAL;
}
ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
if (ret) {
pr_err("Error reserving initrd memory: %s\n",
fdt_strerror(ret));
return -EINVAL;
}
}
if (cmdline != NULL) {
ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
if (ret < 0) {
pr_err("Error setting up the new device tree.\n");
return -EINVAL;
}
} else {
ret = fdt_delprop(fdt, chosen_node, "bootargs");
if (ret && ret != -FDT_ERR_NOTFOUND) {
pr_err("Error deleting bootargs.\n");
return -EINVAL;
}
}
ret = setup_ima_buffer(image, fdt, chosen_node);
if (ret) {
pr_err("Error setting up the new device tree.\n");
return ret;
}
powerpc: ima: get the kexec buffer passed by the previous kernel Patch series "ima: carry the measurement list across kexec", v8. The TPM PCRs are only reset on a hard reboot. In order to validate a TPM's quote after a soft reboot (eg. kexec -e), the IMA measurement list of the running kernel must be saved and then restored on the subsequent boot, possibly of a different architecture. The existing securityfs binary_runtime_measurements file conveniently provides a serialized format of the IMA measurement list. This patch set serializes the measurement list in this format and restores it. Up to now, the binary_runtime_measurements was defined as architecture native format. The assumption being that userspace could and would handle any architecture conversions. With the ability of carrying the measurement list across kexec, possibly from one architecture to a different one, the per boot architecture information is lost and with it the ability of recalculating the template digest hash. To resolve this problem, without breaking the existing ABI, this patch set introduces the boot command line option "ima_canonical_fmt", which is arbitrarily defined as little endian. The need for this boot command line option will be limited to the existing version 1 format of the binary_runtime_measurements. Subsequent formats will be defined as canonical format (eg. TPM 2.0 support for larger digests). A simplified method of Thiago Bauermann's "kexec buffer handover" patch series for carrying the IMA measurement list across kexec is included in this patch set. The simplified method requires all file measurements be taken prior to executing the kexec load, as subsequent measurements will not be carried across the kexec and restored. This patch (of 10): The IMA kexec buffer allows the currently running kernel to pass the measurement list via a kexec segment to the kernel that will be kexec'd. The second kernel can check whether the previous kernel sent the buffer and retrieve it. This is the architecture-specific part which enables IMA to receive the measurement list passed by the previous kernel. It will be used in the next patch. The change in machine_kexec_64.c is to factor out the logic of removing an FDT memory reservation so that it can be used by remove_ima_buffer. Link: http://lkml.kernel.org/r/1480554346-29071-2-git-send-email-zohar@linux.vnet.ibm.com Signed-off-by: Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com> Signed-off-by: Mimi Zohar <zohar@linux.vnet.ibm.com> Acked-by: "Eric W. Biederman" <ebiederm@xmission.com> Cc: Andreas Steffen <andreas.steffen@strongswan.org> Cc: Dmitry Kasatkin <dmitry.kasatkin@gmail.com> Cc: Josh Sklar <sklar@linux.vnet.ibm.com> Cc: Dave Young <dyoung@redhat.com> Cc: Vivek Goyal <vgoyal@redhat.com> Cc: Baoquan He <bhe@redhat.com> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Stewart Smith <stewart@linux.vnet.ibm.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2016-12-20 08:22:32 +08:00
ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
if (ret) {
pr_err("Error setting up the new device tree.\n");
return -EINVAL;
}
return 0;
}