linux/arch/x86/kernel/cpu/sgx/ioctl.c
Lorenzo Stoakes 54d020692b mm/gup: remove unused vmas parameter from get_user_pages()
Patch series "remove the vmas parameter from GUP APIs", v6.

(pin_/get)_user_pages[_remote]() each provide an optional output parameter
for an array of VMA objects associated with each page in the input range.

These provide the means for VMAs to be returned, as long as mm->mmap_lock
is never released during the GUP operation (i.e.  the internal flag
FOLL_UNLOCKABLE is not specified).

In addition, these VMAs can only be accessed with the mmap_lock held and
become invalidated the moment it is released.

The vast majority of invocations do not use this functionality and of
those that do, all but one case retrieve a single VMA to perform checks
upon.

It is not egregious in the single VMA cases to simply replace the
operation with a vma_lookup().  In these cases we duplicate the (fast)
lookup on a slow path already under the mmap_lock, abstracted to a new
get_user_page_vma_remote() inline helper function which also performs
error checking and reference count maintenance.

The special case is io_uring, where io_pin_pages() specifically needs to
assert that the VMAs underlying the range do not result in broken
long-term GUP file-backed mappings.

As GUP now internally asserts that FOLL_LONGTERM mappings are not
file-backed in a broken fashion (i.e.  requiring dirty tracking) - as
implemented in "mm/gup: disallow FOLL_LONGTERM GUP-nonfast writing to
file-backed mappings" - this logic is no longer required and so we can
simply remove it altogether from io_uring.

Eliminating the vmas parameter eliminates an entire class of dangling
pointer errors that might have occurred should the lock have been
incorrectly released.

In addition, the API is simplified and now clearly expresses what it is
intended for - applying the specified GUP flags and (if pinning) returning
pinned pages.

This change additionally opens the door to further potential improvements
in GUP and the possible marrying of disparate code paths.

I have run this series against gup_test with no issues.

Thanks to Matthew Wilcox for suggesting this refactoring!


This patch (of 6):

No invocation of get_user_pages() uses the vmas parameter, so remove it.

The GUP API is confusing and caveated.  Recent changes have done much to
improve that, however there is more we can do.  Exporting vmas is a prime
target as the caller has to be extremely careful to preclude their use
after the mmap_lock has expired or otherwise be left with dangling
pointers.

Removing the vmas parameter focuses the GUP functions upon their primary
purpose - pinning (and outputting) pages as well as performing the actions
implied by the input flags.

This is part of a patch series aiming to remove the vmas parameter
altogether.

Link: https://lkml.kernel.org/r/cover.1684350871.git.lstoakes@gmail.com
Link: https://lkml.kernel.org/r/589e0c64794668ffc799651e8d85e703262b1e9d.1684350871.git.lstoakes@gmail.com
Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
Suggested-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Christian König <christian.koenig@amd.com> (for radeon parts)
Acked-by: Jarkko Sakkinen <jarkko@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sean Christopherson <seanjc@google.com> (KVM)
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Cc: Janosch Frank <frankja@linux.ibm.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2023-06-09 16:25:25 -07:00

1264 lines
32 KiB
C

// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2016-20 Intel Corporation. */
#include <asm/mman.h>
#include <asm/sgx.h>
#include <linux/mman.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <linux/hashtable.h>
#include <linux/highmem.h>
#include <linux/ratelimit.h>
#include <linux/sched/signal.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/suspend.h>
#include "driver.h"
#include "encl.h"
#include "encls.h"
/**
 * sgx_encl_grow() - Account for one more page in an enclave
 * @encl:	Enclave whose page count is incremented.
 * @reclaim:	Forwarded unchanged to sgx_alloc_va_page().
 *
 * A new VA page is allocated whenever the current page count is a multiple
 * of SGX_VA_SLOT_COUNT. The new VA page is not linked into any list here;
 * that is left to the caller.
 *
 * Return:
 *   the newly allocated VA page, NULL when no new VA page was required, or
 *   an ERR_PTR() when an allocation failed (the page count is then left
 *   unchanged).
 */
struct sgx_va_page *sgx_encl_grow(struct sgx_encl *encl, bool reclaim)
{
	struct sgx_va_page *new_va;
	void *err;

	BUILD_BUG_ON(SGX_VA_SLOT_COUNT !=
		(SGX_ENCL_PAGE_VA_OFFSET_MASK >> 3) + 1);

	/* No new VA page needed: just account for the page. */
	if (encl->page_cnt % SGX_VA_SLOT_COUNT) {
		encl->page_cnt++;
		return NULL;
	}

	new_va = kzalloc(sizeof(*new_va), GFP_KERNEL);
	if (!new_va)
		return ERR_PTR(-ENOMEM);

	new_va->epc_page = sgx_alloc_va_page(reclaim);
	if (IS_ERR(new_va->epc_page)) {
		/* Grab the error before the container is released. */
		err = ERR_CAST(new_va->epc_page);
		kfree(new_va);
		return err;
	}

	/* The count must not have moved while the VA page was allocated. */
	WARN_ON_ONCE(encl->page_cnt % SGX_VA_SLOT_COUNT);

	encl->page_cnt++;
	return new_va;
}
/**
 * sgx_encl_shrink() - Undo the accounting done by sgx_encl_grow()
 * @encl:	Enclave whose page count is decremented.
 * @va_page:	VA page to release, or NULL when there is none to release.
 *
 * When @va_page is given, its EPC page is freed, it is unlinked from the
 * list it sits on and the structure itself is freed.
 */
void sgx_encl_shrink(struct sgx_encl *encl, struct sgx_va_page *va_page)
{
	encl->page_cnt--;

	if (!va_page)
		return;

	sgx_encl_free_epc_page(va_page->epc_page);
	list_del(&va_page->list);
	kfree(va_page);
}
/*
* sgx_encl_create() - Set up kernel state for a new enclave and run ECREATE.
* @encl: Enclave being created.
* @secs: Kernel copy of the SECS contents supplied by the caller.
*
* Accounts for the SECS page via sgx_encl_grow(), creates the shmem backing
* file, allocates an EPC page for the SECS and invokes __ecreate(). On
* success the enclave's base, size and attributes are recorded from @secs
* and SGX_ENCL_CREATED is set.
*
* Return: 0 on success, -EIO if __ecreate() reports a failure, or the error
* from sgx_encl_grow(), shmem_file_setup() or sgx_alloc_epc_page().
*/
static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs)
{
struct sgx_epc_page *secs_epc;
struct sgx_va_page *va_page;
struct sgx_pageinfo pginfo;
struct sgx_secinfo secinfo;
unsigned long encl_size;
struct file *backing;
long ret;
va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page))
return PTR_ERR(va_page);
else if (va_page)
list_add(&va_page->list, &encl->va_pages);
/* else the tail page of the VA page list had free slots. */
/* The extra page goes to SECS. */
encl_size = secs->size + PAGE_SIZE;
/* The backing file is sized at encl_size plus 1/32 of encl_size. */
backing = shmem_file_setup("SGX backing", encl_size + (encl_size >> 5),
VM_NORESERVE);
if (IS_ERR(backing)) {
ret = PTR_ERR(backing);
goto err_out_shrink;
}
encl->backing = backing;
secs_epc = sgx_alloc_epc_page(&encl->secs, true);
if (IS_ERR(secs_epc)) {
ret = PTR_ERR(secs_epc);
goto err_out_backing;
}
encl->secs.epc_page = secs_epc;
/* ECREATE input: SECS contents from @secs, all-zero SECINFO as metadata. */
pginfo.addr = 0;
pginfo.contents = (unsigned long)secs;
pginfo.metadata = (unsigned long)&secinfo;
pginfo.secs = 0;
memset(&secinfo, 0, sizeof(secinfo));
ret = __ecreate((void *)&pginfo, sgx_get_epc_virt_addr(secs_epc));
if (ret) {
/* Any ECREATE failure is reported to the caller as -EIO. */
ret = -EIO;
goto err_out;
}
if (secs->attributes & SGX_ATTR_DEBUG)
set_bit(SGX_ENCL_DEBUG, &encl->flags);
encl->secs.encl = encl;
encl->secs.type = SGX_PAGE_TYPE_SECS;
encl->base = secs->base;
encl->size = secs->size;
encl->attributes = secs->attributes;
encl->attributes_mask = SGX_ATTR_UNPRIV_MASK;
/* Set only after completion, as encl->lock has not been taken. */
set_bit(SGX_ENCL_CREATED, &encl->flags);
return 0;
/* Unwind in the reverse order of acquisition. */
err_out:
sgx_encl_free_epc_page(encl->secs.epc_page);
encl->secs.epc_page = NULL;
err_out_backing:
fput(encl->backing);
encl->backing = NULL;
err_out_shrink:
/* Also unlinks and frees va_page if one was allocated above. */
sgx_encl_shrink(encl, va_page);
return ret;
}
/**
 * sgx_ioc_enclave_create() - handler for %SGX_IOC_ENCLAVE_CREATE
 * @encl:	An enclave pointer.
 * @arg:	The ioctl argument (user pointer to a struct sgx_enclave_create).
 *
 * Copy the SECS page referenced by the ioctl argument into a kernel buffer
 * and hand it to sgx_encl_create(), which allocates the kernel data
 * structures for the enclave and invokes ECREATE.
 *
 * Return:
 * - 0:		Success.
 * - -EINVAL:	The enclave has already been created.
 * - -EFAULT:	The argument or the SECS page could not be read.
 * - -ENOMEM:	The temporary SECS buffer could not be allocated.
 * - -EIO:	ECREATE failed.
 * - -errno:	POSIX error.
 */
static long sgx_ioc_enclave_create(struct sgx_encl *encl, void __user *arg)
{
	struct sgx_enclave_create params;
	void *secs_buf;
	int err;

	if (test_bit(SGX_ENCL_CREATED, &encl->flags))
		return -EINVAL;

	if (copy_from_user(&params, arg, sizeof(params)))
		return -EFAULT;

	secs_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!secs_buf)
		return -ENOMEM;

	if (copy_from_user(secs_buf, (void __user *)params.src, PAGE_SIZE)) {
		err = -EFAULT;
		goto out_free;
	}

	err = sgx_encl_create(encl, secs_buf);

out_free:
	kfree(secs_buf);
	return err;
}
static int sgx_validate_secinfo(struct sgx_secinfo *secinfo)
{
u64 perm = secinfo->flags & SGX_SECINFO_PERMISSION_MASK;
u64 pt = secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK;
if (pt != SGX_SECINFO_REG && pt != SGX_SECINFO_TCS)
return -EINVAL;
if ((perm & SGX_SECINFO_W) && !(perm & SGX_SECINFO_R))
return -EINVAL;
/*
* CPU will silently overwrite the permissions as zero, which means
* that we need to validate it ourselves.
*/
if (pt == SGX_SECINFO_TCS && perm)
return -EINVAL;
if (secinfo->flags & SGX_SECINFO_RESERVED_MASK)
return -EINVAL;
if (memchr_inv(secinfo->reserved, 0, sizeof(secinfo->reserved)))
return -EINVAL;
return 0;
}
/*
 * __sgx_encl_add_page() - Copy one user page into the enclave via ENCLS[EADD].
 * @encl:	Enclave receiving the page.
 * @encl_page:	Enclave page descriptor; its desc provides the target address.
 * @epc_page:	EPC page that receives the contents.
 * @secinfo:	SECINFO passed to EADD as metadata.
 * @src:	User address of the source page.
 *
 * The VMA lookup and get_user_pages() operate on current->mm; the caller in
 * this file, sgx_encl_add_page(), holds mmap_read_lock(current->mm) around
 * this call.
 *
 * Return:
 * - 0:		Success.
 * - -EFAULT:	@src is not mapped or the source page could not be obtained.
 * - -EACCES:	The mapping containing @src does not have VM_MAYEXEC.
 * - -EIO:	EADD failed.
 */
static int __sgx_encl_add_page(struct sgx_encl *encl,
			       struct sgx_encl_page *encl_page,
			       struct sgx_epc_page *epc_page,
			       struct sgx_secinfo *secinfo, unsigned long src)
{
	struct sgx_pageinfo pginfo;
	struct vm_area_struct *vma;
	struct page *src_page;
	int ret;

	/*
	 * Deny noexec. vma_lookup() only returns a VMA that actually contains
	 * @src. find_vma() would return the next VMA above an unmapped @src,
	 * so the VM_MAYEXEC check could be applied to an unrelated mapping
	 * and report -EACCES for what is really a bad address.
	 */
	vma = vma_lookup(current->mm, src);
	if (!vma)
		return -EFAULT;

	if (!(vma->vm_flags & VM_MAYEXEC))
		return -EACCES;

	ret = get_user_pages(src, 1, 0, &src_page);
	if (ret < 1)
		return -EFAULT;

	pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
	pginfo.addr = encl_page->desc & PAGE_MASK;
	pginfo.metadata = (unsigned long)secinfo;
	/* Map the source page only for the duration of EADD. */
	pginfo.contents = (unsigned long)kmap_local_page(src_page);

	ret = __eadd(&pginfo, sgx_get_epc_virt_addr(epc_page));

	kunmap_local((void *)pginfo.contents);
	/* Drop the reference taken by get_user_pages(). */
	put_page(src_page);

	return ret ? -EIO : 0;
}
/*
 * If the caller requires measurement of the page as a proof for the content,
 * use EEXTEND to add a measurement for one SGX_EEXTEND_BLOCK_SIZE chunk of
 * the page at a time, and repeat until the entire page has been measured.
 *
 * Return: 0 on success, -EIO as soon as any EEXTEND reports an error.
 */
static int __sgx_encl_extend(struct sgx_encl *encl,
			     struct sgx_epc_page *epc_page)
{
	unsigned long pos = 0;
	int err;

	while (pos < PAGE_SIZE) {
		err = __eextend(sgx_get_epc_virt_addr(encl->secs.epc_page),
				sgx_get_epc_virt_addr(epc_page) + pos);
		if (!err) {
			pos += SGX_EEXTEND_BLOCK_SIZE;
			continue;
		}

		if (encls_failed(err))
			ENCLS_WARN(err, "EEXTEND");

		return -EIO;
	}

	return 0;
}
/*
* sgx_encl_add_page() - Add a single page to an enclave.
* @encl: Enclave receiving the page.
* @src: User address of the source page, forwarded to __sgx_encl_add_page().
* @offset: Forwarded to sgx_encl_page_alloc() together with the SECINFO flags.
* @secinfo: SECINFO for the page; its flags also determine the page type.
* @flags: If SGX_PAGE_MEASURE is set, the page is measured with EEXTEND.
*
* Allocates the enclave page descriptor and an EPC page, accounts for the
* page with sgx_encl_grow(), and then, with mmap_lock (read) and encl->lock
* held, inserts the descriptor into encl->page_array, runs EADD and
* optionally EEXTEND.
*
* Return: 0 on success, otherwise the negative error code of the step that
* failed.
*/
static int sgx_encl_add_page(struct sgx_encl *encl, unsigned long src,
unsigned long offset, struct sgx_secinfo *secinfo,
unsigned long flags)
{
struct sgx_encl_page *encl_page;
struct sgx_epc_page *epc_page;
struct sgx_va_page *va_page;
int ret;
encl_page = sgx_encl_page_alloc(encl, offset, secinfo->flags);
if (IS_ERR(encl_page))
return PTR_ERR(encl_page);
epc_page = sgx_alloc_epc_page(encl_page, true);
if (IS_ERR(epc_page)) {
kfree(encl_page);
return PTR_ERR(epc_page);
}
va_page = sgx_encl_grow(encl, true);
if (IS_ERR(va_page)) {
ret = PTR_ERR(va_page);
goto err_out_free;
}
/* mmap_lock is taken before encl->lock and released after it. */
mmap_read_lock(current->mm);
mutex_lock(&encl->lock);
/*
* Adding to encl->va_pages must be done under encl->lock. Ditto for
* deleting (via sgx_encl_shrink()) in the error path.
*/
if (va_page)
list_add(&va_page->list, &encl->va_pages);
/*
* Insert prior to EADD in case of OOM. EADD modifies MRENCLAVE, i.e.
* can't be gracefully unwound, while failure on EADD/EXTEND is limited
* to userspace errors (or kernel/hardware bugs).
*/
ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
encl_page, GFP_KERNEL);
if (ret)
goto err_out_unlock;
ret = __sgx_encl_add_page(encl, encl_page, epc_page, secinfo,
src);
if (ret)
goto err_out;
/*
* Complete the "add" before doing the "extend" so that the "add"
* isn't in a half-baked state in the extremely unlikely scenario
* the enclave will be destroyed in response to EEXTEND failure.
*/
encl_page->encl = encl;
encl_page->epc_page = epc_page;
/* The page type is stored in the SECINFO flags starting at bit 8. */
encl_page->type = (secinfo->flags & SGX_SECINFO_PAGE_TYPE_MASK) >> 8;
encl->secs_child_cnt++;
if (flags & SGX_PAGE_MEASURE) {
ret = __sgx_encl_extend(encl, epc_page);
if (ret)
goto err_out;
}
sgx_mark_page_reclaimable(encl_page->epc_page);
mutex_unlock(&encl->lock);
mmap_read_unlock(current->mm);
return ret;
/* Error unwinding: each label undoes one more step than the one below it. */
err_out:
xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
err_out_unlock:
sgx_encl_shrink(encl, va_page);
mutex_unlock(&encl->lock);
mmap_read_unlock(current->mm);
err_out_free:
sgx_encl_free_epc_page(epc_page);
kfree(encl_page);
return ret;
}
/*
 * Check that a user provided (offset, length) pair describes a non-empty,
 * page aligned range that lies within the enclave.
 *
 * Return: 0 if the range is valid, -EINVAL otherwise.
 */
static int sgx_validate_offset_length(struct sgx_encl *encl,
				      unsigned long offset,
				      unsigned long length)
{
	unsigned long end = offset + length;

	if (!IS_ALIGNED(offset, PAGE_SIZE))
		return -EINVAL;

	if (length == 0 || !IS_ALIGNED(length, PAGE_SIZE))
		return -EINVAL;

	/* The sum wrapped around. */
	if (end < offset)
		return -EINVAL;

	/* The last page of the range must start inside the enclave. */
	if (end - PAGE_SIZE >= encl->size)
		return -EINVAL;

	return 0;
}
/**
 * sgx_ioc_enclave_add_pages() - The handler for %SGX_IOC_ENCLAVE_ADD_PAGES
 * @encl:       an enclave pointer
 * @arg:        a user pointer to a struct sgx_enclave_add_pages instance
 *
 * Add one or more pages to an enclave that has been created but not yet
 * initialized, and optionally extend the measurement with the contents of
 * each page. The same SECINFO and flags are applied to all pages.
 *
 * A SECINFO for a TCS is required to always contain zero permissions because
 * CPU silently zeros them. Allowing anything else would cause a mismatch in
 * the measurement.
 *
 * mmap()'s protection bits are capped by the page permissions. For each page
 * address, the maximum protection bits are computed with the following
 * heuristics:
 *
 * 1. A regular page: PROT_R, PROT_W and PROT_X match the SECINFO permissions.
 * 2. A TCS page: PROT_R | PROT_W.
 *
 * mmap() is not allowed to surpass the minimum of the maximum protection bits
 * within the given address range.
 *
 * Pages are processed one at a time. Processing stops at the first failure
 * or when a signal becomes pending; the number of bytes processed so far is
 * written back to user space in the count field of the argument.
 *
 * Return:
 * - 0:		Success, or stopped by a signal after at least one page.
 * - -EINVAL:	Wrong enclave state or invalid src/offset/length/SECINFO.
 * - -EFAULT:	The argument or SECINFO could not be copied.
 * - -EACCES:	The source page is located in a noexec partition.
 * - -ENOMEM:	Out of EPC pages.
 * - -ERESTARTSYS: A signal was pending before any page was processed.
 * - -EIO:	Either EADD or EEXTEND failed.
 * - -errno:	POSIX error.
 */
static long sgx_ioc_enclave_add_pages(struct sgx_encl *encl, void __user *arg)
{
	struct sgx_enclave_add_pages add_arg;
	struct sgx_secinfo secinfo;
	unsigned long done = 0;
	int ret;

	if (!test_bit(SGX_ENCL_CREATED, &encl->flags))
		return -EINVAL;

	if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
		return -EINVAL;

	if (copy_from_user(&add_arg, arg, sizeof(add_arg)))
		return -EFAULT;

	if (!IS_ALIGNED(add_arg.src, PAGE_SIZE))
		return -EINVAL;

	if (sgx_validate_offset_length(encl, add_arg.offset, add_arg.length))
		return -EINVAL;

	if (copy_from_user(&secinfo, (void __user *)add_arg.secinfo,
			   sizeof(secinfo)))
		return -EFAULT;

	if (sgx_validate_secinfo(&secinfo))
		return -EINVAL;

	while (done < add_arg.length) {
		if (signal_pending(current)) {
			/* Only report the signal if nothing was processed. */
			if (!done)
				ret = -ERESTARTSYS;

			break;
		}

		if (need_resched())
			cond_resched();

		ret = sgx_encl_add_page(encl, add_arg.src + done,
					add_arg.offset + done, &secinfo,
					add_arg.flags);
		if (ret)
			break;

		done += PAGE_SIZE;
	}

	/* Tell user space how far we got, even on failure. */
	add_arg.count = done;

	if (copy_to_user(arg, &add_arg, sizeof(add_arg)))
		return -EFAULT;

	return ret;
}
/*
* __sgx_get_key_hash() - Digest a modulus with an already allocated transform.
* @tfm: Hash transform to use.
* @modulus: Input buffer; SGX_MODULUS_SIZE bytes are hashed.
* @hash: Output buffer for the digest.
*
* Return: the result of crypto_shash_digest().
*/
static int __sgx_get_key_hash(struct crypto_shash *tfm, const void *modulus,
void *hash)
{
/* The hash descriptor lives on this function's stack. */
SHASH_DESC_ON_STACK(shash, tfm);
shash->tfm = tfm;
return crypto_shash_digest(shash, modulus, SGX_MODULUS_SIZE, hash);
}
/*
 * sgx_get_key_hash() - Compute the "sha256" digest of a modulus.
 * @modulus:	Input buffer, passed on to __sgx_get_key_hash().
 * @hash:	Output buffer for the digest.
 *
 * The transform is allocated for this single computation and freed again
 * before returning.
 *
 * Return: 0 on success, the crypto_alloc_shash() error if the transform
 * could not be allocated, or the error from the digest computation.
 */
static int sgx_get_key_hash(const void *modulus, void *hash)
{
	struct crypto_shash *sha256_tfm;
	int err;

	sha256_tfm = crypto_alloc_shash("sha256", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(sha256_tfm))
		return PTR_ERR(sha256_tfm);

	err = __sgx_get_key_hash(sha256_tfm, modulus, hash);

	crypto_free_shash(sha256_tfm);

	return err;
}
/*
* sgx_encl_init() - Check the SIGSTRUCT and run ENCLS[EINIT] on an enclave.
* @encl: Enclave to initialize.
* @sigstruct: Kernel copy of the SIGSTRUCT.
* @token: Launch token buffer handed to __einit().
*
* Return: 0 on success (SGX_ENCL_INITIALIZED is then set), -EACCES or
* -EINVAL if one of the attribute checks fails, the error from
* sgx_get_key_hash(), -ERESTARTSYS if a signal is pending after sleeping
* between retries, -EIO if EINIT faulted, or -EPERM for any other non-zero
* EINIT result.
*/
static int sgx_encl_init(struct sgx_encl *encl, struct sgx_sigstruct *sigstruct,
void *token)
{
u64 mrsigner[4];
int i, j;
void *addr;
int ret;
/*
* Deny initializing enclaves with attributes (namely provisioning)
* that have not been explicitly allowed.
*/
if (encl->attributes & ~encl->attributes_mask)
return -EACCES;
/*
* Attributes should not be enforced *only* against what's available on
* platform (done in sgx_encl_create) but checked and enforced against
* the mask for enforcement in sigstruct. For example an enclave could
* opt to sign with AVX bit in xfrm, but still be loadable on a platform
* without it if the sigstruct->body.attributes_mask does not turn that
* bit on.
*/
if (sigstruct->body.attributes & sigstruct->body.attributes_mask &
sgx_attributes_reserved_mask)
return -EINVAL;
if (sigstruct->body.miscselect & sigstruct->body.misc_mask &
sgx_misc_reserved_mask)
return -EINVAL;
if (sigstruct->body.xfrm & sigstruct->body.xfrm_mask &
sgx_xfrm_reserved_mask)
return -EINVAL;
/* mrsigner receives the hash of the SIGSTRUCT modulus. */
ret = sgx_get_key_hash(sigstruct->modulus, mrsigner);
if (ret)
return ret;
mutex_lock(&encl->lock);
/*
* ENCLS[EINIT] is interruptible because it has such a high latency,
* e.g. 50k+ cycles on success. If an IRQ/NMI/SMI becomes pending,
* EINIT may fail with SGX_UNMASKED_EVENT so that the event can be
* serviced.
*/
/*
* Retry strategy: spin up to SGX_EINIT_SPIN_COUNT times, then sleep and
* start over, for at most SGX_EINIT_SLEEP_COUNT rounds.
*/
for (i = 0; i < SGX_EINIT_SLEEP_COUNT; i++) {
for (j = 0; j < SGX_EINIT_SPIN_COUNT; j++) {
addr = sgx_get_epc_virt_addr(encl->secs.epc_page);
/* The hash update and EINIT run with preemption disabled. */
preempt_disable();
sgx_update_lepubkeyhash(mrsigner);
ret = __einit(sigstruct, token, addr);
preempt_enable();
if (ret == SGX_UNMASKED_EVENT)
continue;
else
break;
}
/* Anything other than SGX_UNMASKED_EVENT ends the retries. */
if (ret != SGX_UNMASKED_EVENT)
break;
msleep_interruptible(SGX_EINIT_SLEEP_TIME);
if (signal_pending(current)) {
ret = -ERESTARTSYS;
goto err_out;
}
}
/* Translate the raw EINIT result into an errno. */
if (encls_faulted(ret)) {
if (encls_failed(ret))
ENCLS_WARN(ret, "EINIT");
ret = -EIO;
} else if (ret) {
pr_debug("EINIT returned %d\n", ret);
ret = -EPERM;
} else {
set_bit(SGX_ENCL_INITIALIZED, &encl->flags);
}
err_out:
mutex_unlock(&encl->lock);
return ret;
}
/**
 * sgx_ioc_enclave_init() - handler for %SGX_IOC_ENCLAVE_INIT
 * @encl:	an enclave pointer
 * @arg:	userspace pointer to a struct sgx_enclave_init instance
 *
 * Flush any outstanding enqueued EADD operations and perform EINIT.  The
 * Launch Enclave Public Key Hash MSRs are rewritten as necessary to match
 * the enclave's MRSIGNER, which is calculated from the provided sigstruct.
 *
 * Return:
 * - 0:		Success.
 * - -EINVAL:	Wrong enclave state or unsupported SIGSTRUCT vendor.
 * - -EPERM:	Invalid SIGSTRUCT.
 * - -EIO:	EINIT failed because of a power cycle.
 * - -errno:	POSIX error.
 */
static long sgx_ioc_enclave_init(struct sgx_encl *encl, void __user *arg)
{
	struct sgx_enclave_init init_arg;
	struct sgx_sigstruct *sigstruct;
	void *token;
	int ret;

	if (!test_bit(SGX_ENCL_CREATED, &encl->flags))
		return -EINVAL;

	if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
		return -EINVAL;

	if (copy_from_user(&init_arg, arg, sizeof(init_arg)))
		return -EFAULT;

	/*
	 * 'sigstruct' must be on a page boundary and 'token' on a 512 byte
	 * boundary.  kmalloc() will give this alignment when allocating
	 * PAGE_SIZE bytes.
	 */
	sigstruct = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!sigstruct)
		return -ENOMEM;

	/* The (zeroed) token occupies the second half of the same page. */
	token = (void *)((unsigned long)sigstruct + PAGE_SIZE / 2);
	memset(token, 0, SGX_LAUNCH_TOKEN_SIZE);

	if (copy_from_user(sigstruct, (void __user *)init_arg.sigstruct,
			   sizeof(*sigstruct))) {
		ret = -EFAULT;
		goto out_free;
	}

	/*
	 * A legacy field used with Intel signed enclaves. These used to mean
	 * regular and architectural enclaves. The CPU only accepts these values
	 * but they do not have any other meaning.
	 *
	 * Thus, reject any other values.
	 */
	if (sigstruct->header.vendor == 0x0000 ||
	    sigstruct->header.vendor == 0x8086)
		ret = sgx_encl_init(encl, sigstruct, token);
	else
		ret = -EINVAL;

out_free:
	kfree(sigstruct);
	return ret;
}
/**
 * sgx_ioc_enclave_provision() - handler for %SGX_IOC_ENCLAVE_PROVISION
 * @encl:	an enclave pointer
 * @arg:	userspace pointer to a struct sgx_enclave_provision instance
 *
 * Allow ATTRIBUTE.PROVISION_KEY for an enclave by providing a file handle to
 * /dev/sgx_provision. The file descriptor from the argument is passed to
 * sgx_set_attribute() together with the enclave's attribute mask.
 *
 * Return:
 * - 0:		Success.
 * - -EFAULT:	The argument could not be copied from user space.
 * - -errno:	Otherwise.
 */
static long sgx_ioc_enclave_provision(struct sgx_encl *encl, void __user *arg)
{
	struct sgx_enclave_provision prov;
	unsigned long not_copied;

	not_copied = copy_from_user(&prov, arg, sizeof(prov));
	if (not_copied)
		return -EFAULT;

	return sgx_set_attribute(&encl->attributes_mask, prov.fd);
}
/*
 * Check whether an enclave can take SGX2 requests: the hardware must
 * support SGX2 and the enclave must already be initialized, since only
 * then can pages within it be modified.
 *
 * Return: 0 when ready, -ENODEV without SGX2 support, -EINVAL when the
 * enclave has not been initialized.
 */
static int sgx_ioc_sgx2_ready(struct sgx_encl *encl)
{
	if (!cpu_feature_enabled(X86_FEATURE_SGX2))
		return -ENODEV;

	return test_bit(SGX_ENCL_INITIALIZED, &encl->flags) ? 0 : -EINVAL;
}
/*
 * Some SGX functions require that no cached linear-to-physical address
 * mappings are present before they can succeed. Collaborate with
 * hardware via ENCLS[ETRACK] to ensure that all cached
 * linear-to-physical address mappings belonging to all threads of
 * the enclave are cleared. See sgx_encl_cpumask() for details.
 *
 * Must be called with enclave's mutex held from the time the
 * SGX function requiring that no cached linear-to-physical mappings
 * are present is executed until this ETRACK flow is complete.
 *
 * Return: 0 on success, -EFAULT if ETRACK still fails after one retry.
 */
static int sgx_enclave_etrack(struct sgx_encl *encl)
{
	void *epc_virt;
	int ret;

	epc_virt = sgx_get_epc_virt_addr(encl->secs.epc_page);
	ret = __etrack(epc_virt);
	if (ret) {
		/*
		 * ETRACK only fails when there is an OS issue. For
		 * example, two consecutive ETRACK was sent without
		 * completed IPI between.
		 *
		 * The message is newline terminated like the other log
		 * messages in this file, so that it forms a complete log
		 * record on its own.
		 */
		pr_err_once("ETRACK returned %d (0x%x)\n", ret, ret);
		/*
		 * Send IPIs to kick CPUs out of the enclave and
		 * try ETRACK again.
		 */
		on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);
		ret = __etrack(epc_virt);
		if (ret) {
			pr_err_once("ETRACK repeat returned %d (0x%x)\n",
				    ret, ret);
			return -EFAULT;
		}
	}

	/* ETRACK succeeded: kick the CPUs in the enclave's cpumask. */
	on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1);

	return 0;
}
/**
* sgx_enclave_restrict_permissions() - Restrict EPCM permissions
* @encl: Enclave to which the pages belong.
* @modp: Checked parameters from user on which pages need modifying and
* their new permissions.
*
* Pages are processed one at a time with encl->lock held for each page.
* On return, @modp->count holds the number of bytes processed before the
* function stopped, and @modp->result holds the EMODPR result if EMODPR
* reported a failure.
*
* Return:
* - 0: Success.
* - -errno: Otherwise.
*/
static long
sgx_enclave_restrict_permissions(struct sgx_encl *encl,
struct sgx_enclave_restrict_permissions *modp)
{
struct sgx_encl_page *entry;
struct sgx_secinfo secinfo;
unsigned long addr;
unsigned long c;
void *epc_virt;
int ret;
/* The same SECINFO, carrying only permission bits, is used for every page. */
memset(&secinfo, 0, sizeof(secinfo));
secinfo.flags = modp->permissions & SGX_SECINFO_PERMISSION_MASK;
for (c = 0 ; c < modp->length; c += PAGE_SIZE) {
addr = encl->base + modp->offset + c;
/* NOTE(review): presumably frees up EPC pages ahead of the page load below - confirm. */
sgx_reclaim_direct();
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr);
if (IS_ERR(entry)) {
/* -EBUSY is reported as -EAGAIN, any other load error as -EFAULT. */
ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
goto out_unlock;
}
/*
* Changing EPCM permissions is only supported on regular
* SGX pages. Attempting this change on other pages will
* result in #PF.
*/
if (entry->type != SGX_PAGE_TYPE_REG) {
ret = -EINVAL;
goto out_unlock;
}
/*
* Apart from ensuring that read-access remains, do not verify
* the permission bits requested. Kernel has no control over
* how EPCM permissions can be relaxed from within the enclave.
* ENCLS[EMODPR] can only remove existing EPCM permissions,
* attempting to set new permissions will be ignored by the
* hardware.
*/
/* Change EPCM permissions. */
epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
ret = __emodpr(&secinfo, epc_virt);
if (encls_faulted(ret)) {
/*
* All possible faults should be avoidable:
* parameters have been checked, will only change
* permissions of a regular page, and no concurrent
* SGX1/SGX2 ENCLS instructions since these
* are protected with mutex.
*/
pr_err_once("EMODPR encountered exception %d\n",
ENCLS_TRAPNR(ret));
ret = -EFAULT;
goto out_unlock;
}
if (encls_failed(ret)) {
/* Hand the raw EMODPR result back to the caller. */
modp->result = ret;
ret = -EFAULT;
goto out_unlock;
}
ret = sgx_enclave_etrack(encl);
if (ret) {
ret = -EFAULT;
goto out_unlock;
}
mutex_unlock(&encl->lock);
}
ret = 0;
goto out;
out_unlock:
mutex_unlock(&encl->lock);
out:
/* c is the byte offset reached, i.e. the amount processed successfully. */
modp->count = c;
return ret;
}
/**
 * sgx_ioc_enclave_restrict_permissions() - handler for
 *                                        %SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS
 * @encl:	an enclave pointer
 * @arg:	userspace pointer to a &struct sgx_enclave_restrict_permissions
 *		instance
 *
 * SGX2 distinguishes between relaxing and restricting the enclave page
 * permissions maintained by the hardware (EPCM permissions) of pages
 * belonging to an initialized enclave (after SGX_IOC_ENCLAVE_INIT).
 *
 * EPCM permissions cannot be restricted from within the enclave, the enclave
 * requires the kernel to run the privileged level 0 instructions ENCLS[EMODPR]
 * and ENCLS[ETRACK]. An attempt to relax EPCM permissions with this call
 * will be ignored by the hardware.
 *
 * The parameters are validated here, the work is done by
 * sgx_enclave_restrict_permissions(), and the updated parameters are copied
 * back to user space whether or not that work succeeded.
 *
 * Return:
 * - 0:		Success
 * - -errno:	Otherwise
 */
static long sgx_ioc_enclave_restrict_permissions(struct sgx_encl *encl,
						 void __user *arg)
{
	struct sgx_enclave_restrict_permissions params;
	long rc;

	rc = sgx_ioc_sgx2_ready(encl);
	if (rc)
		return rc;

	if (copy_from_user(&params, arg, sizeof(params)))
		return -EFAULT;

	if (sgx_validate_offset_length(encl, params.offset, params.length))
		return -EINVAL;

	/*
	 * Only permission bits may be set, and write requires read. Fail
	 * early on invalid permissions to prevent ENCLS[EMODPR] from
	 * faulting later when the CPU does the same check.
	 */
	if ((params.permissions & ~SGX_SECINFO_PERMISSION_MASK) ||
	    ((params.permissions & SGX_SECINFO_W) &&
	     !(params.permissions & SGX_SECINFO_R)))
		return -EINVAL;

	/* The output fields must be zero on entry. */
	if (params.result || params.count)
		return -EINVAL;

	rc = sgx_enclave_restrict_permissions(encl, &params);

	return copy_to_user(arg, &params, sizeof(params)) ? -EFAULT : rc;
}
/**
* sgx_enclave_modify_types() - Modify type of SGX enclave pages
* @encl: Enclave to which the pages belong.
* @modt: Checked parameters from user about which pages need modifying
* and their new page type.
*
* Pages are processed one at a time. On return, @modt->count holds the
* number of bytes processed before the function stopped, and @modt->result
* holds the EMODT result if EMODT reported a failure. If the requested page
* type is rejected up front, the function returns before touching @modt.
*
* Return:
* - 0: Success
* - -errno: Otherwise
*/
static long sgx_enclave_modify_types(struct sgx_encl *encl,
struct sgx_enclave_modify_types *modt)
{
unsigned long max_prot_restore;
enum sgx_page_type page_type;
struct sgx_encl_page *entry;
struct sgx_secinfo secinfo;
unsigned long prot;
unsigned long addr;
unsigned long c;
void *epc_virt;
int ret;
page_type = modt->page_type & SGX_PAGE_TYPE_MASK;
/*
* The only new page types allowed by hardware are PT_TCS and PT_TRIM.
*/
if (page_type != SGX_PAGE_TYPE_TCS && page_type != SGX_PAGE_TYPE_TRIM)
return -EINVAL;
/* The page type is placed in the SECINFO flags starting at bit 8. */
memset(&secinfo, 0, sizeof(secinfo));
secinfo.flags = page_type << 8;
for (c = 0 ; c < modt->length; c += PAGE_SIZE) {
addr = encl->base + modt->offset + c;
/* NOTE(review): presumably frees up EPC pages ahead of the page load below - confirm. */
sgx_reclaim_direct();
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr);
if (IS_ERR(entry)) {
/* -EBUSY is reported as -EAGAIN, any other load error as -EFAULT. */
ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
goto out_unlock;
}
/*
* Borrow the logic from the Intel SDM. Regular pages
* (SGX_PAGE_TYPE_REG) can change type to SGX_PAGE_TYPE_TCS
* or SGX_PAGE_TYPE_TRIM but TCS pages can only be trimmed.
* CET pages not supported yet.
*/
if (!(entry->type == SGX_PAGE_TYPE_REG ||
(entry->type == SGX_PAGE_TYPE_TCS &&
page_type == SGX_PAGE_TYPE_TRIM))) {
ret = -EINVAL;
goto out_unlock;
}
/* Remembered so that the out_entry_changed path can undo the change below. */
max_prot_restore = entry->vm_max_prot_bits;
/*
* Once a regular page becomes a TCS page it cannot be
* changed back. So the maximum allowed protection reflects
* the TCS page that is always RW from kernel perspective but
* will be inaccessible from within enclave. Before doing
* so, do make sure that the new page type continues to
* respect the originally vetted page permissions.
*/
if (entry->type == SGX_PAGE_TYPE_REG &&
page_type == SGX_PAGE_TYPE_TCS) {
/* Refuse unless the page currently allows both read and write. */
if (~entry->vm_max_prot_bits & (VM_READ | VM_WRITE)) {
ret = -EPERM;
goto out_unlock;
}
prot = PROT_READ | PROT_WRITE;
entry->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
/*
* Prevent page from being reclaimed while mutex
* is released.
*/
if (sgx_unmark_page_reclaimable(entry->epc_page)) {
ret = -EAGAIN;
goto out_entry_changed;
}
/*
* Do not keep encl->lock because of dependency on
* mmap_lock acquired in sgx_zap_enclave_ptes().
*/
mutex_unlock(&encl->lock);
sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);
sgx_mark_page_reclaimable(entry->epc_page);
}
/* Change EPC type */
epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
ret = __emodt(&secinfo, epc_virt);
if (encls_faulted(ret)) {
/*
* All possible faults should be avoidable:
* parameters have been checked, will only change
* valid page types, and no concurrent
* SGX1/SGX2 ENCLS instructions since these are
* protected with mutex.
*/
pr_err_once("EMODT encountered exception %d\n",
ENCLS_TRAPNR(ret));
ret = -EFAULT;
goto out_entry_changed;
}
if (encls_failed(ret)) {
/* Hand the raw EMODT result back to the caller. */
modt->result = ret;
ret = -EFAULT;
goto out_entry_changed;
}
/*
* An ETRACK failure leaves through out_unlock, so vm_max_prot_bits is
* not restored and entry->type is not updated on that path.
*/
ret = sgx_enclave_etrack(encl);
if (ret) {
ret = -EFAULT;
goto out_unlock;
}
entry->type = page_type;
mutex_unlock(&encl->lock);
}
ret = 0;
goto out;
out_entry_changed:
/* Undo the vm_max_prot_bits change made for this page. */
entry->vm_max_prot_bits = max_prot_restore;
out_unlock:
mutex_unlock(&encl->lock);
out:
/* c is the byte offset reached, i.e. the amount processed successfully. */
modt->count = c;
return ret;
}
/**
 * sgx_ioc_enclave_modify_types() - handler for %SGX_IOC_ENCLAVE_MODIFY_TYPES
 * @encl:	an enclave pointer
 * @arg:	userspace pointer to a &struct sgx_enclave_modify_types instance
 *
 * Ability to change the enclave page type supports the following use cases:
 *
 * * It is possible to add TCS pages to an enclave by changing the type of
 *   regular pages (%SGX_PAGE_TYPE_REG) to TCS (%SGX_PAGE_TYPE_TCS) pages.
 *   With this support the number of threads supported by an initialized
 *   enclave can be increased dynamically.
 *
 * * Regular or TCS pages can dynamically be removed from an initialized
 *   enclave by changing the page type to %SGX_PAGE_TYPE_TRIM. Changing the
 *   page type to %SGX_PAGE_TYPE_TRIM marks the page for removal with actual
 *   removal done by handler of %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() called
 *   after ENCLU[EACCEPT] is run on %SGX_PAGE_TYPE_TRIM page from within the
 *   enclave.
 *
 * The parameters are validated here, the work is done by
 * sgx_enclave_modify_types(), and the parameters are copied back to user
 * space whether or not that work succeeded.
 *
 * Return:
 * - 0:		Success
 * - -errno:	Otherwise
 */
static long sgx_ioc_enclave_modify_types(struct sgx_encl *encl,
					 void __user *arg)
{
	struct sgx_enclave_modify_types params;
	long rc;

	rc = sgx_ioc_sgx2_ready(encl);
	if (rc)
		return rc;

	if (copy_from_user(&params, arg, sizeof(params)))
		return -EFAULT;

	if (sgx_validate_offset_length(encl, params.offset, params.length))
		return -EINVAL;

	/* No bits outside of the page type mask may be set. */
	if (params.page_type & ~SGX_PAGE_TYPE_MASK)
		return -EINVAL;

	/* The output fields must be zero on entry. */
	if (params.result || params.count)
		return -EINVAL;

	rc = sgx_enclave_modify_types(encl, &params);

	return copy_to_user(arg, &params, sizeof(params)) ? -EFAULT : rc;
}
/**
* sgx_encl_remove_pages() - Remove trimmed pages from SGX enclave
* @encl: Enclave to which the pages belong
* @params: Checked parameters from user on which pages need to be removed
*
* Pages are processed one at a time. On return, @params->count holds the
* number of bytes processed before the function stopped.
*
* Return:
* - 0: Success.
* - -errno: Otherwise.
*/
static long sgx_encl_remove_pages(struct sgx_encl *encl,
struct sgx_enclave_remove_pages *params)
{
struct sgx_encl_page *entry;
struct sgx_secinfo secinfo;
unsigned long addr;
unsigned long c;
void *epc_virt;
int ret;
/* RWX SECINFO used for the EMODPR probe described in the loop below. */
memset(&secinfo, 0, sizeof(secinfo));
secinfo.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
for (c = 0 ; c < params->length; c += PAGE_SIZE) {
addr = encl->base + params->offset + c;
/* NOTE(review): presumably frees up EPC pages ahead of the page load below - confirm. */
sgx_reclaim_direct();
mutex_lock(&encl->lock);
entry = sgx_encl_load_page(encl, addr);
if (IS_ERR(entry)) {
/* -EBUSY is reported as -EAGAIN, any other load error as -EFAULT. */
ret = PTR_ERR(entry) == -EBUSY ? -EAGAIN : -EFAULT;
goto out_unlock;
}
/* Only pages that were previously trimmed can be removed. */
if (entry->type != SGX_PAGE_TYPE_TRIM) {
ret = -EPERM;
goto out_unlock;
}
/*
* ENCLS[EMODPR] is a no-op instruction used to inform if
* ENCLU[EACCEPT] was run from within the enclave. If
* ENCLS[EMODPR] is run with RWX on a trimmed page that is
* not yet accepted then it will return
* %SGX_PAGE_NOT_MODIFIABLE, after the trimmed page is
* accepted the instruction will encounter a page fault.
*/
epc_virt = sgx_get_epc_virt_addr(entry->epc_page);
ret = __emodpr(&secinfo, epc_virt);
/* Anything other than a page fault from EMODPR is rejected with -EPERM. */
if (!encls_faulted(ret) || ENCLS_TRAPNR(ret) != X86_TRAP_PF) {
ret = -EPERM;
goto out_unlock;
}
if (sgx_unmark_page_reclaimable(entry->epc_page)) {
ret = -EBUSY;
goto out_unlock;
}
/*
* Do not keep encl->lock because of dependency on
* mmap_lock acquired in sgx_zap_enclave_ptes().
*/
mutex_unlock(&encl->lock);
sgx_zap_enclave_ptes(encl, addr);
mutex_lock(&encl->lock);
/* Release the EPC page and drop the page from the enclave's bookkeeping. */
sgx_encl_free_epc_page(entry->epc_page);
encl->secs_child_cnt--;
entry->epc_page = NULL;
xa_erase(&encl->page_array, PFN_DOWN(entry->desc));
/* With a NULL VA page this only decrements the enclave's page count. */
sgx_encl_shrink(encl, NULL);
kfree(entry);
mutex_unlock(&encl->lock);
}
ret = 0;
goto out;
out_unlock:
mutex_unlock(&encl->lock);
out:
/* c is the byte offset reached, i.e. the amount processed successfully. */
params->count = c;
return ret;
}
/**
 * sgx_ioc_enclave_remove_pages() - handler for %SGX_IOC_ENCLAVE_REMOVE_PAGES
 * @encl:	an enclave pointer
 * @arg:	userspace pointer to &struct sgx_enclave_remove_pages instance
 *
 * Final step of the flow removing pages from an initialized enclave. The
 * complete flow is:
 *
 * 1) User changes the type of the pages to be removed to %SGX_PAGE_TYPE_TRIM
 *    using the %SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl().
 * 2) User approves the page removal by running ENCLU[EACCEPT] from within
 *    the enclave.
 * 3) User initiates actual page removal using the
 *    %SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl() that is handled here.
 *
 * First remove any page table entries pointing to the page and then proceed
 * with the actual removal of the enclave page and data in support of it.
 *
 * VA pages are not affected by this removal. It is thus possible that the
 * enclave may end up with more VA pages than needed to support all its
 * pages.
 *
 * The parameters are copied back to user space whether or not the removal
 * succeeded.
 *
 * Return:
 * - 0:		Success
 * - -errno:	Otherwise
 */
static long sgx_ioc_enclave_remove_pages(struct sgx_encl *encl,
					 void __user *arg)
{
	struct sgx_enclave_remove_pages params;
	long rc;

	rc = sgx_ioc_sgx2_ready(encl);
	if (rc)
		return rc;

	if (copy_from_user(&params, arg, sizeof(params)))
		return -EFAULT;

	if (sgx_validate_offset_length(encl, params.offset, params.length))
		return -EINVAL;

	/* The output field must be zero on entry. */
	if (params.count)
		return -EINVAL;

	rc = sgx_encl_remove_pages(encl, &params);

	return copy_to_user(arg, &params, sizeof(params)) ? -EFAULT : rc;
}
long sgx_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
struct sgx_encl *encl = filep->private_data;
int ret;
if (test_and_set_bit(SGX_ENCL_IOCTL, &encl->flags))
return -EBUSY;
switch (cmd) {
case SGX_IOC_ENCLAVE_CREATE:
ret = sgx_ioc_enclave_create(encl, (void __user *)arg);
break;
case SGX_IOC_ENCLAVE_ADD_PAGES:
ret = sgx_ioc_enclave_add_pages(encl, (void __user *)arg);
break;
case SGX_IOC_ENCLAVE_INIT:
ret = sgx_ioc_enclave_init(encl, (void __user *)arg);
break;
case SGX_IOC_ENCLAVE_PROVISION:
ret = sgx_ioc_enclave_provision(encl, (void __user *)arg);
break;
case SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS:
ret = sgx_ioc_enclave_restrict_permissions(encl,
(void __user *)arg);
break;
case SGX_IOC_ENCLAVE_MODIFY_TYPES:
ret = sgx_ioc_enclave_modify_types(encl, (void __user *)arg);
break;
case SGX_IOC_ENCLAVE_REMOVE_PAGES:
ret = sgx_ioc_enclave_remove_pages(encl, (void __user *)arg);
break;
default:
ret = -ENOIOCTLCMD;
break;
}
clear_bit(SGX_ENCL_IOCTL, &encl->flags);
return ret;
}