2018-06-14 06:48:24 +08:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
#ifndef _ASM_PGTABLE_INVERT_H
|
|
|
|
#define _ASM_PGTABLE_INVERT_H 1
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
|
x86/speculation/l1tf: Exempt zeroed PTEs from inversion
It turns out that we should *not* invert all not-present mappings,
because the all zeroes case is obviously special.
clear_page() does not undergo the XOR logic to invert the address bits,
i.e. PTE, PMD and PUD entries that have not been individually written
will have val=0 and so will trigger __pte_needs_invert(). As a result,
{pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones
(adjusted by the max PFN mask) instead of zero. A zeroed entry is ok
because the page at physical address 0 is reserved early in boot
specifically to mitigate L1TF, so explicitly exempt them from the
inversion when reading the PFN.
Manifested as an unexpected mprotect(..., PROT_NONE) failure when called
on a VMA that has VM_PFNMAP and was mmap'd to as something other than
PROT_NONE but never used. mprotect() sends the PROT_NONE request down
prot_none_walk(), which walks the PTEs to check the PFNs.
prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns
-EACCES because it thinks mprotect() is trying to adjust a high MMIO
address.
[ This is a very modified version of Sean's original patch, but all
credit goes to Sean for doing this and also pointing out that
sometimes the __pte_needs_invert() function only gets the protection
bits, not the full eventual pte. But zero remains special even in
just protection bits, so that's ok. - Linus ]
Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-08-18 01:27:36 +08:00
|
|
|
/*
|
|
|
|
* A clear pte value is special, and doesn't get inverted.
|
|
|
|
*
|
|
|
|
* Note that even users that only pass a pgprot_t (rather
|
|
|
|
* than a full pte) won't trigger the special zero case,
|
|
|
|
* because even PAGE_NONE has _PAGE_PROTNONE | _PAGE_ACCESSED
|
|
|
|
* set. So the all zero case really is limited to just the
|
|
|
|
* cleared page table entry case.
|
|
|
|
*/
|
2018-06-14 06:48:24 +08:00
|
|
|
static inline bool __pte_needs_invert(u64 val)
|
|
|
|
{
|
x86/speculation/l1tf: Exempt zeroed PTEs from inversion
It turns out that we should *not* invert all not-present mappings,
because the all zeroes case is obviously special.
clear_page() does not undergo the XOR logic to invert the address bits,
i.e. PTE, PMD and PUD entries that have not been individually written
will have val=0 and so will trigger __pte_needs_invert(). As a result,
{pte,pmd,pud}_pfn() will return the wrong PFN value, i.e. all ones
(adjusted by the max PFN mask) instead of zero. A zeroed entry is ok
because the page at physical address 0 is reserved early in boot
specifically to mitigate L1TF, so explicitly exempt them from the
inversion when reading the PFN.
Manifested as an unexpected mprotect(..., PROT_NONE) failure when called
on a VMA that has VM_PFNMAP and was mmap'd to as something other than
PROT_NONE but never used. mprotect() sends the PROT_NONE request down
prot_none_walk(), which walks the PTEs to check the PFNs.
prot_none_pte_entry() gets the bogus PFN from pte_pfn() and returns
-EACCES because it thinks mprotect() is trying to adjust a high MMIO
address.
[ This is a very modified version of Sean's original patch, but all
credit goes to Sean for doing this and also pointing out that
sometimes the __pte_needs_invert() function only gets the protection
bits, not the full eventual pte. But zero remains special even in
just protection bits, so that's ok. - Linus ]
Fixes: f22cc87f6c1f ("x86/speculation/l1tf: Invert all not present mappings")
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2018-08-18 01:27:36 +08:00
|
|
|
return val && !(val & _PAGE_PRESENT);
|
2018-06-14 06:48:24 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Get a mask to xor with the page table entry to get the correct pfn. */
|
|
|
|
static inline u64 protnone_mask(u64 val)
|
|
|
|
{
|
|
|
|
return __pte_needs_invert(val) ? ~0ull : 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline u64 flip_protnone_guard(u64 oldval, u64 val, u64 mask)
{
	/*
	 * When a PTE transitions from NONE to !NONE or vice-versa,
	 * invert the PFN part (the bits selected by @mask) to stop
	 * speculation; pte_pfn undoes this when needed.
	 *
	 * XOR with the mask flips exactly the masked bits and leaves
	 * the rest untouched — equivalent to
	 * (val & ~mask) | (~val & mask).
	 */
	if (__pte_needs_invert(oldval) != __pte_needs_invert(val))
		val ^= mask;

	return val;
}
|
|
|
|
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
|
|
|
|
#endif
|