commit 52e856c387
Merge branch 'locking/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip into io_uring-futex

Pull in locking/core from the tip tree, to get the futex2 dependencies
from Peter Zijlstra.

* 'locking/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  locking/ww_mutex/test: Make sure we bail out instead of livelock
  locking/ww_mutex/test: Fix potential workqueue corruption
  locking/ww_mutex/test: Use prng instead of rng to avoid hangs at bootup
  futex: Add sys_futex_requeue()
  futex: Add flags2 argument to futex_requeue()
  futex: Propagate flags into get_futex_key()
  futex: Add sys_futex_wait()
  futex: FLAGS_STRICT
  futex: Add sys_futex_wake()
  futex: Validate futex value against futex size
  futex: Flag conversion
  futex: Extend the FUTEX2 flags
  futex: Clarify FUTEX2 flags
  asm-generic: ticket-lock: Optimize arch_spin_value_unlocked()
  futex/pi: Fix recursive rt_mutex waiter state
  locking/rtmutex: Add a lockdep assert to catch potential nested blocking
  locking/rtmutex: Use rt_mutex specific scheduler helpers
  sched: Provide rt_mutex specific scheduler helpers
  sched: Extract __schedule_loop()
  locking/rtmutex: Avoid unconditional slowpath for DEBUG_RT_MUTEXES
  ...
@@ -492,3 +492,6 @@
560	common	set_mempolicy_home_node		sys_ni_syscall
561	common	cachestat			sys_cachestat
562	common	fchmodat2			sys_fchmodat2
563	common	futex_wake			sys_futex_wake
564	common	futex_wait			sys_futex_wait
565	common	futex_requeue			sys_futex_requeue

@@ -466,3 +466,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

@@ -39,7 +39,7 @@
#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5)
#define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800)

#define __NR_compat_syscalls 453
#define __NR_compat_syscalls 457
#endif

#define __ARCH_WANT_SYS_CLONE

@@ -911,6 +911,12 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node)
__SYSCALL(__NR_cachestat, sys_cachestat)
#define __NR_fchmodat2 452
__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
#define __NR_futex_wake 454
__SYSCALL(__NR_futex_wake, sys_futex_wake)
#define __NR_futex_wait 455
__SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)

/*
 * Please add new compat syscalls above this comment and update

@@ -373,3 +373,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

@@ -452,3 +452,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

@@ -458,3 +458,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

@@ -391,3 +391,6 @@
450	n32	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	n32	cachestat			sys_cachestat
452	n32	fchmodat2			sys_fchmodat2
454	n32	futex_wake			sys_futex_wake
455	n32	futex_wait			sys_futex_wait
456	n32	futex_requeue			sys_futex_requeue

@@ -367,3 +367,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	n64	cachestat			sys_cachestat
452	n64	fchmodat2			sys_fchmodat2
454	n64	futex_wake			sys_futex_wake
455	n64	futex_wait			sys_futex_wait
456	n64	futex_requeue			sys_futex_requeue

@@ -440,3 +440,6 @@
450	o32	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	o32	cachestat			sys_cachestat
452	o32	fchmodat2			sys_fchmodat2
454	o32	futex_wake			sys_futex_wake
455	o32	futex_wait			sys_futex_wait
456	o32	futex_requeue			sys_futex_requeue

@@ -451,3 +451,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

@@ -539,3 +539,6 @@
450	nospu	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

@@ -455,3 +455,6 @@
450	common	set_mempolicy_home_node	sys_set_mempolicy_home_node	sys_set_mempolicy_home_node
451	common	cachestat		sys_cachestat			sys_cachestat
452	common	fchmodat2		sys_fchmodat2			sys_fchmodat2
454	common	futex_wake		sys_futex_wake			sys_futex_wake
455	common	futex_wait		sys_futex_wait			sys_futex_wait
456	common	futex_requeue		sys_futex_requeue		sys_futex_requeue

@@ -455,3 +455,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

@@ -498,3 +498,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue
@@ -28,7 +28,6 @@ config X86_64
	select ARCH_HAS_GIGANTIC_PAGE
	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
	select ARCH_SUPPORTS_PER_VMA_LOCK
	select ARCH_USE_CMPXCHG_LOCKREF
	select HAVE_ARCH_SOFT_DIRTY
	select MODULES_USE_ELF_RELA
	select NEED_DMA_MAP_STATE

@@ -118,6 +117,7 @@ config X86
	select ARCH_SUPPORTS_LTO_CLANG
	select ARCH_SUPPORTS_LTO_CLANG_THIN
	select ARCH_USE_BUILTIN_BSWAP
	select ARCH_USE_CMPXCHG_LOCKREF if X86_CMPXCHG64
	select ARCH_USE_MEMTEST
	select ARCH_USE_QUEUED_RWLOCKS
	select ARCH_USE_QUEUED_SPINLOCKS
@@ -457,3 +457,6 @@
450	i386	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	i386	cachestat			sys_cachestat
452	i386	fchmodat2			sys_fchmodat2
454	i386	futex_wake			sys_futex_wake
455	i386	futex_wait			sys_futex_wait
456	i386	futex_requeue			sys_futex_requeue

@@ -375,6 +375,9 @@
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
453	64	map_shadow_stack		sys_map_shadow_stack
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue

#
# Due to a historical design error, certain syscalls are numbered differently

@@ -423,3 +423,6 @@
450	common	set_mempolicy_home_node		sys_set_mempolicy_home_node
451	common	cachestat			sys_cachestat
452	common	fchmodat2			sys_fchmodat2
454	common	futex_wake			sys_futex_wake
455	common	futex_wait			sys_futex_wait
456	common	futex_requeue			sys_futex_requeue
@@ -68,11 +68,18 @@ static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
	smp_store_release(ptr, (u16)val + 1);
}

static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	u32 val = lock.counter;

	return ((val >> 16) == (val & 0xffff));
}

static __always_inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
	u32 val = atomic_read(lock);
	arch_spinlock_t val = READ_ONCE(*lock);

	return ((val >> 16) != (val & 0xffff));
	return !arch_spin_value_unlocked(val);
}

static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)

@@ -82,11 +89,6 @@ static __always_inline int arch_spin_is_contended(arch_spinlock_t *lock)
	return (s16)((val >> 16) - (val & 0xffff)) > 1;
}

static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
	return !arch_spin_is_locked(&lock);
}

#include <asm/qrwlock.h>

#endif /* __ASM_GENERIC_SPINLOCK_H */
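The hunk above makes arch_spin_value_unlocked() operate on a plain lock value (no atomic read), and arch_spin_is_locked() reuses it on a READ_ONCE() snapshot. A minimal user-space sketch of the ticket encoding this generic header assumes, with the "next" ticket in the upper 16 bits and the "owner" ticket in the lower 16 bits; the names here are illustrative, not the kernel's:

#include <assert.h>
#include <stdint.h>

/* Lock is free exactly when next == owner. */
static int ticket_value_unlocked(uint32_t val)
{
	return (val >> 16) == (val & 0xffff);
}

int main(void)
{
	uint32_t lock = 0;		/* next == owner == 0: unlocked */

	assert(ticket_value_unlocked(lock));
	lock += 1u << 16;		/* take a ticket: next moves ahead of owner */
	assert(!ticket_value_unlocked(lock));
	lock += 1;			/* unlock: owner catches up */
	assert(ticket_value_unlocked(lock));
	return 0;
}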
@@ -7,8 +7,9 @@
/*
 * DEFINE_FREE(name, type, free):
 *	simple helper macro that defines the required wrapper for a __free()
 *	based cleanup function. @free is an expression using '_T' to access
 *	the variable.
 *	based cleanup function. @free is an expression using '_T' to access the
 *	variable. @free should typically include a NULL test before calling a
 *	function, see the example below.
 *
 * __free(name):
 *	variable attribute to add a scoped based cleanup to the variable.

@@ -17,6 +18,9 @@
 *	like a non-atomic xchg(var, NULL), such that the cleanup function will
 *	be inhibited -- provided it sanely deals with a NULL value.
 *
 *	NOTE: this has __must_check semantics so that it is harder to accidentally
 *	leak the resource.
 *
 * return_ptr(p):
 *	returns p while inhibiting the __free().
 *

@@ -24,6 +28,8 @@
 *
 * DEFINE_FREE(kfree, void *, if (_T) kfree(_T))
 *
 * void *alloc_obj(...)
 * {
 *	struct obj *p __free(kfree) = kmalloc(...);
 *	if (!p)
 *		return NULL;

@@ -32,6 +38,24 @@
 *		return NULL;
 *
 *	return_ptr(p);
 * }
 *
 * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though
 * kfree() is fine to be called with a NULL value. This is on purpose. This way
 * the compiler sees the end of our alloc_obj() function as:
 *
 *	tmp = p;
 *	p = NULL;
 *	if (p)
 *		kfree(p);
 *	return tmp;
 *
 * And through the magic of value-propagation and dead-code-elimination, it
 * eliminates the actual cleanup call and compiles into:
 *
 *	return p;
 *
 * Without the NULL test it turns into a mess and the compiler can't help us.
 */

#define DEFINE_FREE(_name, _type, _free) \

@@ -39,8 +63,17 @@

#define __free(_name) __cleanup(__free_##_name)

#define __get_and_null_ptr(p) \
	({ __auto_type __ptr = &(p); \
	   __auto_type __val = *__ptr; \
	   *__ptr = NULL; __val; })

static inline __must_check
const volatile void * __must_check_fn(const volatile void *val)
{ return val; }

#define no_free_ptr(p) \
	({ __auto_type __ptr = (p); (p) = NULL; __ptr; })
	((typeof(p)) __must_check_fn(__get_and_null_ptr(p)))

#define return_ptr(p)	return no_free_ptr(p)
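For reference, a hedged sketch of how these helpers compose in practice, built directly on the DEFINE_FREE(kfree, ...) example from the comment above; struct foo and make_foo() are made-up names for illustration only:

DEFINE_FREE(kfree, void *, if (_T) kfree(_T))

struct foo {
	int val;
};

static struct foo *make_foo(int val)
{
	/* freed automatically on any early return below */
	struct foo *f __free(kfree) = kmalloc(sizeof(*f), GFP_KERNEL);

	if (!f)
		return NULL;

	f->val = val;
	if (val < 0)
		return NULL;		/* __free(kfree) runs here */

	return_ptr(f);			/* ownership handed out, cleanup inhibited */
}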
@@ -911,6 +911,9 @@ struct task_struct {
	 * ->sched_remote_wakeup gets used, so it can be in this word.
	 */
	unsigned			sched_remote_wakeup:1;
#ifdef CONFIG_RT_MUTEXES
	unsigned			sched_rt_mutex:1;
#endif

	/* Bit to tell LSMs we're in execve(): */
	unsigned			in_execve:1;

@@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk)
}

#ifdef CONFIG_RT_MUTEXES
extern void rt_mutex_pre_schedule(void);
extern void rt_mutex_schedule(void);
extern void rt_mutex_post_schedule(void);

/*
 * Must hold either p->pi_lock or task_rq(p)->lock.
 */
@@ -549,6 +549,16 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
asmlinkage long sys_futex_waitv(struct futex_waitv *waiters,
				unsigned int nr_futexes, unsigned int flags,
				struct __kernel_timespec __user *timeout, clockid_t clockid);

asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags);

asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask,
			       unsigned int flags, struct __kernel_timespec __user *timespec,
			       clockid_t clockid);

asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters,
				  unsigned int flags, int nr_wake, int nr_requeue);

asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp,
			      struct __kernel_timespec __user *rmtp);
asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp,

@@ -822,9 +822,15 @@ __SYSCALL(__NR_cachestat, sys_cachestat)

#define __NR_fchmodat2 452
__SYSCALL(__NR_fchmodat2, sys_fchmodat2)
#define __NR_futex_wake 454
__SYSCALL(__NR_futex_wake, sys_futex_wake)
#define __NR_futex_wait 455
__SYSCALL(__NR_futex_wait, sys_futex_wait)
#define __NR_futex_requeue 456
__SYSCALL(__NR_futex_requeue, sys_futex_requeue)

#undef __NR_syscalls
#define __NR_syscalls 453
#define __NR_syscalls 457

/*
 * 32 bit systems traditionally used different
@@ -44,10 +44,35 @@
			 FUTEX_PRIVATE_FLAG)

/*
 * Flags to specify the bit length of the futex word for futex2 syscalls.
 * Currently, only 32 is supported.
 * Flags for futex2 syscalls.
 *
 * NOTE: these are not pure flags, they can also be seen as:
 *
 *   union {
 *     u32  flags;
 *     struct {
 *       u32 size    : 2,
 *           numa    : 1,
 *                   : 4,
 *           private : 1;
 *     };
 *   };
 */
#define FUTEX_32		2
#define FUTEX2_SIZE_U8		0x00
#define FUTEX2_SIZE_U16		0x01
#define FUTEX2_SIZE_U32		0x02
#define FUTEX2_SIZE_U64		0x03
#define FUTEX2_NUMA		0x04
			/*	0x08 */
			/*	0x10 */
			/*	0x20 */
			/*	0x40 */
#define FUTEX2_PRIVATE		FUTEX_PRIVATE_FLAG

#define FUTEX2_SIZE_MASK	0x03

/* do not use */
#define FUTEX_32		FUTEX2_SIZE_U32 /* historical accident :-( */

/*
 * Max numbers of elements in a futex_waitv array
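Read through the union above, a futex2 flags word is just a two-bit size field plus option bits. A hedged user-space sketch of composing and decoding one; the numeric values mirror the defines in the hunk (FUTEX2_PRIVATE is FUTEX_PRIVATE_FLAG, i.e. 0x80), and the helper names are local to this example:

#include <stdint.h>
#include <stdio.h>

#define FUTEX2_SIZE_U32		0x02
#define FUTEX2_NUMA		0x04
#define FUTEX2_PRIVATE		0x80	/* FUTEX_PRIVATE_FLAG */
#define FUTEX2_SIZE_MASK	0x03

int main(void)
{
	unsigned int flags = FUTEX2_SIZE_U32 | FUTEX2_PRIVATE;

	/* futex word size in bytes is 1 << size field */
	printf("size=%u bytes, private=%d, numa=%d\n",
	       1u << (flags & FUTEX2_SIZE_MASK),
	       !!(flags & FUTEX2_PRIVATE),
	       !!(flags & FUTEX2_NUMA));
	return 0;
}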
@@ -193,7 +193,7 @@ static u64 get_inode_sequence_number(struct inode *inode)
/**
 * get_futex_key() - Get parameters which are the keys for a futex
 * @uaddr:	virtual address of the futex
 * @fshared:	false for a PROCESS_PRIVATE futex, true for PROCESS_SHARED
 * @flags:	FLAGS_*
 * @key:	address where result is stored.
 * @rw:		mapping needs to be read/write (values: FUTEX_READ,
 *              FUTEX_WRITE)

@@ -217,14 +217,18 @@ static u64 get_inode_sequence_number(struct inode *inode)
 *
 * lock_page() might sleep, the caller should not hold a spinlock.
 */
int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
		  enum futex_access rw)
{
	unsigned long address = (unsigned long)uaddr;
	struct mm_struct *mm = current->mm;
	struct page *page, *tail;
	struct page *page;
	struct folio *folio;
	struct address_space *mapping;
	int err, ro = 0;
	bool fshared;

	fshared = flags & FLAGS_SHARED;

	/*
	 * The futex address must be "naturally" aligned.

@@ -273,54 +277,52 @@ again:
	err = 0;

	/*
	 * The treatment of mapping from this point on is critical. The page
	 * lock protects many things but in this context the page lock
	 * The treatment of mapping from this point on is critical. The folio
	 * lock protects many things but in this context the folio lock
	 * stabilizes mapping, prevents inode freeing in the shared
	 * file-backed region case and guards against movement to swap cache.
	 *
	 * Strictly speaking the page lock is not needed in all cases being
	 * considered here and page lock forces unnecessarily serialization
	 * Strictly speaking the folio lock is not needed in all cases being
	 * considered here and folio lock forces unnecessarily serialization.
	 * From this point on, mapping will be re-verified if necessary and
	 * page lock will be acquired only if it is unavoidable
	 * folio lock will be acquired only if it is unavoidable
	 *
	 * Mapping checks require the head page for any compound page so the
	 * head page and mapping is looked up now. For anonymous pages, it
	 * does not matter if the page splits in the future as the key is
	 * based on the address. For filesystem-backed pages, the tail is
	 * required as the index of the page determines the key. For
	 * base pages, there is no tail page and tail == page.
	 * Mapping checks require the folio so it is looked up now. For
	 * anonymous pages, it does not matter if the folio is split
	 * in the future as the key is based on the address. For
	 * filesystem-backed pages, the precise page is required as the
	 * index of the page determines the key.
	 */
	tail = page;
	page = compound_head(page);
	mapping = READ_ONCE(page->mapping);
	folio = page_folio(page);
	mapping = READ_ONCE(folio->mapping);

	/*
	 * If page->mapping is NULL, then it cannot be a PageAnon
	 * If folio->mapping is NULL, then it cannot be an anonymous
	 * page; but it might be the ZERO_PAGE or in the gate area or
	 * in a special mapping (all cases which we are happy to fail);
	 * or it may have been a good file page when get_user_pages_fast
	 * found it, but truncated or holepunched or subjected to
	 * invalidate_complete_page2 before we got the page lock (also
	 * invalidate_complete_page2 before we got the folio lock (also
	 * cases which we are happy to fail). And we hold a reference,
	 * so refcount care in invalidate_inode_page's remove_mapping
	 * prevents drop_caches from setting mapping to NULL beneath us.
	 *
	 * The case we do have to guard against is when memory pressure made
	 * shmem_writepage move it from filecache to swapcache beneath us:
	 * an unlikely race, but we do need to retry for page->mapping.
	 * an unlikely race, but we do need to retry for folio->mapping.
	 */
	if (unlikely(!mapping)) {
		int shmem_swizzled;

		/*
		 * Page lock is required to identify which special case above
		 * applies. If this is really a shmem page then the page lock
		 * Folio lock is required to identify which special case above
		 * applies. If this is really a shmem page then the folio lock
		 * will prevent unexpected transitions.
		 */
		lock_page(page);
		shmem_swizzled = PageSwapCache(page) || page->mapping;
		unlock_page(page);
		put_page(page);
		folio_lock(folio);
		shmem_swizzled = folio_test_swapcache(folio) || folio->mapping;
		folio_unlock(folio);
		folio_put(folio);

		if (shmem_swizzled)
			goto again;

@@ -331,14 +333,14 @@ again:
	/*
	 * Private mappings are handled in a simple way.
	 *
	 * If the futex key is stored on an anonymous page, then the associated
	 * If the futex key is stored in anonymous memory, then the associated
	 * object is the mm which is implicitly pinned by the calling process.
	 *
	 * NOTE: When userspace waits on a MAP_SHARED mapping, even if
	 * it's a read-only handle, it's expected that futexes attach to
	 * the object not the particular process.
	 */
	if (PageAnon(page)) {
	if (folio_test_anon(folio)) {
		/*
		 * A RO anonymous page will never change and thus doesn't make
		 * sense for futex operations.

@@ -357,10 +359,10 @@ again:

		/*
		 * The associated futex object in this case is the inode and
		 * the page->mapping must be traversed. Ordinarily this should
		 * be stabilised under page lock but it's not strictly
		 * the folio->mapping must be traversed. Ordinarily this should
		 * be stabilised under folio lock but it's not strictly
		 * necessary in this case as we just want to pin the inode, not
		 * update the radix tree or anything like that.
		 * update i_pages or anything like that.
		 *
		 * The RCU read lock is taken as the inode is finally freed
		 * under RCU. If the mapping still matches expectations then the

@@ -368,9 +370,9 @@ again:
		 */
		rcu_read_lock();

		if (READ_ONCE(page->mapping) != mapping) {
		if (READ_ONCE(folio->mapping) != mapping) {
			rcu_read_unlock();
			put_page(page);
			folio_put(folio);

			goto again;
		}

@@ -378,19 +380,19 @@ again:
		inode = READ_ONCE(mapping->host);
		if (!inode) {
			rcu_read_unlock();
			put_page(page);
			folio_put(folio);

			goto again;
		}

		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
		key->shared.i_seq = get_inode_sequence_number(inode);
		key->shared.pgoff = page_to_pgoff(tail);
		key->shared.pgoff = folio->index + folio_page_idx(folio, page);
		rcu_read_unlock();
	}

out:
	put_page(page);
	folio_put(folio);
	return err;
}
@@ -5,6 +5,7 @@
#include <linux/futex.h>
#include <linux/rtmutex.h>
#include <linux/sched/wake_q.h>
#include <linux/compat.h>

#ifdef CONFIG_PREEMPT_RT
#include <linux/rcuwait.h>

@@ -16,17 +17,84 @@
 * Futex flags used to encode options to functions and preserve them across
 * restarts.
 */
#define FLAGS_SIZE_8		0x0000
#define FLAGS_SIZE_16		0x0001
#define FLAGS_SIZE_32		0x0002
#define FLAGS_SIZE_64		0x0003

#define FLAGS_SIZE_MASK		0x0003

#ifdef CONFIG_MMU
# define FLAGS_SHARED		0x01
# define FLAGS_SHARED		0x0010
#else
/*
 * NOMMU does not have per process address space. Let the compiler optimize
 * code away.
 */
# define FLAGS_SHARED		0x00
# define FLAGS_SHARED		0x0000
#endif
#define FLAGS_CLOCKRT		0x02
#define FLAGS_HAS_TIMEOUT	0x04
#define FLAGS_CLOCKRT		0x0020
#define FLAGS_HAS_TIMEOUT	0x0040
#define FLAGS_NUMA		0x0080
#define FLAGS_STRICT		0x0100

/* FUTEX_ to FLAGS_ */
static inline unsigned int futex_to_flags(unsigned int op)
{
	unsigned int flags = FLAGS_SIZE_32;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME)
		flags |= FLAGS_CLOCKRT;

	return flags;
}

/* FUTEX2_ to FLAGS_ */
static inline unsigned int futex2_to_flags(unsigned int flags2)
{
	unsigned int flags = flags2 & FUTEX2_SIZE_MASK;

	if (!(flags2 & FUTEX2_PRIVATE))
		flags |= FLAGS_SHARED;

	if (flags2 & FUTEX2_NUMA)
		flags |= FLAGS_NUMA;

	return flags;
}

static inline unsigned int futex_size(unsigned int flags)
{
	return 1 << (flags & FLAGS_SIZE_MASK);
}

static inline bool futex_flags_valid(unsigned int flags)
{
	/* Only 64bit futexes for 64bit code */
	if (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()) {
		if ((flags & FLAGS_SIZE_MASK) == FLAGS_SIZE_64)
			return false;
	}

	/* Only 32bit futexes are implemented -- for now */
	if ((flags & FLAGS_SIZE_MASK) != FLAGS_SIZE_32)
		return false;

	return true;
}

static inline bool futex_validate_input(unsigned int flags, u64 val)
{
	int bits = 8 * futex_size(flags);

	if (bits < 64 && (val >> bits))
		return false;

	return true;
}

#ifdef CONFIG_FAIL_FUTEX
extern bool should_fail_futex(bool fshared);

@@ -116,7 +184,7 @@ enum futex_access {
	FUTEX_WRITE
};

extern int get_futex_key(u32 __user *uaddr, bool fshared, union futex_key *key,
extern int get_futex_key(u32 __user *uaddr, unsigned int flags, union futex_key *key,
			 enum futex_access rw);

extern struct hrtimer_sleeper *

@@ -260,10 +328,14 @@ extern int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags, u32
				 val, ktime_t *abs_time, u32 bitset, u32 __user
				 *uaddr2);

extern int futex_requeue(u32 __user *uaddr1, unsigned int flags,
			 u32 __user *uaddr2, int nr_wake, int nr_requeue,
extern int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
			 u32 __user *uaddr2, unsigned int flags2,
			 int nr_wake, int nr_requeue,
			 u32 *cmpval, int requeue_pi);

extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
			struct hrtimer_sleeper *to, u32 bitset);

extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		      ktime_t *abs_time, u32 bitset);
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/slab.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>

#include "futex.h"

@@ -610,29 +611,16 @@ int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb,
/*
 * Caller must hold a reference on @pi_state.
 */
static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_state)
static int wake_futex_pi(u32 __user *uaddr, u32 uval,
			 struct futex_pi_state *pi_state,
			 struct rt_mutex_waiter *top_waiter)
{
	struct rt_mutex_waiter *top_waiter;
	struct task_struct *new_owner;
	bool postunlock = false;
	DEFINE_RT_WAKE_Q(wqh);
	u32 curval, newval;
	int ret = 0;

	top_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
	if (WARN_ON_ONCE(!top_waiter)) {
		/*
		 * As per the comment in futex_unlock_pi() this should not happen.
		 *
		 * When this happens, give up our locks and try again, giving
		 * the futex_lock_pi() instance time to complete, either by
		 * waiting on the rtmutex or removing itself from the futex
		 * queue.
		 */
		ret = -EAGAIN;
		goto out_unlock;
	}

	new_owner = top_waiter->task;

	/*

@@ -945,7 +933,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl
	to = futex_setup_timer(time, &timeout, flags, 0);

retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q.key, FUTEX_WRITE);
	ret = get_futex_key(uaddr, flags, &q.key, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

@@ -1002,6 +990,12 @@ retry_private:
		goto no_block;
	}

	/*
	 * Must be done before we enqueue the waiter, here is unfortunately
	 * under the hb lock, but that *should* work because it does nothing.
	 */
	rt_mutex_pre_schedule();

	rt_mutex_init_waiter(&rt_waiter);

	/*

@@ -1039,19 +1033,37 @@ retry_private:
		ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);

cleanup:
	spin_lock(q.lock_ptr);
	/*
	 * If we failed to acquire the lock (deadlock/signal/timeout), we must
	 * first acquire the hb->lock before removing the lock from the
	 * rt_mutex waitqueue, such that we can keep the hb and rt_mutex wait
	 * lists consistent.
	 * must unwind the above, however we canont lock hb->lock because
	 * rt_mutex already has a waiter enqueued and hb->lock can itself try
	 * and enqueue an rt_waiter through rtlock.
	 *
	 * In particular; it is important that futex_unlock_pi() can not
	 * observe this inconsistency.
	 * Doing the cleanup without holding hb->lock can cause inconsistent
	 * state between hb and pi_state, but only in the direction of not
	 * seeing a waiter that is leaving.
	 *
	 * See futex_unlock_pi(), it deals with this inconsistency.
	 *
	 * There be dragons here, since we must deal with the inconsistency on
	 * the way out (here), it is impossible to detect/warn about the race
	 * the other way around (missing an incoming waiter).
	 *
	 * What could possibly go wrong...
	 */
	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
		ret = 0;

	/*
	 * Now that the rt_waiter has been dequeued, it is safe to use
	 * spinlock/rtlock (which might enqueue its own rt_waiter) and fix up
	 * the
	 */
	spin_lock(q.lock_ptr);
	/*
	 * Waiter is unqueued.
	 */
	rt_mutex_post_schedule();
no_block:
	/*
	 * Fixup the pi_state owner and possibly acquire the lock if we

@@ -1117,7 +1129,7 @@ retry:
	if ((uval & FUTEX_TID_MASK) != vpid)
		return -EPERM;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_WRITE);
	ret = get_futex_key(uaddr, flags, &key, FUTEX_WRITE);
	if (ret)
		return ret;

@@ -1132,6 +1144,7 @@ retry:
	top_waiter = futex_top_waiter(hb, &key);
	if (top_waiter) {
		struct futex_pi_state *pi_state = top_waiter->pi_state;
		struct rt_mutex_waiter *rt_waiter;

		ret = -EINVAL;
		if (!pi_state)

@@ -1144,22 +1157,39 @@ retry:
		if (pi_state->owner != current)
			goto out_unlock;

		get_pi_state(pi_state);
		/*
		 * By taking wait_lock while still holding hb->lock, we ensure
		 * there is no point where we hold neither; and therefore
		 * wake_futex_p() must observe a state consistent with what we
		 * observed.
		 * there is no point where we hold neither; and thereby
		 * wake_futex_pi() must observe any new waiters.
		 *
		 * Since the cleanup: case in futex_lock_pi() removes the
		 * rt_waiter without holding hb->lock, it is possible for
		 * wake_futex_pi() to not find a waiter while the above does,
		 * in this case the waiter is on the way out and it can be
		 * ignored.
		 *
		 * In particular; this forces __rt_mutex_start_proxy() to
		 * complete such that we're guaranteed to observe the
		 * rt_waiter. Also see the WARN in wake_futex_pi().
		 * rt_waiter.
		 */
		raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);

		/*
		 * Futex vs rt_mutex waiter state -- if there are no rt_mutex
		 * waiters even though futex thinks there are, then the waiter
		 * is leaving and the uncontended path is safe to take.
		 */
		rt_waiter = rt_mutex_top_waiter(&pi_state->pi_mutex);
		if (!rt_waiter) {
			raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
			goto do_uncontended;
		}

		get_pi_state(pi_state);
		spin_unlock(&hb->lock);

		/* drops pi_state->pi_mutex.wait_lock */
		ret = wake_futex_pi(uaddr, uval, pi_state);
		ret = wake_futex_pi(uaddr, uval, pi_state, rt_waiter);

		put_pi_state(pi_state);

@@ -1187,6 +1217,7 @@ retry:
		return ret;
	}

do_uncontended:
	/*
	 * We have no kernel internal state, i.e. no waiters in the
	 * kernel. Waiters which are about to queue themselves are stuck
@@ -346,8 +346,9 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
/**
 * futex_requeue() - Requeue waiters from uaddr1 to uaddr2
 * @uaddr1:	source futex user address
 * @flags:	futex flags (FLAGS_SHARED, etc.)
 * @flags1:	futex flags (FLAGS_SHARED, etc.)
 * @uaddr2:	target futex user address
 * @flags2:	futex flags (FLAGS_SHARED, etc.)
 * @nr_wake:	number of waiters to wake (must be 1 for requeue_pi)
 * @nr_requeue:	number of waiters to requeue (0-INT_MAX)
 * @cmpval:	@uaddr1 expected value (or %NULL)

@@ -361,7 +362,8 @@ futex_proxy_trylock_atomic(u32 __user *pifutex, struct futex_hash_bucket *hb1,
 *  - >=0 - on success, the number of tasks requeued or woken;
 *  -  <0 - on error
 */
int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
int futex_requeue(u32 __user *uaddr1, unsigned int flags1,
		  u32 __user *uaddr2, unsigned int flags2,
		  int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi)
{
	union futex_key key1 = FUTEX_KEY_INIT, key2 = FUTEX_KEY_INIT;

@@ -424,10 +426,10 @@ int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	}

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	ret = get_futex_key(uaddr1, flags1, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2,
	ret = get_futex_key(uaddr2, flags2, &key2,
			    requeue_pi ? FUTEX_WRITE : FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

@@ -459,7 +461,7 @@ retry_private:
			if (ret)
				return ret;

			if (!(flags & FLAGS_SHARED))
			if (!(flags1 & FLAGS_SHARED))
				goto retry_private;

			goto retry;

@@ -789,7 +791,7 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
	 */
	rt_mutex_init_waiter(&rt_waiter);

	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		goto out;

@@ -850,11 +852,13 @@ int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
		pi_mutex = &q.pi_state->pi_mutex;
		ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);

		/* Current is not longer pi_blocked_on */
		spin_lock(q.lock_ptr);
		/*
		 * See futex_unlock_pi()'s cleanup: comment.
		 */
		if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
			ret = 0;

		spin_lock(q.lock_ptr);
		debug_rt_mutex_free_waiter(&rt_waiter);
		/*
		 * Fixup the pi_state owner and possibly acquire the lock if we
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <linux/compat.h>
#include <linux/syscalls.h>
#include <linux/time_namespace.h>

@@ -85,15 +84,12 @@ err_unlock:
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	      u32 __user *uaddr2, u32 val2, u32 val3)
{
	unsigned int flags = futex_to_flags(op);
	int cmd = op & FUTEX_CMD_MASK;
	unsigned int flags = 0;

	if (!(op & FUTEX_PRIVATE_FLAG))
		flags |= FLAGS_SHARED;

	if (op & FUTEX_CLOCK_REALTIME) {
		flags |= FLAGS_CLOCKRT;
		if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
	if (flags & FLAGS_CLOCKRT) {
		if (cmd != FUTEX_WAIT_BITSET &&
		    cmd != FUTEX_WAIT_REQUEUE_PI &&
		    cmd != FUTEX_LOCK_PI2)
			return -ENOSYS;
	}

@@ -110,9 +106,9 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
	case FUTEX_WAKE_BITSET:
		return futex_wake(uaddr, flags, val, val3);
	case FUTEX_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, NULL, 0);
	case FUTEX_CMP_REQUEUE:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 0);
	case FUTEX_WAKE_OP:
		return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
	case FUTEX_LOCK_PI:

@@ -129,7 +125,7 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
		return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
					     uaddr2);
	case FUTEX_CMP_REQUEUE_PI:
		return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
		return futex_requeue(uaddr, flags, uaddr2, flags, val, val2, &val3, 1);
	}
	return -ENOSYS;
}

@@ -183,8 +179,7 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
	return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
}

/* Mask of available flags for each futex in futex_waitv list */
#define FUTEXV_WAITER_MASK (FUTEX_32 | FUTEX_PRIVATE_FLAG)
#define FUTEX2_VALID_MASK (FUTEX2_SIZE_MASK | FUTEX2_PRIVATE)

/**
 * futex_parse_waitv - Parse a waitv array from userspace

@@ -202,16 +197,22 @@ static int futex_parse_waitv(struct futex_vector *futexv,
	unsigned int i;

	for (i = 0; i < nr_futexes; i++) {
		unsigned int flags;

		if (copy_from_user(&aux, &uwaitv[i], sizeof(aux)))
			return -EFAULT;

		if ((aux.flags & ~FUTEXV_WAITER_MASK) || aux.__reserved)
		if ((aux.flags & ~FUTEX2_VALID_MASK) || aux.__reserved)
			return -EINVAL;

		if (!(aux.flags & FUTEX_32))
		flags = futex2_to_flags(aux.flags);
		if (!futex_flags_valid(flags))
			return -EINVAL;

		futexv[i].w.flags = aux.flags;
		if (!futex_validate_input(flags, aux.val))
			return -EINVAL;

		futexv[i].w.flags = flags;
		futexv[i].w.val = aux.val;
		futexv[i].w.uaddr = aux.uaddr;
		futexv[i].q = futex_q_init;

@@ -220,6 +221,46 @@ static int futex_parse_waitv(struct futex_vector *futexv,
	return 0;
}

static int futex2_setup_timeout(struct __kernel_timespec __user *timeout,
				clockid_t clockid, struct hrtimer_sleeper *to)
{
	int flag_clkid = 0, flag_init = 0;
	struct timespec64 ts;
	ktime_t time;
	int ret;

	if (!timeout)
		return 0;

	if (clockid == CLOCK_REALTIME) {
		flag_clkid = FLAGS_CLOCKRT;
		flag_init = FUTEX_CLOCK_REALTIME;
	}

	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
		return -EINVAL;

	if (get_timespec64(&ts, timeout))
		return -EFAULT;

	/*
	 * Since there's no opcode for futex_waitv, use
	 * FUTEX_WAIT_BITSET that uses absolute timeout as well
	 */
	ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
	if (ret)
		return ret;

	futex_setup_timer(&time, to, flag_clkid, 0);
	return 0;
}

static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to)
{
	hrtimer_cancel(&to->timer);
	destroy_hrtimer_on_stack(&to->timer);
}

/**
 * sys_futex_waitv - Wait on a list of futexes
 * @waiters:    List of futexes to wait on

@@ -249,8 +290,6 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
{
	struct hrtimer_sleeper to;
	struct futex_vector *futexv;
	struct timespec64 ts;
	ktime_t time;
	int ret;

	/* This syscall supports no flags for now */

@@ -260,30 +299,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
	if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters)
		return -EINVAL;

	if (timeout) {
		int flag_clkid = 0, flag_init = 0;

		if (clockid == CLOCK_REALTIME) {
			flag_clkid = FLAGS_CLOCKRT;
			flag_init = FUTEX_CLOCK_REALTIME;
		}

		if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
			return -EINVAL;

		if (get_timespec64(&ts, timeout))
			return -EFAULT;

		/*
		 * Since there's no opcode for futex_waitv, use
		 * FUTEX_WAIT_BITSET that uses absolute timeout as well
		 */
		ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time);
		if (ret)
			return ret;

		futex_setup_timer(&time, &to, flag_clkid, 0);
	}
	if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
		return ret;

	futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL);
	if (!futexv) {

@@ -298,13 +315,125 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters,
	kfree(futexv);

destroy_timer:
	if (timeout) {
		hrtimer_cancel(&to.timer);
		destroy_hrtimer_on_stack(&to.timer);
	}
	if (timeout)
		futex2_destroy_timeout(&to);
	return ret;
}

/*
 * sys_futex_wake - Wake a number of futexes
 * @uaddr:	Address of the futex(es) to wake
 * @mask:	bitmask
 * @nr:		Number of the futexes to wake
 * @flags:	FUTEX2 flags
 *
 * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the
 * futex2 family of calls.
 */

SYSCALL_DEFINE4(futex_wake,
		void __user *, uaddr,
		unsigned long, mask,
		int, nr,
		unsigned int, flags)
{
	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	flags = futex2_to_flags(flags);
	if (!futex_flags_valid(flags))
		return -EINVAL;

	if (!futex_validate_input(flags, mask))
		return -EINVAL;

	return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask);
}

/*
 * sys_futex_wait - Wait on a futex
 * @uaddr:	Address of the futex to wait on
 * @val:	Value of @uaddr
 * @mask:	bitmask
 * @flags:	FUTEX2 flags
 * @timeout:	Optional absolute timeout
 * @clockid:	Clock to be used for the timeout, realtime or monotonic
 *
 * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the
 * futex2 familiy of calls.
 */

SYSCALL_DEFINE6(futex_wait,
		void __user *, uaddr,
		unsigned long, val,
		unsigned long, mask,
		unsigned int, flags,
		struct __kernel_timespec __user *, timeout,
		clockid_t, clockid)
{
	struct hrtimer_sleeper to;
	int ret;

	if (flags & ~FUTEX2_VALID_MASK)
		return -EINVAL;

	flags = futex2_to_flags(flags);
	if (!futex_flags_valid(flags))
		return -EINVAL;

	if (!futex_validate_input(flags, val) ||
	    !futex_validate_input(flags, mask))
		return -EINVAL;

	if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to)))
		return ret;

	ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask);

	if (timeout)
		futex2_destroy_timeout(&to);

	return ret;
}

/*
 * sys_futex_requeue - Requeue a waiter from one futex to another
 * @waiters:	array describing the source and destination futex
 * @flags:	unused
 * @nr_wake:	number of futexes to wake
 * @nr_requeue:	number of futexes to requeue
 *
 * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the
 * futex2 family of calls.
 */

SYSCALL_DEFINE4(futex_requeue,
		struct futex_waitv __user *, waiters,
		unsigned int, flags,
		int, nr_wake,
		int, nr_requeue)
{
	struct futex_vector futexes[2];
	u32 cmpval;
	int ret;

	if (flags)
		return -EINVAL;

	if (!waiters)
		return -EINVAL;

	ret = futex_parse_waitv(futexes, waiters, 2);
	if (ret)
		return ret;

	cmpval = futexes[0].w.val;

	return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags,
			     u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags,
			     nr_wake, nr_requeue, &cmpval, 0);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE2(set_robust_list,
		struct compat_robust_list_head __user *, head,
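The three new entry points above have no libc wrappers; a hedged user-space sketch of driving two of them via syscall(2). The syscall numbers are the x86-64 values from the table earlier in this commit (454 wake, 455 wait) and the flag values come from the uapi hunk above; other architectures use different numbers, and nothing here is an official API wrapper:

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

#define __NR_futex_wake		454	/* x86-64 */
#define __NR_futex_wait		455	/* x86-64 */
#define FUTEX2_SIZE_U32		0x02
#define FUTEX2_PRIVATE		0x80
#define FUTEX_BITSET_MATCH_ANY	0xffffffff

static uint32_t futex_word;

int main(void)
{
	unsigned int flags = FUTEX2_SIZE_U32 | FUTEX2_PRIVATE;

	/* Wake up to one waiter whose mask intersects ours. */
	long woken = syscall(__NR_futex_wake, &futex_word,
			     (unsigned long)FUTEX_BITSET_MATCH_ANY, 1, flags);
	printf("futex_wake returned %ld\n", woken);

	/*
	 * A real waiter would block like this until woken or until the
	 * absolute __kernel_timespec timeout expires:
	 *
	 *	syscall(__NR_futex_wait, &futex_word, 0UL,
	 *		(unsigned long)FUTEX_BITSET_MATCH_ANY, flags,
	 *		NULL, CLOCK_MONOTONIC);
	 */
	return 0;
}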
@@ -145,16 +145,19 @@ int futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	union futex_key key = FUTEX_KEY_INIT;
	int ret;
	DEFINE_WAKE_Q(wake_q);
	int ret;

	if (!bitset)
		return -EINVAL;

	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &key, FUTEX_READ);
	ret = get_futex_key(uaddr, flags, &key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

	if ((flags & FLAGS_STRICT) && !nr_wake)
		return 0;

	hb = futex_hash(&key);

	/* Make sure we really have tasks to wakeup */

@@ -245,10 +248,10 @@ int futex_wake_op(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2,
	DEFINE_WAKE_Q(wake_q);

retry:
	ret = get_futex_key(uaddr1, flags & FLAGS_SHARED, &key1, FUTEX_READ);
	ret = get_futex_key(uaddr1, flags, &key1, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;
	ret = get_futex_key(uaddr2, flags & FLAGS_SHARED, &key2, FUTEX_WRITE);
	ret = get_futex_key(uaddr2, flags, &key2, FUTEX_WRITE);
	if (unlikely(ret != 0))
		return ret;

@@ -419,11 +422,11 @@ static int futex_wait_multiple_setup(struct futex_vector *vs, int count, int *wo
	 */
retry:
	for (i = 0; i < count; i++) {
		if ((vs[i].w.flags & FUTEX_PRIVATE_FLAG) && retry)
		if (!(vs[i].w.flags & FLAGS_SHARED) && retry)
			continue;

		ret = get_futex_key(u64_to_user_ptr(vs[i].w.uaddr),
				    !(vs[i].w.flags & FUTEX_PRIVATE_FLAG),
				    vs[i].w.flags,
				    &vs[i].q.key, FUTEX_READ);

		if (unlikely(ret))

@@ -435,7 +438,7 @@ retry:
	for (i = 0; i < count; i++) {
		u32 __user *uaddr = (u32 __user *)(unsigned long)vs[i].w.uaddr;
		struct futex_q *q = &vs[i].q;
		u32 val = (u32)vs[i].w.val;
		u32 val = vs[i].w.val;

		hb = futex_q_lock(q);
		ret = futex_get_value_locked(&uval, uaddr);

@@ -599,7 +602,7 @@ int futex_wait_setup(u32 __user *uaddr, u32 val, unsigned int flags,
	 * while the syscall executes.
	 */
retry:
	ret = get_futex_key(uaddr, flags & FLAGS_SHARED, &q->key, FUTEX_READ);
	ret = get_futex_key(uaddr, flags, &q->key, FUTEX_READ);
	if (unlikely(ret != 0))
		return ret;

@@ -629,20 +632,18 @@ retry_private:
	return ret;
}

int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
		 struct hrtimer_sleeper *to, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to;
	struct restart_block *restart;
	struct futex_hash_bucket *hb;
	struct futex_q q = futex_q_init;
	struct futex_hash_bucket *hb;
	int ret;

	if (!bitset)
		return -EINVAL;

	q.bitset = bitset;

	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);
retry:
	/*
	 * Prepare to wait on uaddr. On success, it holds hb->lock and q

@@ -650,18 +651,17 @@ retry:
	 */
	ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
	if (ret)
		goto out;
		return ret;

	/* futex_queue and wait for wakeup, timeout, or a signal. */
	futex_wait_queue(hb, &q, to);

	/* If we were woken (and unqueued), we succeeded, whatever. */
	ret = 0;
	if (!futex_unqueue(&q))
		goto out;
	ret = -ETIMEDOUT;
		return 0;

	if (to && !to->task)
		goto out;
		return -ETIMEDOUT;

	/*
	 * We expect signal_pending(current), but we might be the

@@ -670,24 +670,38 @@ retry:
	if (!signal_pending(current))
		goto retry;

	ret = -ERESTARTSYS;
	if (!abs_time)
		goto out;
	return -ERESTARTSYS;
}

	restart = &current->restart_block;
	restart->futex.uaddr = uaddr;
	restart->futex.val = val;
	restart->futex.time = *abs_time;
	restart->futex.bitset = bitset;
	restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset)
{
	struct hrtimer_sleeper timeout, *to;
	struct restart_block *restart;
	int ret;

	ret = set_restart_fn(restart, futex_wait_restart);
	to = futex_setup_timer(abs_time, &timeout, flags,
			       current->timer_slack_ns);

out:
	if (to) {
		hrtimer_cancel(&to->timer);
		destroy_hrtimer_on_stack(&to->timer);
	ret = __futex_wait(uaddr, flags, val, to, bitset);

	/* No timeout, nothing to clean up. */
	if (!to)
		return ret;

	hrtimer_cancel(&to->timer);
	destroy_hrtimer_on_stack(&to->timer);

	if (ret == -ERESTARTSYS) {
		restart = &current->restart_block;
		restart->futex.uaddr = uaddr;
		restart->futex.val = val;
		restart->futex.time = *abs_time;
		restart->futex.bitset = bitset;
		restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

		return set_restart_fn(restart, futex_wait_restart);
	}

	return ret;
}
@@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,
	return try_cmpxchg_acquire(&lock->owner, &old, new);
}

static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
	return rt_mutex_cmpxchg_acquire(lock, NULL, current);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)

@@ -297,6 +302,20 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock,

}

static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock);

static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock)
{
	/*
	 * With debug enabled rt_mutex_cmpxchg trylock() will always fail.
	 *
	 * Avoid unconditionally taking the slow path by using
	 * rt_mutex_slow_trylock() which is covered by the debug code and can
	 * acquire a non-contended rtmutex.
	 */
	return rt_mutex_slowtrylock(lock);
}

static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock,
						     struct task_struct *old,
						     struct task_struct *new)

@@ -1613,7 +1632,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
		raw_spin_unlock_irq(&lock->wait_lock);

		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
			schedule();
			rt_mutex_schedule();

		raw_spin_lock_irq(&lock->wait_lock);
		set_current_state(state);

@@ -1642,7 +1661,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
	WARN(1, "rtmutex deadlock detected\n");
	while (1) {
		set_current_state(TASK_INTERRUPTIBLE);
		schedule();
		rt_mutex_schedule();
	}
}

@@ -1737,6 +1756,15 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
	unsigned long flags;
	int ret;

	/*
	 * Do all pre-schedule work here, before we queue a waiter and invoke
	 * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would
	 * otherwise recurse back into task_blocks_on_rt_mutex() through
	 * rtlock_slowlock() and will then enqueue a second waiter for this
	 * same task and things get really confusing real fast.
	 */
	rt_mutex_pre_schedule();

	/*
	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
	 * be called in early boot if the cmpxchg() fast path is disabled

@@ -1748,6 +1776,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
	raw_spin_lock_irqsave(&lock->wait_lock, flags);
	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
	rt_mutex_post_schedule();

	return ret;
}

@@ -1755,7 +1784,9 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
					   unsigned int state)
{
	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
	lockdep_assert(!current->pi_blocked_on);

	if (likely(rt_mutex_try_acquire(lock)))
		return 0;

	return rt_mutex_slowlock(lock, NULL, state);
@@ -71,6 +71,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	int ret;

	rwbase_pre_schedule();
	raw_spin_lock_irq(&rtm->wait_lock);

	/*

@@ -125,12 +126,15 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
	rwbase_rtmutex_unlock(rtm);

	trace_contention_end(rwb, ret);
	rwbase_post_schedule();
	return ret;
}

static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
					    unsigned int state)
{
	lockdep_assert(!current->pi_blocked_on);

	if (rwbase_read_trylock(rwb))
		return 0;

@@ -237,6 +241,8 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
	/* Force readers into slow path */
	atomic_sub(READER_BIAS, &rwb->readers);

	rwbase_pre_schedule();

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb))
		goto out_unlock;

@@ -248,6 +254,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
		if (rwbase_signal_pending_state(state, current)) {
			rwbase_restore_current_state();
			__rwbase_write_unlock(rwb, 0, flags);
			rwbase_post_schedule();
			trace_contention_end(rwb, -EINTR);
			return -EINTR;
		}

@@ -266,6 +273,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb,

out_unlock:
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	rwbase_post_schedule();
	return 0;
}

@@ -1427,8 +1427,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
#define rwbase_signal_pending_state(state, current)	\
	signal_pending_state(state, current)

#define rwbase_pre_schedule()				\
	rt_mutex_pre_schedule()

#define rwbase_schedule()				\
	schedule()
	rt_mutex_schedule()

#define rwbase_post_schedule()				\
	rt_mutex_post_schedule()

#include "rwbase_rt.c"

@@ -37,6 +37,8 @@

static __always_inline void rtlock_lock(struct rt_mutex_base *rtm)
{
	lockdep_assert(!current->pi_blocked_on);

	if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current)))
		rtlock_slowlock(rtm);
}

@@ -184,9 +186,13 @@ static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm)

#define rwbase_signal_pending_state(state, current)	(0)

#define rwbase_pre_schedule()

#define rwbase_schedule()				\
	schedule_rtlock()

#define rwbase_post_schedule()

#include "rwbase_rt.c"
/*
 * The common functions which get wrapped into the rwlock API.
@@ -9,7 +9,7 @@
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/prandom.h>
#include <linux/slab.h>
#include <linux/ww_mutex.h>

@@ -386,6 +386,19 @@ struct stress {
	int nlocks;
};

struct rnd_state rng;
DEFINE_SPINLOCK(rng_lock);

static inline u32 prandom_u32_below(u32 ceil)
{
	u32 ret;

	spin_lock(&rng_lock);
	ret = prandom_u32_state(&rng) % ceil;
	spin_unlock(&rng_lock);
	return ret;
}

static int *get_random_order(int count)
{
	int *order;
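The selftest now draws its shuffle indices from a private prandom state instead of get_random_u32_below(), matching the "use prng instead of rng to avoid hangs at bootup" change in this series. A minimal usage sketch of the helper pair added above (the wrapper function is hypothetical; the seeding call is the one done once in module init further down):

/* Hypothetical caller showing how the seeded prng helpers fit together. */
static void prng_usage_sketch(void)
{
	int i;

	prandom_seed_state(&rng, get_random_u64());	/* once, at init time */

	for (i = 0; i < 8; i++)
		pr_info("pick %u\n", prandom_u32_below(16));	/* values in 0..15 */
}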
@@ -399,7 +412,7 @@ static int *get_random_order(int count)
		order[n] = n;

	for (n = count - 1; n > 1; n--) {
		r = get_random_u32_below(n + 1);
		r = prandom_u32_below(n + 1);
		if (r != n) {
			tmp = order[n];
			order[n] = order[r];

@@ -452,21 +465,21 @@ retry:
			ww_mutex_unlock(&locks[order[n]]);

		if (err == -EDEADLK) {
			ww_mutex_lock_slow(&locks[order[contended]], &ctx);
			goto retry;
			if (!time_after(jiffies, stress->timeout)) {
				ww_mutex_lock_slow(&locks[order[contended]], &ctx);
				goto retry;
			}
		}

		ww_acquire_fini(&ctx);
		if (err) {
			pr_err_once("stress (%s) failed with %d\n",
				    __func__, err);
			break;
		}

		ww_acquire_fini(&ctx);
	} while (!time_after(jiffies, stress->timeout));

	kfree(order);
	kfree(stress);
}

struct reorder_lock {
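The -EDEADLK path used to retry unconditionally, which could livelock when workers kept backing off each other; the retry is now abandoned once the stress thread's overall timeout has elapsed. Roughly, the bounded-retry shape is as follows (an illustrative skeleton; the helper names are hypothetical and only the timeout test mirrors the hunk above):

/* Illustrative skeleton of the bounded backoff/retry pattern. */
static int bounded_retry_sketch(struct stress *stress)
{
	int err;

retry:
	err = acquire_locks_in_order(stress);		/* hypothetical helper */
	if (err == -EDEADLK) {
		release_all_locks(stress);		/* hypothetical helper */
		if (!time_after(jiffies, stress->timeout))
			goto retry;			/* keep backing off */
		/* timeout expired: give up instead of livelocking */
	}
	return err;
}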
@@ -531,7 +544,6 @@ out:
	list_for_each_entry_safe(ll, ln, &locks, link)
		kfree(ll);
	kfree(order);
	kfree(stress);
}

static void stress_one_work(struct work_struct *work)

@@ -552,8 +564,6 @@ static void stress_one_work(struct work_struct *work)
			break;
		}
	} while (!time_after(jiffies, stress->timeout));

	kfree(stress);
}

#define STRESS_INORDER BIT(0)

@@ -564,15 +574,24 @@ static void stress_one_work(struct work_struct *work)
static int stress(int nlocks, int nthreads, unsigned int flags)
{
	struct ww_mutex *locks;
	int n;
	struct stress *stress_array;
	int n, count;

	locks = kmalloc_array(nlocks, sizeof(*locks), GFP_KERNEL);
	if (!locks)
		return -ENOMEM;

	stress_array = kmalloc_array(nthreads, sizeof(*stress_array),
				     GFP_KERNEL);
	if (!stress_array) {
		kfree(locks);
		return -ENOMEM;
	}

	for (n = 0; n < nlocks; n++)
		ww_mutex_init(&locks[n], &ww_class);

	count = 0;
	for (n = 0; nthreads; n++) {
		struct stress *stress;
		void (*fn)(struct work_struct *work);

@@ -596,9 +615,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags)
		if (!fn)
			continue;

		stress = kmalloc(sizeof(*stress), GFP_KERNEL);
		if (!stress)
			break;
		stress = &stress_array[count++];

		INIT_WORK(&stress->work, fn);
		stress->locks = locks;

@@ -613,6 +630,7 @@ static int stress(int nlocks, int nthreads, unsigned int flags)

	for (n = 0; n < nlocks; n++)
		ww_mutex_destroy(&locks[n]);
	kfree(stress_array);
	kfree(locks);

	return 0;

@@ -625,6 +643,8 @@ static int __init test_ww_mutex_init(void)

	printk(KERN_INFO "Beginning ww mutex selftests\n");

	prandom_seed_state(&rng, get_random_u64());

	wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0);
	if (!wq)
		return -ENOMEM;
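This is the workqueue-corruption fix from the series: instead of each worker kfree()ing its own struct stress while the workqueue may still reference the embedded work item, the structs are carved out of a single stress_array allocated up front and freed only after all work has completed, which is what the kfree(stress) deletions in the worker functions above correspond to. A condensed sketch of the allocate-then-index pattern, relying on the file's existing wq and worker function (error handling trimmed, the function itself is illustrative):

/* Condensed sketch: one backing array, handed out by index, freed by the caller. */
static int stress_alloc_sketch(int nthreads)
{
	struct stress *stress_array;
	int count = 0, n;

	stress_array = kmalloc_array(nthreads, sizeof(*stress_array), GFP_KERNEL);
	if (!stress_array)
		return -ENOMEM;

	for (n = 0; n < nthreads; n++) {
		struct stress *stress = &stress_array[count++];

		INIT_WORK(&stress->work, stress_inorder_work);
		queue_work(wq, &stress->work);
	}

	flush_workqueue(wq);	/* every worker is done before the array goes away */
	kfree(stress_array);
	return 0;
}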
@@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx,
	}
	mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip);

	if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) {
	if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) {
		if (ww_ctx)
			ww_mutex_set_context_fastpath(lock, ww_ctx);
		return 0;
@@ -6720,10 +6720,14 @@ void __noreturn do_task_dead(void)

static inline void sched_submit_work(struct task_struct *tsk)
{
	static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG);
	unsigned int task_flags;

	if (task_is_running(tsk))
		return;
	/*
	 * Establish LD_WAIT_CONFIG context to ensure none of the code called
	 * will use a blocking primitive -- which would lead to recursion.
	 */
	lock_map_acquire_try(&sched_map);

	task_flags = tsk->flags;
	/*

@@ -6749,6 +6753,8 @@ static inline void sched_submit_work(struct task_struct *tsk)
	 * make sure to submit it to avoid deadlocks.
	 */
	blk_flush_plug(tsk->plug, true);

	lock_map_release(&sched_map);
}

static void sched_update_worker(struct task_struct *tsk)
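The DEFINE_WAIT_OVERRIDE_MAP()/lock_map_acquire_try() pair wraps the body of sched_submit_work() in an LD_WAIT_CONFIG lockdep context, so a sleeping lock taken from the submit-work callbacks is reported as an invalid wait context rather than silently recursing back into schedule(). The general shape of that annotation pattern, as a generic sketch rather than additional kernel code:

/* Generic sketch of the lockdep wait-override annotation pattern. */
static DEFINE_WAIT_OVERRIDE_MAP(nonblock_map, LD_WAIT_CONFIG);

static void must_not_block(void)
{
	lock_map_acquire_try(&nonblock_map);	/* enter LD_WAIT_CONFIG context */

	/* ...work that may only use raw/non-sleeping locks... */

	lock_map_release(&nonblock_map);
}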
@@ -6761,16 +6767,26 @@ static void sched_update_worker(struct task_struct *tsk)
	}
}

static __always_inline void __schedule_loop(unsigned int sched_mode)
{
	do {
		preempt_disable();
		__schedule(sched_mode);
		sched_preempt_enable_no_resched();
	} while (need_resched());
}

asmlinkage __visible void __sched schedule(void)
{
	struct task_struct *tsk = current;

	sched_submit_work(tsk);
	do {
		preempt_disable();
		__schedule(SM_NONE);
		sched_preempt_enable_no_resched();
	} while (need_resched());
#ifdef CONFIG_RT_MUTEXES
	lockdep_assert(!tsk->sched_rt_mutex);
#endif

	if (!task_is_running(tsk))
		sched_submit_work(tsk);
	__schedule_loop(SM_NONE);
	sched_update_worker(tsk);
}
EXPORT_SYMBOL(schedule);

@@ -6834,11 +6850,7 @@ void __sched schedule_preempt_disabled(void)
#ifdef CONFIG_PREEMPT_RT
void __sched notrace schedule_rtlock(void)
{
	do {
		preempt_disable();
		__schedule(SM_RTLOCK_WAIT);
		sched_preempt_enable_no_resched();
	} while (need_resched());
	__schedule_loop(SM_RTLOCK_WAIT);
}
NOKPROBE_SYMBOL(schedule_rtlock);
#endif

@@ -7034,6 +7046,32 @@ static void __setscheduler_prio(struct task_struct *p, int prio)

#ifdef CONFIG_RT_MUTEXES

/*
 * Would be more useful with typeof()/auto_type but they don't mix with
 * bit-fields. Since it's a local thing, use int. Keep the generic sounding
 * name such that if someone were to implement this function we get to compare
 * notes.
 */
#define fetch_and_set(x, v) ({ int _x = (x); (x) = (v); _x; })

void rt_mutex_pre_schedule(void)
{
	lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1));
	sched_submit_work(current);
}

void rt_mutex_schedule(void)
{
	lockdep_assert(current->sched_rt_mutex);
	__schedule_loop(SM_NONE);
}

void rt_mutex_post_schedule(void)
{
	sched_update_worker(current);
	lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0));
}

static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
{
	if (pi_task)
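These three helpers are what the rt_mutex and rwbase slowpaths earlier in this diff are bracketed with: rt_mutex_pre_schedule() submits pending block/worker work exactly once before the wait_lock is taken, rt_mutex_schedule() then blocks without resubmitting, and rt_mutex_post_schedule() runs the matching worker bookkeeping once the lock is owned. The sched_rt_mutex flag together with fetch_and_set() asserts the bracket is never nested. A condensed sketch of the expected calling sequence in a slowpath (the acquire attempt is a hypothetical placeholder; the real call sites are the rtmutex and rwbase hunks above):

/* Condensed sketch of how an rt_mutex-style slowpath brackets blocking. */
static int slowpath_sketch(struct rt_mutex_base *lock, unsigned int state)
{
	rt_mutex_pre_schedule();	/* submit block/worker work exactly once */

	while (!try_acquire(lock)) {	/* hypothetical acquire attempt */
		/* waiter is enqueued and wait_lock dropped before blocking */
		rt_mutex_schedule();
	}

	rt_mutex_post_schedule();	/* matching worker bookkeeping */
	return 0;
}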
@@ -87,6 +87,9 @@ COND_SYSCALL_COMPAT(set_robust_list);
COND_SYSCALL(get_robust_list);
COND_SYSCALL_COMPAT(get_robust_list);
COND_SYSCALL(futex_waitv);
COND_SYSCALL(futex_wake);
COND_SYSCALL(futex_wait);
COND_SYSCALL(futex_requeue);
COND_SYSCALL(kexec_load);
COND_SYSCALL_COMPAT(kexec_load);
COND_SYSCALL(init_module);
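COND_SYSCALL() provides a weak fallback for a syscall that may be compiled out (here, when futex support is disabled), so the syscall table entries added for futex_wake/futex_wait/futex_requeue still link and simply fail at runtime. Conceptually the macro expands to something along these lines; this is a simplified illustration, not the exact per-ABI definition used by sys_ni.c:

/* Simplified idea of what COND_SYSCALL(futex_wake) provides when the real
 * implementation is configured out: a weak stub returning -ENOSYS. */
asmlinkage long __weak sys_futex_wake(void)
{
	return -ENOSYS;
}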