Use C11-like atomics instead of plain memory accesses in x86 lock elision.

This uses atomic operations to access lock elision metadata that is accessed
concurrently (i.e., the adapt_count fields).  The data is smaller than a
word but is accessed only with atomic loads and stores; therefore, we also
add support for atomic loads and stores of these smaller sizes.
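
As a rough C11 sketch of the resulting access pattern (hypothetical names,
not code from this patch):

  /* Minimal sketch: the hint may be read and written concurrently, so plain
     accesses would be data races.  Relaxed-MO atomic loads and stores are
     enough because the value only steers an adaptation heuristic.  */
  #include <stdatomic.h>

  static _Atomic short hint;              /* stands in for *adapt_count */

  static void
  throttle_elision (short skip)
  {
    /* Separate load and store; lost updates are acceptable for a hint.  */
    if (atomic_load_explicit (&hint, memory_order_relaxed) != skip)
      atomic_store_explicit (&hint, skip, memory_order_relaxed);
  }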

	* include/atomic.h (__atomic_check_size_ls): New.
	(atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
	atomic_store_release): Use it.
	* sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
	(elision_adapt, ELIDE_LOCK): Use atomics.
	* sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
	atomics and improve code comments.
	* sysdeps/unix/sysv/linux/x86/elision-trylock.c
	(__lll_trylock_elision): Likewise.
Torvald Riegel 2016-11-30 17:53:11 +01:00
parent 71be79a25f
commit ca6e601a9d
5 changed files with 82 additions and 30 deletions

ChangeLog

@@ -1,3 +1,15 @@
+2016-12-05  Torvald Riegel  <triegel@redhat.com>
+
+	* include/atomic.h (__atomic_check_size_ls): New.
+	(atomic_load_relaxed, atomic_load_acquire, atomic_store_relaxed,
+	atomic_store_release): Use it.
+	* sysdeps/x86/elide.h (ACCESS_ONCE): Remove.
+	(elision_adapt, ELIDE_LOCK): Use atomics.
+	* sysdeps/unix/sysv/linux/x86/elision-lock.c (__lll_lock_elision): Use
+	atomics and improve code comments.
+	* sysdeps/unix/sysv/linux/x86/elision-trylock.c
+	(__lll_trylock_elision): Likewise.
+
2016-12-04 Samuel Thibault <samuel.thibault@ens-lyon.org>
* hurd/hurd.h: Cast errno constants to error_t to fix usage in C++

include/atomic.h

@@ -550,6 +550,20 @@ void __atomic_link_error (void);
if (sizeof (*mem) != 4) \
__atomic_link_error ();
# endif
+/* We additionally provide 8b and 16b atomic loads and stores; we do not yet
+   need other atomic operations of such sizes, and restricting the support to
+   loads and stores makes this easier for archs that do not have native
+   support for atomic operations to less-than-word-sized data.  */
+# if __HAVE_64B_ATOMICS == 1
+# define __atomic_check_size_ls(mem) \
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && (sizeof (*mem) != 4) \
+       && (sizeof (*mem) != 8)) \
+     __atomic_link_error ();
+# else
+# define __atomic_check_size_ls(mem) \
+   if ((sizeof (*mem) != 1) && (sizeof (*mem) != 2) && sizeof (*mem) != 4) \
+     __atomic_link_error ();
+# endif
# define atomic_thread_fence_acquire() \
__atomic_thread_fence (__ATOMIC_ACQUIRE)
@@ -559,18 +573,20 @@ void __atomic_link_error (void);
__atomic_thread_fence (__ATOMIC_SEQ_CST)
# define atomic_load_relaxed(mem) \
-({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_RELAXED); })
+({ __atomic_check_size_ls((mem)); \
+   __atomic_load_n ((mem), __ATOMIC_RELAXED); })
# define atomic_load_acquire(mem) \
-({ __atomic_check_size((mem)); __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
+({ __atomic_check_size_ls((mem)); \
+   __atomic_load_n ((mem), __ATOMIC_ACQUIRE); })
# define atomic_store_relaxed(mem, val) \
do { \
-__atomic_check_size((mem)); \
+__atomic_check_size_ls((mem)); \
__atomic_store_n ((mem), (val), __ATOMIC_RELAXED); \
} while (0)
# define atomic_store_release(mem, val) \
do { \
-__atomic_check_size((mem)); \
+__atomic_check_size_ls((mem)); \
__atomic_store_n ((mem), (val), __ATOMIC_RELEASE); \
} while (0)
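
A rough standalone sketch of how the size check above works (simplified
names, not the actual glibc header): the checking function is declared but
never defined, so if the compiler cannot prove the size test false and
discard the call, the build fails at link time.

  /* Build with optimization (as glibc is) so the dead branch is removed.  */
  void my_atomic_link_error (void);           /* deliberately never defined */

  #define my_check_size_ls(mem) \
    if ((sizeof (*(mem)) != 1) && (sizeof (*(mem)) != 2) \
        && (sizeof (*(mem)) != 4) && (sizeof (*(mem)) != 8)) \
      my_atomic_link_error ();

  int
  main (void)
  {
    short ok = 0;
    my_check_size_ls (&ok);     /* sizeof (short) == 2: check folds away */
    /* long double bad; my_check_size_ls (&bad);  -- would fail to link */
    return ok;
  }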

sysdeps/unix/sysv/linux/x86/elision-lock.c

@@ -44,7 +44,13 @@
int
__lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
{
-if (*adapt_count <= 0)
+/* adapt_count can be accessed concurrently; these accesses can be both
+   inside of transactions (if critical sections are nested and the outer
+   critical section uses lock elision) and outside of transactions.  Thus,
+   we need to use atomic accesses to avoid data races.  However, the
+   value of adapt_count is just a hint, so relaxed MO accesses are
+   sufficient.  */
+if (atomic_load_relaxed (adapt_count) <= 0)
{
unsigned status;
int try_xbegin;
@@ -70,15 +76,20 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
&& _XABORT_CODE (status) == _ABORT_LOCK_BUSY)
{
/* Right now we skip here. Better would be to wait a bit
-   and retry. This likely needs some spinning. */
-if (*adapt_count != aconf.skip_lock_busy)
-  *adapt_count = aconf.skip_lock_busy;
+   and retry. This likely needs some spinning. See
+   above for why relaxed MO is sufficient. */
+if (atomic_load_relaxed (adapt_count)
+    != aconf.skip_lock_busy)
+  atomic_store_relaxed (adapt_count, aconf.skip_lock_busy);
}
/* Internal abort. There is no chance for retry.
Use the normal locking and next time use lock.
-   Be careful to avoid writing to the lock. */
-else if (*adapt_count != aconf.skip_lock_internal_abort)
-  *adapt_count = aconf.skip_lock_internal_abort;
+   Be careful to avoid writing to the lock. See above for why
+   relaxed MO is sufficient. */
+else if (atomic_load_relaxed (adapt_count)
+         != aconf.skip_lock_internal_abort)
+  atomic_store_relaxed (adapt_count,
+                        aconf.skip_lock_internal_abort);
break;
}
}
@@ -87,7 +98,8 @@ __lll_lock_elision (int *futex, short *adapt_count, EXTRAARG int private)
{
/* Use a normal lock until the threshold counter runs out.
Lost updates possible. */
-(*adapt_count)--;
+atomic_store_relaxed (adapt_count,
+                      atomic_load_relaxed (adapt_count) - 1);
}
/* Use a normal lock here. */
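
As a side note on the decrement above (a hypothetical standalone sketch, not
glibc code): a separate relaxed load and store is not equivalent to an atomic
fetch-and-add, since concurrent decrements can be lost, but that is acceptable
for a heuristic counter and avoids a locked read-modify-write instruction on
x86.

  #include <stdatomic.h>

  static _Atomic short counter;           /* plays the role of adapt_count */

  static void
  decay_hint (void)
  {
    /* Two threads may read the same value and both store value - 1, losing
       one decrement; harmless for a hint that only delays elision.  */
    atomic_store_explicit (&counter,
                           atomic_load_explicit (&counter,
                                                 memory_order_relaxed) - 1,
                           memory_order_relaxed);
  }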

sysdeps/unix/sysv/linux/x86/elision-trylock.c

@@ -36,8 +36,10 @@ __lll_trylock_elision (int *futex, short *adapt_count)
return an error. */
_xabort (_ABORT_NESTED_TRYLOCK);
-/* Only try a transaction if it's worth it. */
-if (*adapt_count <= 0)
+/* Only try a transaction if it's worth it. See __lll_lock_elision for
+   why we need atomic accesses. Relaxed MO is sufficient because this is
+   just a hint. */
+if (atomic_load_relaxed (adapt_count) <= 0)
{
unsigned status;
@@ -55,16 +57,18 @@ __lll_trylock_elision (int *futex, short *adapt_count)
if (!(status & _XABORT_RETRY))
{
/* Internal abort. No chance for retry. For future
-   locks don't try speculation for some time. */
-if (*adapt_count != aconf.skip_trylock_internal_abort)
-  *adapt_count = aconf.skip_trylock_internal_abort;
+   locks don't try speculation for some time. See above for MO. */
+if (atomic_load_relaxed (adapt_count)
+    != aconf.skip_lock_internal_abort)
+  atomic_store_relaxed (adapt_count, aconf.skip_lock_internal_abort);
}
/* Could do some retries here. */
}
else
{
-/* Lost updates are possible, but harmless. */
-(*adapt_count)--;
+/* Lost updates are possible but harmless (see above). */
+atomic_store_relaxed (adapt_count,
+                      atomic_load_relaxed (adapt_count) - 1);
}
return lll_trylock (*futex);

sysdeps/x86/elide.h

@@ -20,8 +20,8 @@
#include <hle.h>
#include <elision-conf.h>
+#include <atomic.h>
-#define ACCESS_ONCE(x) (* (volatile typeof(x) *) &(x))
/* Adapt elision with ADAPT_COUNT and STATUS and decide retries. */
@@ -35,28 +35,35 @@ elision_adapt(signed char *adapt_count, unsigned int status)
{
/* Right now we skip here. Better would be to wait a bit
and retry. This likely needs some spinning. Be careful
-   to avoid writing the lock. */
-if (*adapt_count != __elision_aconf.skip_lock_busy)
-  ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_busy;
+   to avoid writing the lock.
+   Using relaxed MO and separate atomic accesses is sufficient because
+   adapt_count is just a hint. */
+if (atomic_load_relaxed (adapt_count) != __elision_aconf.skip_lock_busy)
+  atomic_store_relaxed (adapt_count, __elision_aconf.skip_lock_busy);
}
/* Internal abort. There is no chance for retry.
Use the normal locking and next time use lock.
-   Be careful to avoid writing to the lock. */
-else if (*adapt_count != __elision_aconf.skip_lock_internal_abort)
-  ACCESS_ONCE (*adapt_count) = __elision_aconf.skip_lock_internal_abort;
+   Be careful to avoid writing to the lock. See above for MO. */
+else if (atomic_load_relaxed (adapt_count)
+         != __elision_aconf.skip_lock_internal_abort)
+  atomic_store_relaxed (adapt_count,
+                        __elision_aconf.skip_lock_internal_abort);
return true;
}
/* is_lock_free must be executed inside the transaction */
/* Returns true if lock defined by IS_LOCK_FREE was elided.
-   ADAPT_COUNT is a pointer to per-lock state variable. */
+   ADAPT_COUNT is a per-lock state variable; it must be accessed atomically
+   to avoid data races but is just a hint, so using relaxed MO and separate
+   atomic loads and stores instead of atomic read-modify-write operations is
+   sufficient. */
#define ELIDE_LOCK(adapt_count, is_lock_free) \
({ \
int ret = 0; \
\
-if ((adapt_count) <= 0) \
+if (atomic_load_relaxed (&(adapt_count)) <= 0) \
{ \
for (int i = __elision_aconf.retry_try_xbegin; i > 0; i--) \
{ \
@@ -75,12 +82,13 @@ elision_adapt(signed char *adapt_count, unsigned int status)
} \
} \
else \
-(adapt_count)--; /* missing updates ok */ \
+atomic_store_relaxed (&(adapt_count), \
+                      atomic_load_relaxed (&(adapt_count)) - 1); \
ret; \
})
/* Returns true if lock defined by IS_LOCK_FREE was try-elided.
-   ADAPT_COUNT is a pointer to per-lock state variable. */
+   ADAPT_COUNT is a per-lock state variable. */
#define ELIDE_TRYLOCK(adapt_count, is_lock_free, write) ({ \
int ret = 0; \
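
For context, a schematic caller of ELIDE_LOCK with a made-up lock type (the
real users are the glibc pthread lock implementations; this sketch assumes
the internal <elide.h> and an RTM-capable build, so it is not a standalone
program): the is_lock_free expression is evaluated inside the started
transaction, so a later writer to the lock word aborts the transaction and
the caller falls back to the real lock.

  #include <elide.h>                /* glibc-internal header assumed */

  struct my_lock
  {
    int lock_word;                  /* 0 means unlocked */
    signed char adapt_count;        /* per-lock elision hint, starts at 0 */
  };

  static int
  my_lock_acquire (struct my_lock *l)
  {
    if (ELIDE_LOCK (l->adapt_count, l->lock_word == 0))
      return 0;                     /* elided: the lock word was never written */
    /* Fall back to really acquiring the lock here (not shown).  */
    return 1;
  }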