mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-23 19:03:59 +08:00
libitm: Remove unused code.
In particular, unused code that's presenting portability problems. From-SVN: r181241
This commit is contained in:
parent
cb8010f922
commit
79b1edb6b5
@ -1,3 +1,26 @@
|
||||
2011-11-09 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* barrier.tpl, memcpy.cc, memset.cc, method-wbetl.cc: Remove file.
|
||||
* config/alpha/unaligned.h: Remove file.
|
||||
* config/generic/unaligned.h: Remove file.
|
||||
* config/x86/unaligned.h: Remove file.
|
||||
* config/generic/cachepage.h: Remove file.
|
||||
* config/posix/cachepage.cc: Remove file.
|
||||
* config/generic/cacheline.cc: Remove file.
|
||||
* config/x86/cacheline.cc: Remove file.
|
||||
* config/generic/cacheline.h (gtm_cacheline): Remove the
|
||||
store_mask, copy_mask, copy_mask_wb methods.
|
||||
* config/x86/cacheline.h: Likewise.
|
||||
* config/alpha/cacheline.h: Fall back to generic after setting size.
|
||||
* config/generic/tls.cc (gtm_mask_stack): Remove.
|
||||
* config/x86/x86_avx.cc (GTM_vpperm_shift): Remove.
|
||||
(GTM_vpalignr_table): Remove.
|
||||
* config/x86/x86_sse.cc (GTM_palignr_table): Remove.
|
||||
(GTM_pshift_table): Remove.
|
||||
* config/libitm_i.h: Don't include cachepage.h.
|
||||
* Makefile.am (libitm_la_SOURCES): Remove cacheline.cc, cachepage.cc
|
||||
* Makefile.in, testsuite/Makefile.in: Rebuild.
|
||||
|
||||
2011-11-09 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/x86/cacheline.h (gtm_cacheline::store_mask): Use .byte
|
||||
|
@ -41,7 +41,7 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \
|
||||
|
||||
libitm_la_SOURCES = \
|
||||
aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc barrier.cc beginend.cc \
|
||||
clone.cc cacheline.cc cachepage.cc eh_cpp.cc local.cc \
|
||||
clone.cc eh_cpp.cc local.cc \
|
||||
query.cc retry.cc rwlock.cc useraction.cc util.cc \
|
||||
sjlj.S tls.cc method-serial.cc method-gl.cc
|
||||
|
||||
|
@ -48,6 +48,7 @@ DIST_COMMON = $(am__configure_deps) $(srcdir)/../config.guess \
|
||||
$(top_srcdir)/configure ChangeLog
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
|
||||
$(top_srcdir)/../config/asmcfi.m4 \
|
||||
$(top_srcdir)/../config/depstand.m4 \
|
||||
$(top_srcdir)/../config/enable.m4 \
|
||||
$(top_srcdir)/../config/futex.m4 \
|
||||
@ -94,17 +95,17 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
|
||||
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
|
||||
libitm_la_LIBADD =
|
||||
am__libitm_la_SOURCES_DIST = aatree.cc alloc.cc alloc_c.cc \
|
||||
alloc_cpp.cc barrier.cc beginend.cc clone.cc cacheline.cc \
|
||||
cachepage.cc eh_cpp.cc local.cc query.cc retry.cc rwlock.cc \
|
||||
useraction.cc util.cc sjlj.S tls.cc method-serial.cc \
|
||||
method-gl.cc x86_sse.cc x86_avx.cc futex.cc
|
||||
alloc_cpp.cc barrier.cc beginend.cc clone.cc eh_cpp.cc \
|
||||
local.cc query.cc retry.cc rwlock.cc useraction.cc util.cc \
|
||||
sjlj.S tls.cc method-serial.cc method-gl.cc x86_sse.cc \
|
||||
x86_avx.cc futex.cc
|
||||
@ARCH_X86_TRUE@am__objects_1 = x86_sse.lo x86_avx.lo
|
||||
@ARCH_FUTEX_TRUE@am__objects_2 = futex.lo
|
||||
am_libitm_la_OBJECTS = aatree.lo alloc.lo alloc_c.lo alloc_cpp.lo \
|
||||
barrier.lo beginend.lo clone.lo cacheline.lo cachepage.lo \
|
||||
eh_cpp.lo local.lo query.lo retry.lo rwlock.lo useraction.lo \
|
||||
util.lo sjlj.lo tls.lo method-serial.lo method-gl.lo \
|
||||
$(am__objects_1) $(am__objects_2)
|
||||
barrier.lo beginend.lo clone.lo eh_cpp.lo local.lo query.lo \
|
||||
retry.lo rwlock.lo useraction.lo util.lo sjlj.lo tls.lo \
|
||||
method-serial.lo method-gl.lo $(am__objects_1) \
|
||||
$(am__objects_2)
|
||||
libitm_la_OBJECTS = $(am_libitm_la_OBJECTS)
|
||||
DEFAULT_INCLUDES = -I.@am__isrc@
|
||||
depcomp = $(SHELL) $(top_srcdir)/../depcomp
|
||||
@ -234,8 +235,6 @@ ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
FC = @FC@
|
||||
FCFLAGS = @FCFLAGS@
|
||||
FGREP = @FGREP@
|
||||
GREP = @GREP@
|
||||
INSTALL = @INSTALL@
|
||||
@ -286,7 +285,6 @@ abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_CXX = @ac_ct_CXX@
|
||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||
ac_ct_FC = @ac_ct_FC@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
@ -371,10 +369,9 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \
|
||||
-no-undefined
|
||||
|
||||
libitm_la_SOURCES = aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc \
|
||||
barrier.cc beginend.cc clone.cc cacheline.cc cachepage.cc \
|
||||
eh_cpp.cc local.cc query.cc retry.cc rwlock.cc useraction.cc \
|
||||
util.cc sjlj.S tls.cc method-serial.cc method-gl.cc \
|
||||
$(am__append_1) $(am__append_2)
|
||||
barrier.cc beginend.cc clone.cc eh_cpp.cc local.cc query.cc \
|
||||
retry.cc rwlock.cc useraction.cc util.cc sjlj.S tls.cc \
|
||||
method-serial.cc method-gl.cc $(am__append_1) $(am__append_2)
|
||||
|
||||
# Automake Documentation:
|
||||
# If your package has Texinfo files in many directories, you can use the
|
||||
@ -500,8 +497,6 @@ distclean-compile:
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc_cpp.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/beginend.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cacheline.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachepage.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clone.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eh_cpp.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/futex.Plo@am__quote@
|
||||
|
@ -1,170 +0,0 @@
|
||||
/* -*- c++ -*- */
|
||||
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "unaligned.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace GTM;
|
||||
|
||||
template<typename T>
|
||||
T do_read (const T *ptr, abi_dispatch::lock_type lock)
|
||||
{
|
||||
//
|
||||
// Find the cacheline that holds the current value of *PTR.
|
||||
//
|
||||
abi_dispatch *disp = abi_disp();
|
||||
uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
|
||||
// Normalize PTR by chopping off the bottom bits so we can search
|
||||
// for PTR in the cacheline hash.
|
||||
uintptr_t iline = iptr & -CACHELINE_SIZE;
|
||||
// The position in the resulting cacheline where *PTR is actually stored.
|
||||
uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
|
||||
const gtm_cacheline *pline = reinterpret_cast<const gtm_cacheline *>(iline);
|
||||
// Search for the actual cacheline that holds the current value of *PTR.
|
||||
const gtm_cacheline *line = disp->read_lock(pline, lock);
|
||||
|
||||
// Point to the position in the cacheline where *PTR is stored.
|
||||
ptr = reinterpret_cast<const T *>(&line->b[iofs]);
|
||||
|
||||
// Straight up loads, because we're either aligned, or we don't care
|
||||
// about alignment.
|
||||
//
|
||||
// If we require alignment on type T, do a straight load if we're
|
||||
// aligned. Otherwise do a straight load IFF the load fits entirely
|
||||
// in this cacheline. That is, it won't span multiple cachelines.
|
||||
if (__builtin_expect (strict_alignment<T>::value
|
||||
? (iofs & (sizeof (T) - 1)) == 0
|
||||
: iofs + sizeof(T) <= CACHELINE_SIZE, 1))
|
||||
{
|
||||
do_normal_load:
|
||||
return *ptr;
|
||||
}
|
||||
// If alignment on T is necessary, but we're unaligned, yet we fit
|
||||
// entirely in this cacheline... do the unaligned load dance.
|
||||
else if (__builtin_expect (strict_alignment<T>::value
|
||||
&& iofs + sizeof(T) <= CACHELINE_SIZE, 1))
|
||||
{
|
||||
do_unaligned_load:
|
||||
return unaligned_load<T>(ptr);
|
||||
}
|
||||
// Otherwise, this load will span multiple cachelines.
|
||||
else
|
||||
{
|
||||
// Get the following cacheline for the rest of the data.
|
||||
const gtm_cacheline *line2 = disp->read_lock(pline + 1, lock);
|
||||
|
||||
// If the two cachelines are adjacent, just load it all in one
|
||||
// swoop.
|
||||
if (line2 == line + 1)
|
||||
{
|
||||
if (!strict_alignment<T>::value)
|
||||
goto do_normal_load;
|
||||
else
|
||||
goto do_unaligned_load;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Otherwise, ask the backend to load from two different
|
||||
// cachelines.
|
||||
return unaligned_load2<T>(line, line2, iofs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void do_write (T *ptr, T val, abi_dispatch::lock_type lock)
|
||||
{
|
||||
// Note: See comments for do_read() above for hints on this
|
||||
// function. Ideally we should abstract out a lot out of these two
|
||||
// functions, and avoid all this duplication.
|
||||
|
||||
abi_dispatch *disp = abi_disp();
|
||||
uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
|
||||
uintptr_t iline = iptr & -CACHELINE_SIZE;
|
||||
uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
|
||||
gtm_cacheline *pline = reinterpret_cast<gtm_cacheline *>(iline);
|
||||
gtm_cacheline_mask m = ((gtm_cacheline_mask)2 << (sizeof(T) - 1)) - 1;
|
||||
abi_dispatch::mask_pair pair = disp->write_lock(pline, lock);
|
||||
|
||||
ptr = reinterpret_cast<T *>(&pair.line->b[iofs]);
|
||||
|
||||
if (__builtin_expect (strict_alignment<T>::value
|
||||
? (iofs & (sizeof (val) - 1)) == 0
|
||||
: iofs + sizeof(val) <= CACHELINE_SIZE, 1))
|
||||
{
|
||||
*pair.mask |= m << iofs;
|
||||
do_normal_store:
|
||||
*ptr = val;
|
||||
}
|
||||
else if (__builtin_expect (strict_alignment<T>::value
|
||||
&& iofs + sizeof(val) <= CACHELINE_SIZE, 1))
|
||||
{
|
||||
*pair.mask |= m << iofs;
|
||||
do_unaligned_store:
|
||||
unaligned_store<T>(ptr, val);
|
||||
}
|
||||
else
|
||||
{
|
||||
*pair.mask |= m << iofs;
|
||||
abi_dispatch::mask_pair pair2 = disp->write_lock(pline + 1, lock);
|
||||
|
||||
uintptr_t ileft = CACHELINE_SIZE - iofs;
|
||||
*pair2.mask |= m >> ileft;
|
||||
|
||||
if (pair2.line == pair.line + 1)
|
||||
{
|
||||
if (!strict_alignment<T>::value)
|
||||
goto do_normal_store;
|
||||
else
|
||||
goto do_unaligned_store;
|
||||
}
|
||||
else
|
||||
unaligned_store2<T>(pair.line, pair2.line, iofs, val);
|
||||
}
|
||||
}
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
||||
#define ITM_READ(T, LOCK) \
|
||||
_ITM_TYPE_##T ITM_REGPARM _ITM_##LOCK##T (const _ITM_TYPE_##T *ptr) \
|
||||
{ \
|
||||
return do_read (ptr, abi_dispatch::LOCK); \
|
||||
}
|
||||
|
||||
#define ITM_WRITE(T, LOCK) \
|
||||
void ITM_REGPARM _ITM_##LOCK##T (_ITM_TYPE_##T *ptr, _ITM_TYPE_##T val) \
|
||||
{ \
|
||||
do_write (ptr, val, abi_dispatch::LOCK); \
|
||||
}
|
||||
|
||||
#define ITM_BARRIERS(T) \
|
||||
ITM_READ(T, R) \
|
||||
ITM_READ(T, RaR) \
|
||||
ITM_READ(T, RaW) \
|
||||
ITM_READ(T, RfW) \
|
||||
ITM_WRITE(T, W) \
|
||||
ITM_WRITE(T, WaR) \
|
||||
ITM_WRITE(T, WaW)
|
@ -33,90 +33,6 @@
|
||||
// modification mask, below.
|
||||
#define CACHELINE_SIZE 64
|
||||
|
||||
#ifdef __alpha_bwx__
|
||||
# include "config/generic/cacheline.h"
|
||||
#else
|
||||
// If we don't have byte-word stores, then we'll never be able to
|
||||
// adjust *all* of the byte loads/stores to be truely atomic. So
|
||||
// only guarantee 4-byte aligned values atomicly stored, exactly
|
||||
// like the native system. Use byte zap instructions to accelerate
|
||||
// sub-word masked stores.
|
||||
#include "config/generic/cacheline.h"
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
// A gtm_cacheline_mask stores a modified bit for every modified byte
|
||||
// in the cacheline with which it is associated.
|
||||
typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
|
||||
|
||||
union gtm_cacheline
|
||||
{
|
||||
// Byte access to the cacheline.
|
||||
unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE)));
|
||||
|
||||
// Larger sized access to the cacheline.
|
||||
uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)];
|
||||
uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
|
||||
uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
|
||||
gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
|
||||
|
||||
// Store S into D, but only the bytes specified by M.
|
||||
static void store_mask(uint32_t *d, uint32_t s, uint8_t m);
|
||||
static void store_mask(uint64_t *d, uint64_t s, uint8_t m);
|
||||
|
||||
// Copy S to D, but only the bytes specified by M.
|
||||
static void copy_mask (gtm_cacheline * __restrict d,
|
||||
const gtm_cacheline * __restrict s,
|
||||
gtm_cacheline_mask m);
|
||||
|
||||
// A write barrier to emit after (a series of) copy_mask.
|
||||
static void copy_mask_wb () { atomic_write_barrier(); }
|
||||
};
|
||||
|
||||
inline void ALWAYS_INLINE
|
||||
gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m)
|
||||
{
|
||||
const uint8_t tm = (1 << sizeof(uint32_t)) - 1;
|
||||
|
||||
m &= tm;
|
||||
if (__builtin_expect (m, tm))
|
||||
{
|
||||
if (__builtin_expect (m == tm, 1))
|
||||
*d = s;
|
||||
else
|
||||
*d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m);
|
||||
}
|
||||
}
|
||||
|
||||
inline void ALWAYS_INLINE
|
||||
gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m)
|
||||
{
|
||||
if (__builtin_expect (m, 0xff))
|
||||
{
|
||||
if (__builtin_expect (m == 0xff, 1))
|
||||
*d = s;
|
||||
else
|
||||
{
|
||||
typedef uint32_t *p32 __attribute__((may_alias));
|
||||
p32 d32 = reinterpret_cast<p32>(d);
|
||||
|
||||
if ((m & 0x0f) == 0x0f)
|
||||
{
|
||||
d32[0] = s;
|
||||
m &= 0xf0;
|
||||
}
|
||||
else if ((m & 0xf0) == 0xf0)
|
||||
{
|
||||
d32[1] = s >> 32;
|
||||
m &= 0x0f;
|
||||
}
|
||||
|
||||
if (m)
|
||||
*d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
#endif // __alpha_bwx__
|
||||
#endif // LIBITM_ALPHA_CACHELINE_H
|
||||
|
@ -1,118 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef LIBITM_ALPHA_UNALIGNED_H
|
||||
#define LIBITM_ALPHA_UNALIGNED_H 1
|
||||
|
||||
#define HAVE_ARCH_UNALIGNED_LOAD2_U2 1
|
||||
#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
|
||||
#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1
|
||||
|
||||
#ifndef __alpha_bwx__
|
||||
#define HAVE_ARCH_UNALIGNED_STORE2_U2 1
|
||||
#endif
|
||||
#define HAVE_ARCH_UNALIGNED_STORE2_U4 1
|
||||
#define HAVE_ARCH_UNALIGNED_STORE2_U8 1
|
||||
|
||||
#include "config/generic/unaligned.h"
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
template<>
|
||||
inline uint16_t ALWAYS_INLINE
|
||||
unaligned_load2<uint16_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
|
||||
uint64_t v2 = c2->u64[0];
|
||||
|
||||
return __builtin_alpha_extwl (v1, ofs) | __builtin_alpha_extwh (v2, ofs);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline uint32_t ALWAYS_INLINE
|
||||
unaligned_load2<uint32_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
|
||||
uint64_t v2 = c2->u64[0];
|
||||
|
||||
return __builtin_alpha_extll (v1, ofs) + __builtin_alpha_extlh (v2, ofs);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline uint64_t ALWAYS_INLINE
|
||||
unaligned_load2<uint64_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
|
||||
uint64_t v2 = c2->u64[0];
|
||||
|
||||
return __builtin_alpha_extql (v1, ofs) | __builtin_alpha_extqh (v2, ofs);
|
||||
}
|
||||
|
||||
#ifndef __alpha_bwx__
|
||||
template<>
|
||||
inline void
|
||||
unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2,
|
||||
size_t ofs, uint16_t val)
|
||||
{
|
||||
uint32_t vl = (uint32_t)val << 24, vh = val >> 8;
|
||||
|
||||
gtm_cacheline::store_mask (&c1->u32[CACHELINE_SIZE / 4 - 1], vl, 4);
|
||||
gtm_cacheline::store_mask (&c2->u32[0], vh, 1);
|
||||
}
|
||||
#endif
|
||||
|
||||
template<>
|
||||
inline void
|
||||
unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2,
|
||||
size_t ofs, uint32_t val)
|
||||
{
|
||||
uint64_t vl = __builtin_alpha_insll (val, ofs);
|
||||
uint64_t ml = __builtin_alpha_insll (~0u, ofs);
|
||||
uint64_t vh = __builtin_alpha_inslh (val, ofs);
|
||||
uint64_t mh = __builtin_alpha_inslh (~0u, ofs);
|
||||
|
||||
gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml);
|
||||
gtm_cacheline::store_mask (&c2->u64[0], vh, mh);
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void
|
||||
unaligned_store2<uint64_t>(gtm_cacheline *c1, gtm_cacheline *c2,
|
||||
size_t ofs, uint64_t val)
|
||||
{
|
||||
uint64_t vl = __builtin_alpha_insql (val, ofs);
|
||||
uint64_t ml = __builtin_alpha_insql (~0u, ofs);
|
||||
uint64_t vh = __builtin_alpha_insqh (val, ofs);
|
||||
uint64_t mh = __builtin_alpha_insqh (~0u, ofs);
|
||||
|
||||
gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml);
|
||||
gtm_cacheline::store_mask (&c2->u64[0], vh, mh);
|
||||
}
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
#endif // LIBITM_ALPHA_UNALIGNED_H
|
@ -1,49 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libitm_i.h"
|
||||
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
void
|
||||
gtm_cacheline::copy_mask (gtm_cacheline * __restrict d,
|
||||
const gtm_cacheline * __restrict s,
|
||||
gtm_cacheline_mask m)
|
||||
{
|
||||
const size_t n = sizeof (gtm_word);
|
||||
|
||||
if (m == (gtm_cacheline_mask) -1)
|
||||
{
|
||||
*d = *s;
|
||||
return;
|
||||
}
|
||||
if (__builtin_expect (m == 0, 0))
|
||||
return;
|
||||
|
||||
for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n)
|
||||
store_mask (&d->w[i], s->w[i], m);
|
||||
}
|
||||
|
||||
} // namespace GTM
|
@ -51,57 +51,8 @@ union gtm_cacheline
|
||||
uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
|
||||
uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
|
||||
gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
|
||||
|
||||
// Store S into D, but only the bytes specified by M.
|
||||
template<typename T> static void store_mask (T *d, T s, uint8_t m);
|
||||
|
||||
// Copy S to D, but only the bytes specified by M.
|
||||
static void copy_mask (gtm_cacheline * __restrict d,
|
||||
const gtm_cacheline * __restrict s,
|
||||
gtm_cacheline_mask m);
|
||||
|
||||
// A write barrier to emit after (a series of) copy_mask.
|
||||
// When we're emitting non-temporal stores, the normal strong
|
||||
// ordering of the machine doesn't apply.
|
||||
static void copy_mask_wb () { atomic_write_barrier(); }
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
inline void
|
||||
gtm_cacheline::store_mask (T *d, T s, uint8_t m)
|
||||
{
|
||||
const uint8_t tm = (1 << sizeof(T)) - 1;
|
||||
|
||||
if (__builtin_expect (m & tm, tm))
|
||||
{
|
||||
if (__builtin_expect ((m & tm) == tm, 1))
|
||||
*d = s;
|
||||
else
|
||||
{
|
||||
const int half = sizeof(T) / 2;
|
||||
typedef typename sized_integral<half>::type half_t;
|
||||
half_t *dhalf = reinterpret_cast<half_t *>(d);
|
||||
half_t s1, s2;
|
||||
|
||||
if (WORDS_BIGENDIAN)
|
||||
s1 = s >> half*8, s2 = s;
|
||||
else
|
||||
s1 = s, s2 = s >> half*8;
|
||||
|
||||
store_mask (dhalf, s1, m);
|
||||
store_mask (dhalf + 1, s2, m >> half);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void ALWAYS_INLINE
|
||||
gtm_cacheline::store_mask<uint8_t> (uint8_t *d, uint8_t s, uint8_t m)
|
||||
{
|
||||
if (m & 1)
|
||||
*d = s;
|
||||
}
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
#endif // LIBITM_CACHELINE_H
|
||||
|
@ -1,77 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef LIBITM_CACHEPAGE_H
|
||||
#define LIBITM_CACHEPAGE_H 1
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
// A "page" worth of saved cachelines plus modification masks. This
|
||||
// arrangement is intended to minimize the overhead of alignment. The
|
||||
// PAGE_SIZE defined by the target must be a constant for this to work,
|
||||
// which means that this definition may not be the same as the real
|
||||
// system page size. An additional define of FIXED_PAGE_SIZE by the
|
||||
// target indicates that PAGE_SIZE exactly matches the system page size.
|
||||
|
||||
#ifndef PAGE_SIZE
|
||||
#define PAGE_SIZE 4096
|
||||
#endif
|
||||
|
||||
struct gtm_cacheline_page
|
||||
{
|
||||
static const size_t LINES
|
||||
= ((PAGE_SIZE - sizeof(gtm_cacheline_page *))
|
||||
/ (CACHELINE_SIZE + sizeof(gtm_cacheline_mask)));
|
||||
|
||||
gtm_cacheline lines[LINES] __attribute__((aligned(PAGE_SIZE)));
|
||||
gtm_cacheline_mask masks[LINES];
|
||||
gtm_cacheline_page *prev;
|
||||
|
||||
static gtm_cacheline_page *
|
||||
page_for_line (gtm_cacheline *c)
|
||||
{
|
||||
return (gtm_cacheline_page *)((uintptr_t)c & -PAGE_SIZE);
|
||||
}
|
||||
|
||||
gtm_cacheline_mask *
|
||||
mask_for_line (gtm_cacheline *c)
|
||||
{
|
||||
size_t index = c - &this->lines[0];
|
||||
return &this->masks[index];
|
||||
}
|
||||
|
||||
static gtm_cacheline_mask *
|
||||
mask_for_page_line (gtm_cacheline *c)
|
||||
{
|
||||
gtm_cacheline_page *p = page_for_line (c);
|
||||
return p->mask_for_line (c);
|
||||
}
|
||||
|
||||
static void *operator new (size_t);
|
||||
static void operator delete (void *);
|
||||
};
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
#endif // LIBITM_CACHEPAGE_H
|
@ -30,51 +30,4 @@ namespace GTM HIDDEN {
|
||||
__thread gtm_thread_tls _gtm_thr_tls;
|
||||
#endif
|
||||
|
||||
// Filter out any updates that overlap the libitm stack, as defined by
|
||||
// TOP (entry point to library) and BOT (below current function). This
|
||||
// definition should be fine for all stack-grows-down architectures.
|
||||
|
||||
gtm_cacheline_mask __attribute__((noinline))
|
||||
gtm_mask_stack(gtm_cacheline *line, gtm_cacheline_mask mask)
|
||||
{
|
||||
void *top = gtm_thr()->jb.cfa;
|
||||
void *bot = __builtin_dwarf_cfa();
|
||||
|
||||
// We must have come through an entry point that set TOP.
|
||||
assert (top != NULL);
|
||||
|
||||
if (line + 1 < bot)
|
||||
{
|
||||
// Since we don't have the REAL stack boundaries for this thread,
|
||||
// we cannot know if this is a dead write to a stack address below
|
||||
// the current function or if it is write to another VMA. In either
|
||||
// case allowing the write should not affect correctness.
|
||||
}
|
||||
else if (line >= top)
|
||||
{
|
||||
// A valid write to an address in an outer stack frame, or a write
|
||||
// to another VMA.
|
||||
}
|
||||
else
|
||||
{
|
||||
uintptr_t diff = (uintptr_t)top - (uintptr_t)line;
|
||||
if (diff >= CACHELINE_SIZE)
|
||||
{
|
||||
// The write is either fully within the proscribed area, or the tail
|
||||
// of the cacheline overlaps the proscribed area. Assume that all
|
||||
// stacks are at least cacheline aligned and declare the head of the
|
||||
// cacheline dead.
|
||||
mask = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// The head of the cacheline is within the proscribed area, but the
|
||||
// tail of the cacheline is live. Eliminate the dead writes.
|
||||
mask &= (gtm_cacheline_mask)-1 << diff;
|
||||
}
|
||||
}
|
||||
|
||||
return mask;
|
||||
}
|
||||
|
||||
} // namespace GTM
|
||||
|
@ -1,228 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef LIBITM_UNALIGNED_H
|
||||
#define LIBITM_UNALIGNED_H 1
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
#ifndef STRICT_ALIGNMENT
|
||||
#define STRICT_ALIGNMENT 1
|
||||
#endif
|
||||
|
||||
// A type trait for whether type T requires strict alignment.
|
||||
// The generic types are assumed to all be the same; specializations
|
||||
// for target-specific types should be done in config/cpu/unaligned.h.
|
||||
template<typename T>
|
||||
struct strict_alignment
|
||||
: public std::integral_constant<bool, STRICT_ALIGNMENT>
|
||||
{ };
|
||||
|
||||
// A helper template for accessing an integral type the same size as T
|
||||
template<typename T>
|
||||
struct make_integral
|
||||
: public sized_integral<sizeof(T)>
|
||||
{ };
|
||||
|
||||
// A helper class for accessing T as an unaligned value.
|
||||
template<typename T>
|
||||
struct __attribute__((packed)) unaligned_helper
|
||||
{ T x; };
|
||||
|
||||
// A helper class for view-converting T as an integer.
|
||||
template<typename T>
|
||||
union view_convert_helper
|
||||
{
|
||||
typedef T type;
|
||||
typedef make_integral<T> itype;
|
||||
|
||||
type t;
|
||||
itype i;
|
||||
};
|
||||
|
||||
// Generate an unaligned load sequence.
|
||||
// The compiler knows how to do this for any specific type.
|
||||
template<typename T>
|
||||
inline T ALWAYS_INLINE
|
||||
unaligned_load(const void *t)
|
||||
{
|
||||
typedef unaligned_helper<T> UT;
|
||||
const UT *ut = reinterpret_cast<const UT *>(t);
|
||||
return ut->x;
|
||||
}
|
||||
|
||||
// Generate an unaligned store sequence.
|
||||
template<typename T>
|
||||
inline void ALWAYS_INLINE
|
||||
unaligned_store(void *t, T val)
|
||||
{
|
||||
typedef unaligned_helper<T> UT;
|
||||
UT *ut = reinterpret_cast<UT *>(t);
|
||||
ut->x = val;
|
||||
}
|
||||
|
||||
// Generate an unaligned load from two different cachelines.
|
||||
// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
|
||||
template<typename T>
|
||||
inline T ALWAYS_INLINE
|
||||
unaligned_load2(const gtm_cacheline *c1, const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
size_t left = CACHELINE_SIZE - ofs;
|
||||
T ret;
|
||||
|
||||
memcpy (&ret, &c1->b[ofs], left);
|
||||
memcpy ((char *)&ret + ofs, c2, sizeof(T) - left);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Generate an unaligned store into two different cachelines.
|
||||
// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
|
||||
template<typename T>
|
||||
inline void ALWAYS_INLINE
|
||||
unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, size_t ofs, T val)
|
||||
{
|
||||
size_t left = CACHELINE_SIZE - ofs;
|
||||
memcpy (&c1->b[ofs], &val, left);
|
||||
memcpy (c2, (char *)&val + left, sizeof(T) - left);
|
||||
}
|
||||
|
||||
#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U2
|
||||
template<>
|
||||
inline uint16_t ALWAYS_INLINE
|
||||
unaligned_load2<uint16_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
uint16_t v1 = c1->b[CACHELINE_SIZE - 1];
|
||||
uint16_t v2 = c2->b[0];
|
||||
|
||||
if (WORDS_BIGENDIAN)
|
||||
return v1 << 8 | v2;
|
||||
else
|
||||
return v2 << 8 | v1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U4
|
||||
template<>
|
||||
inline uint32_t ALWAYS_INLINE
|
||||
unaligned_load2<uint32_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
uint32_t v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
|
||||
uint32_t v2 = c2->u32[0];
|
||||
int s2 = (ofs & (sizeof(uint32_t) - 1)) * 8;
|
||||
int s1 = sizeof(uint32_t) * 8 - s2;
|
||||
|
||||
if (WORDS_BIGENDIAN)
|
||||
return v1 << s2 | v2 >> s1;
|
||||
else
|
||||
return v2 << s2 | v1 >> s1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U8
|
||||
template<>
|
||||
inline uint64_t ALWAYS_INLINE
|
||||
unaligned_load2<uint64_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
|
||||
uint64_t v2 = c2->u64[0];
|
||||
int s2 = (ofs & (sizeof(uint64_t) - 1)) * 8;
|
||||
int s1 = sizeof(uint64_t) * 8 - s2;
|
||||
|
||||
if (WORDS_BIGENDIAN)
|
||||
return v1 << s2 | v2 >> s1;
|
||||
else
|
||||
return v2 << s2 | v1 >> s1;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<>
|
||||
inline float ALWAYS_INLINE
|
||||
unaligned_load2<float>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
typedef view_convert_helper<float> VC; VC vc;
|
||||
vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
|
||||
return vc.t;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline double ALWAYS_INLINE
|
||||
unaligned_load2<double>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
typedef view_convert_helper<double> VC; VC vc;
|
||||
vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
|
||||
return vc.t;
|
||||
}
|
||||
|
||||
#ifndef HAVE_ARCH_UNALIGNED_STORE2_U2
|
||||
template<>
|
||||
inline void ALWAYS_INLINE
|
||||
unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2,
|
||||
size_t ofs, uint16_t val)
|
||||
{
|
||||
uint8_t vl = val, vh = val >> 8;
|
||||
|
||||
if (WORDS_BIGENDIAN)
|
||||
{
|
||||
c1->b[CACHELINE_SIZE - 1] = vh;
|
||||
c2->b[0] = vl;
|
||||
}
|
||||
else
|
||||
{
|
||||
c1->b[CACHELINE_SIZE - 1] = vl;
|
||||
c2->b[0] = vh;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
#ifndef HAVE_ARCH_UNALIGNED_STORE2_U4
|
||||
template<>
|
||||
inline void ALWAYS_INLINE
|
||||
unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2,
|
||||
size_t ofs, uint32_t val)
|
||||
{
|
||||
// ??? We could reuse the store_mask stuff here.
|
||||
}
|
||||
#endif
|
||||
|
||||
template<>
|
||||
inline void ALWAYS_INLINE
|
||||
unaligned_store2<float>(gtm_cacheline *c1, gtm_cacheline *c2,
|
||||
size_t ofs, float val)
|
||||
{
|
||||
typedef view_convert_helper<float> VC; VC vc;
|
||||
vc.t = val;
|
||||
unaligned_store2(c1, c2, ofs, vc.i);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
#endif // LIBITM_UNALIGNED_H
|
@ -1,183 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libitm_i.h"
|
||||
#include <pthread.h>
|
||||
|
||||
//
|
||||
// We have three possibilities for alloction: mmap, memalign, posix_memalign
|
||||
//
|
||||
|
||||
#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
|
||||
#include <sys/mman.h>
|
||||
#include <fcntl.h>
|
||||
#endif
|
||||
#ifdef HAVE_MALLOC_H
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
#if defined(HAVE_MMAP_ANON)
|
||||
# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
|
||||
# define MAP_ANONYMOUS MAP_ANON
|
||||
# endif
|
||||
# define dev_zero -1
|
||||
#elif defined(HAVE_MMAP_DEV_ZERO)
|
||||
# ifndef MAP_ANONYMOUS
|
||||
# define MAP_ANONYMOUS 0
|
||||
# endif
|
||||
static int dev_zero = -1;
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
|
||||
/* If we get here, we've already opened /dev/zero and verified that
|
||||
PAGE_SIZE is valid for the system. */
|
||||
static gtm_cacheline_page * alloc_mmap (void) UNUSED;
|
||||
static gtm_cacheline_page *
|
||||
alloc_mmap (void)
|
||||
{
|
||||
gtm_cacheline_page *r;
|
||||
r = (gtm_cacheline_page *) mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS, dev_zero, 0);
|
||||
if (r == (gtm_cacheline_page *) MAP_FAILED)
|
||||
abort ();
|
||||
return r;
|
||||
}
|
||||
#endif /* MMAP_ANON | MMAP_DEV_ZERO */
|
||||
|
||||
#ifdef HAVE_MEMALIGN
|
||||
static gtm_cacheline_page * alloc_memalign (void) UNUSED;
|
||||
static gtm_cacheline_page *
|
||||
alloc_memalign (void)
|
||||
{
|
||||
gtm_cacheline_page *r;
|
||||
r = (gtm_cacheline_page *) memalign (PAGE_SIZE, PAGE_SIZE);
|
||||
if (r == NULL)
|
||||
abort ();
|
||||
return r;
|
||||
}
|
||||
#endif /* MEMALIGN */
|
||||
|
||||
#ifdef HAVE_POSIX_MEMALIGN
|
||||
static gtm_cacheline_page *alloc_posix_memalign (void) UNUSED;
|
||||
static gtm_cacheline_page *
|
||||
alloc_posix_memalign (void)
|
||||
{
|
||||
void *r;
|
||||
if (posix_memalign (&r, PAGE_SIZE, PAGE_SIZE))
|
||||
abort ();
|
||||
return (gtm_cacheline_page *) r;
|
||||
}
|
||||
#endif /* POSIX_MEMALIGN */
|
||||
|
||||
#if defined(HAVE_MMAP_ANON) && defined(FIXED_PAGE_SIZE)
|
||||
# define alloc_page alloc_mmap
|
||||
#elif defined(HAVE_MMAP_DEV_ZERO) && defined(FIXED_PAGE_SIZE)
|
||||
static gtm_cacheline_page *
|
||||
alloc_page (void)
|
||||
{
|
||||
if (dev_zero < 0)
|
||||
{
|
||||
dev_zero = open ("/dev/zero", O_RDWR);
|
||||
assert (dev_zero >= 0);
|
||||
}
|
||||
return alloc_mmap ();
|
||||
}
|
||||
#elif defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
|
||||
static gtm_cacheline_page * (*alloc_page) (void);
|
||||
static void __attribute__((constructor))
|
||||
init_alloc_page (void)
|
||||
{
|
||||
size_t page_size = getpagesize ();
|
||||
if (page_size <= PAGE_SIZE && PAGE_SIZE % page_size == 0)
|
||||
{
|
||||
# ifndef HAVE_MMAP_ANON
|
||||
dev_zero = open ("/dev/zero", O_RDWR);
|
||||
assert (dev_zero >= 0);
|
||||
# endif
|
||||
alloc_page = alloc_mmap;
|
||||
return;
|
||||
}
|
||||
# ifdef HAVE_MEMALIGN
|
||||
alloc_page = alloc_memalign;
|
||||
# elif defined(HAVE_POSIX_MEMALIGN)
|
||||
alloc_page = alloc_posix_memalign;
|
||||
# else
|
||||
# error "No fallback aligned memory allocation method"
|
||||
# endif
|
||||
}
|
||||
#elif defined(HAVE_MEMALIGN)
|
||||
# define alloc_page alloc_memalign
|
||||
#elif defined(HAVE_POSIX_MEMALIGN)
|
||||
# define alloc_page alloc_posix_memalign
|
||||
#else
|
||||
# error "No aligned memory allocation method"
|
||||
#endif
|
||||
|
||||
static gtm_cacheline_page *free_pages;
|
||||
static pthread_mutex_t free_page_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
void *
|
||||
gtm_cacheline_page::operator new (size_t size)
|
||||
{
|
||||
assert (size == sizeof (gtm_cacheline_page));
|
||||
assert (size <= PAGE_SIZE);
|
||||
|
||||
pthread_mutex_lock(&free_page_lock);
|
||||
|
||||
gtm_cacheline_page *r = free_pages;
|
||||
free_pages = r ? r->prev : NULL;
|
||||
|
||||
pthread_mutex_unlock(&free_page_lock);
|
||||
|
||||
if (r == NULL)
|
||||
r = alloc_page ();
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
void
|
||||
gtm_cacheline_page::operator delete (void *xhead)
|
||||
{
|
||||
gtm_cacheline_page *head = static_cast<gtm_cacheline_page *>(xhead);
|
||||
gtm_cacheline_page *tail;
|
||||
|
||||
if (head == 0)
|
||||
return;
|
||||
|
||||
/* ??? We should eventually really free some of these. */
|
||||
|
||||
for (tail = head; tail->prev != 0; tail = tail->prev)
|
||||
continue;
|
||||
|
||||
pthread_mutex_lock(&free_page_lock);
|
||||
|
||||
tail->prev = free_pages;
|
||||
free_pages = head;
|
||||
|
||||
pthread_mutex_unlock(&free_page_lock);
|
||||
}
|
||||
|
||||
} // namespace GTM
|
@ -1,73 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libitm_i.h"
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
uint32_t const gtm_bit_to_byte_mask[16] =
|
||||
{
|
||||
0x00000000,
|
||||
0x000000ff,
|
||||
0x0000ff00,
|
||||
0x0000ffff,
|
||||
0x00ff0000,
|
||||
0x00ff00ff,
|
||||
0x00ffff00,
|
||||
0x00ffffff,
|
||||
0xff000000,
|
||||
0xff0000ff,
|
||||
0xff00ff00,
|
||||
0xff00ffff,
|
||||
0xffff0000,
|
||||
0xffff00ff,
|
||||
0xffffff00,
|
||||
0xffffffff
|
||||
};
|
||||
|
||||
#ifdef __SSE2__
|
||||
# define MEMBER m128i
|
||||
#else
|
||||
# define MEMBER w
|
||||
#endif
|
||||
|
||||
void
|
||||
gtm_cacheline::copy_mask (gtm_cacheline * __restrict d,
|
||||
const gtm_cacheline * __restrict s,
|
||||
gtm_cacheline_mask m)
|
||||
{
|
||||
if (m == (gtm_cacheline_mask)-1)
|
||||
{
|
||||
*d = *s;
|
||||
return;
|
||||
}
|
||||
if (__builtin_expect (m == 0, 0))
|
||||
return;
|
||||
|
||||
size_t n = sizeof(d->MEMBER[0]);
|
||||
for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n)
|
||||
store_mask (&d->MEMBER[i], s->MEMBER[i], m);
|
||||
}
|
||||
|
||||
} // namespace GTM
|
@ -40,8 +40,6 @@ namespace GTM HIDDEN {
|
||||
// in the cacheline with which it is associated.
|
||||
typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
|
||||
|
||||
extern uint32_t const gtm_bit_to_byte_mask[16];
|
||||
|
||||
union gtm_cacheline
|
||||
{
|
||||
// Byte access to the cacheline.
|
||||
@ -67,23 +65,6 @@ union gtm_cacheline
|
||||
__m256i m256i[CACHELINE_SIZE / sizeof(__m256i)];
|
||||
#endif
|
||||
|
||||
// Store S into D, but only the bytes specified by M.
|
||||
static void store_mask (uint32_t *d, uint32_t s, uint8_t m);
|
||||
static void store_mask (uint64_t *d, uint64_t s, uint8_t m);
|
||||
#ifdef __SSE2__
|
||||
static void store_mask (__m128i *d, __m128i s, uint16_t m);
|
||||
#endif
|
||||
|
||||
// Copy S to D, but only the bytes specified by M.
|
||||
static void copy_mask (gtm_cacheline * __restrict d,
|
||||
const gtm_cacheline * __restrict s,
|
||||
gtm_cacheline_mask m);
|
||||
|
||||
// A write barrier to emit after (a series of) copy_mask.
|
||||
// When we're emitting non-temporal stores, the normal strong
|
||||
// ordering of the machine doesn't apply.
|
||||
static void copy_mask_wb ();
|
||||
|
||||
#if defined(__SSE__) || defined(__AVX__)
|
||||
// Copy S to D; only bother defining if we can do this more efficiently
|
||||
// than the compiler-generated default implementation.
|
||||
@ -91,14 +72,6 @@ union gtm_cacheline
|
||||
#endif // SSE, AVX
|
||||
};
|
||||
|
||||
inline void
|
||||
gtm_cacheline::copy_mask_wb ()
|
||||
{
|
||||
#ifdef __SSE2__
|
||||
_mm_sfence ();
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__SSE__) || defined(__AVX__)
|
||||
inline gtm_cacheline& ALWAYS_INLINE
|
||||
gtm_cacheline::operator= (const gtm_cacheline & __restrict s)
|
||||
@ -141,104 +114,12 @@ gtm_cacheline::operator= (const gtm_cacheline & __restrict s)
|
||||
}
|
||||
|
||||
return *this;
|
||||
|
||||
#undef CP
|
||||
#undef TYPE
|
||||
}
|
||||
#endif
|
||||
|
||||
// Support masked integer stores more efficiently with an unlocked cmpxchg
|
||||
// insn. My reasoning is that while we write to locations that we do not wish
|
||||
// to modify, we do it in an uninterruptable insn, and so we either truely
|
||||
// write back the original data or the insn fails -- unlike with a
|
||||
// load/and/or/write sequence which can be interrupted either by a kernel
|
||||
// task switch or an unlucky cacheline steal by another processor. Avoiding
|
||||
// the LOCK prefix improves performance by a factor of 10, and we don't need
|
||||
// the memory barrier semantics implied by that prefix.
|
||||
|
||||
inline void ALWAYS_INLINE
|
||||
gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m)
|
||||
{
|
||||
gtm_cacheline_mask tm = (1 << sizeof (s)) - 1;
|
||||
if (__builtin_expect (m & tm, tm))
|
||||
{
|
||||
if (__builtin_expect ((m & tm) == tm, 1))
|
||||
*d = s;
|
||||
else
|
||||
{
|
||||
gtm_cacheline_mask bm = gtm_bit_to_byte_mask[m & 15];
|
||||
gtm_word n, o = *d;
|
||||
|
||||
__asm("\n0:\t"
|
||||
"mov %[o], %[n]\n\t"
|
||||
"and %[m], %[n]\n\t"
|
||||
"or %[s], %[n]\n\t"
|
||||
"cmpxchg %[n], %[d]\n\t"
|
||||
".byte 0x2e\n\t" // predict not-taken, aka jnz,pn
|
||||
"jnz 0b"
|
||||
: [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o)
|
||||
: [s] "r" (s & bm), [m] "r" (~bm));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline void ALWAYS_INLINE
|
||||
gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m)
|
||||
{
|
||||
gtm_cacheline_mask tm = (1 << sizeof (s)) - 1;
|
||||
if (__builtin_expect (m & tm, tm))
|
||||
{
|
||||
if (__builtin_expect ((m & tm) == tm, 1))
|
||||
*d = s;
|
||||
else
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
uint32_t bl = gtm_bit_to_byte_mask[m & 15];
|
||||
uint32_t bh = gtm_bit_to_byte_mask[(m >> 4) & 15];
|
||||
gtm_cacheline_mask bm = bl | ((gtm_cacheline_mask)bh << 31 << 1);
|
||||
uint64_t n, o = *d;
|
||||
__asm("\n0:\t"
|
||||
"mov %[o], %[n]\n\t"
|
||||
"and %[m], %[n]\n\t"
|
||||
"or %[s], %[n]\n\t"
|
||||
"cmpxchg %[n], %[d]\n\t"
|
||||
".byte 0x2e\n\t" // predict not-taken, aka jnz,pn
|
||||
"jnz 0b"
|
||||
: [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o)
|
||||
: [s] "r" (s & bm), [m] "r" (~bm));
|
||||
#else
|
||||
/* ??? While it's possible to perform this operation with
|
||||
cmpxchg8b, the sequence requires all 7 general registers
|
||||
and thus cannot be performed with -fPIC. Don't even try. */
|
||||
uint32_t *d32 = reinterpret_cast<uint32_t *>(d);
|
||||
store_mask (d32, s, m);
|
||||
store_mask (d32 + 1, s >> 32, m >> 4);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __SSE2__
|
||||
inline void ALWAYS_INLINE
|
||||
gtm_cacheline::store_mask (__m128i *d, __m128i s, uint16_t m)
|
||||
{
|
||||
if (__builtin_expect (m == 0, 0))
|
||||
return;
|
||||
if (__builtin_expect (m == 0xffff, 1))
|
||||
*d = s;
|
||||
else
|
||||
{
|
||||
__m128i bm0, bm1, bm2, bm3;
|
||||
bm0 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
|
||||
bm1 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
|
||||
bm2 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
|
||||
bm3 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
|
||||
bm0 = _mm_unpacklo_epi32 (bm0, bm1);
|
||||
bm2 = _mm_unpacklo_epi32 (bm2, bm3);
|
||||
bm0 = _mm_unpacklo_epi64 (bm0, bm2);
|
||||
|
||||
_mm_maskmoveu_si128 (s, bm0, (char *)d);
|
||||
}
|
||||
}
|
||||
#endif // SSE2
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
#endif // LIBITM_CACHELINE_H
|
||||
|
@ -1,237 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef LIBITM_X86_UNALIGNED_H
|
||||
#define LIBITM_X86_UNALIGNED_H 1
|
||||
|
||||
#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
|
||||
#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1
|
||||
|
||||
#include "config/generic/unaligned.h"
|
||||
|
||||
namespace GTM HIDDEN {
|
||||
|
||||
template<>
|
||||
inline uint32_t
|
||||
unaligned_load2<uint32_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
uint32_t r, lo, hi;
|
||||
lo = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
|
||||
hi = c2->u32[0];
|
||||
asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo));
|
||||
return r;
|
||||
}
|
||||
|
||||
template<>
|
||||
inline uint64_t
|
||||
unaligned_load2<uint64_t>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
#ifdef __x86_64__
|
||||
uint64_t r, lo, hi;
|
||||
lo = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
|
||||
hi = c2->u64[0];
|
||||
asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo));
|
||||
return r;
|
||||
#else
|
||||
uint32_t v0, v1, v2;
|
||||
uint64_t r;
|
||||
|
||||
if (ofs < CACHELINE_SIZE - 4)
|
||||
{
|
||||
v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 2];
|
||||
v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
|
||||
v2 = c2->u32[0];
|
||||
}
|
||||
else
|
||||
{
|
||||
v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
|
||||
v1 = c2->u32[0];
|
||||
v2 = c2->u32[1];
|
||||
}
|
||||
ofs = (ofs & 3) * 8;
|
||||
asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]"
|
||||
: "=A"(r) : "c"(ofs), [v0] "a"(v0), [v1] "d"(v1), [v2] "r"(v2));
|
||||
|
||||
return r;
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(__SSE2__) || defined(__MMX__)
|
||||
template<>
|
||||
inline _ITM_TYPE_M64
|
||||
unaligned_load2<_ITM_TYPE_M64>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
# ifdef __x86_64__
|
||||
__m128i lo = _mm_movpi64_epi64 (c1->m64[CACHELINE_SIZE / 8 - 1]);
|
||||
__m128i hi = _mm_movpi64_epi64 (c2->m64[0]);
|
||||
|
||||
ofs = (ofs & 7) * 8;
|
||||
lo = _mm_srli_epi64 (lo, ofs);
|
||||
hi = _mm_slli_epi64 (hi, 64 - ofs);
|
||||
lo = lo | hi;
|
||||
return _mm_movepi64_pi64 (lo);
|
||||
# else
|
||||
// On 32-bit we're about to return the result in an MMX register, so go
|
||||
// ahead and do the computation in that unit, even if SSE2 is available.
|
||||
__m64 lo = c1->m64[CACHELINE_SIZE / 8 - 1];
|
||||
__m64 hi = c2->m64[0];
|
||||
|
||||
ofs = (ofs & 7) * 8;
|
||||
lo = _mm_srli_si64 (lo, ofs);
|
||||
hi = _mm_slli_si64 (hi, 64 - ofs);
|
||||
return lo | hi;
|
||||
# endif
|
||||
}
|
||||
#endif // SSE2 or MMX
|
||||
|
||||
// The SSE types are strictly aligned.
|
||||
#ifdef __SSE__
|
||||
template<>
|
||||
struct strict_alignment<_ITM_TYPE_M128>
|
||||
: public std::true_type
|
||||
{ };
|
||||
|
||||
// Expand the unaligned SSE move instructions.
|
||||
template<>
|
||||
inline _ITM_TYPE_M128
|
||||
unaligned_load<_ITM_TYPE_M128>(const void *t)
|
||||
{
|
||||
return _mm_loadu_ps (static_cast<const float *>(t));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void
|
||||
unaligned_store<_ITM_TYPE_M128>(void *t, _ITM_TYPE_M128 val)
|
||||
{
|
||||
_mm_storeu_ps (static_cast<float *>(t), val);
|
||||
}
|
||||
#endif // SSE
|
||||
|
||||
#ifdef __AVX__
|
||||
// The AVX types are strictly aligned when it comes to vmovaps vs vmovups.
|
||||
template<>
|
||||
struct strict_alignment<_ITM_TYPE_M256>
|
||||
: public std::true_type
|
||||
{ };
|
||||
|
||||
template<>
|
||||
inline _ITM_TYPE_M256
|
||||
unaligned_load<_ITM_TYPE_M256>(const void *t)
|
||||
{
|
||||
return _mm256_loadu_ps (static_cast<const float *>(t));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline void
|
||||
unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val)
|
||||
{
|
||||
_mm256_storeu_ps (static_cast<float *>(t), val);
|
||||
}
|
||||
#endif // AVX
|
||||
|
||||
#ifdef __XOP__
|
||||
# define HAVE_ARCH_REALIGN_M128I 1
|
||||
extern const __v16qi GTM_vpperm_shift[16];
|
||||
inline __m128i
|
||||
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
|
||||
{
|
||||
return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]);
|
||||
}
|
||||
#elif defined(__AVX__)
|
||||
# define HAVE_ARCH_REALIGN_M128I 1
|
||||
extern "C" const uint64_t GTM_vpalignr_table[16];
|
||||
inline __m128i
|
||||
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
|
||||
{
|
||||
register __m128i xmm0 __asm__("xmm0") = hi;
|
||||
register __m128i xmm1 __asm__("xmm1") = lo;
|
||||
__asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
|
||||
"r"(>M_vpalignr_table[byte_count]));
|
||||
return xmm0;
|
||||
}
|
||||
#elif defined(__SSSE3__)
|
||||
# define HAVE_ARCH_REALIGN_M128I 1
|
||||
extern "C" const uint64_t GTM_palignr_table[16];
|
||||
inline __m128i
|
||||
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
|
||||
{
|
||||
register __m128i xmm0 __asm__("xmm0") = hi;
|
||||
register __m128i xmm1 __asm__("xmm1") = lo;
|
||||
__asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
|
||||
"r"(>M_palignr_table[byte_count]));
|
||||
return xmm0;
|
||||
}
|
||||
#elif defined(__SSE2__)
|
||||
# define HAVE_ARCH_REALIGN_M128I 1
|
||||
extern "C" const char GTM_pshift_table[16 * 16];
|
||||
inline __m128i
|
||||
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
|
||||
{
|
||||
register __m128i xmm0 __asm__("xmm0") = lo;
|
||||
register __m128i xmm1 __asm__("xmm1") = hi;
|
||||
__asm("call *%2" : "+x"(xmm0), "+x"(xmm1)
|
||||
: "r"(GTM_pshift_table + byte_count*16));
|
||||
return xmm0;
|
||||
}
|
||||
#endif // XOP, AVX, SSSE3, SSE2
|
||||
|
||||
#ifdef HAVE_ARCH_REALIGN_M128I
|
||||
template<>
|
||||
inline _ITM_TYPE_M128
|
||||
unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
return (_ITM_TYPE_M128)
|
||||
realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1],
|
||||
c2->m128i[0], ofs & 15);
|
||||
}
|
||||
#endif // HAVE_ARCH_REALIGN_M128I
|
||||
|
||||
#ifdef __AVX__
|
||||
template<>
|
||||
inline _ITM_TYPE_M256
|
||||
unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1,
|
||||
const gtm_cacheline *c2, size_t ofs)
|
||||
{
|
||||
__m128i v0, v1;
|
||||
__m256i r;
|
||||
|
||||
v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs);
|
||||
if (ofs < CACHELINE_SIZE - 16)
|
||||
v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]);
|
||||
else
|
||||
v1 = _mm_loadu_si128((const __m128i *)&c2->b[ofs + 16 - CACHELINE_SIZE]);
|
||||
|
||||
r = _mm256_castsi128_si256 ((__m128i)v0);
|
||||
r = _mm256_insertf128_si256 (r, (__m128i)v1, 1);
|
||||
return (_ITM_TYPE_M256) r;
|
||||
}
|
||||
#endif // AVX
|
||||
|
||||
} // namespace GTM
|
||||
|
||||
#endif // LIBITM_X86_UNALIGNED_H
|
@ -34,62 +34,3 @@ _ITM_LM256 (const _ITM_TYPE_M256 *ptr)
|
||||
{
|
||||
GTM::GTM_LB (ptr, sizeof (*ptr));
|
||||
}
|
||||
|
||||
// Helpers for re-aligning two 128-bit values.
|
||||
#ifdef __XOP__
|
||||
const __v16qi GTM::GTM_vpperm_shift[16] =
|
||||
{
|
||||
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
|
||||
{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
|
||||
{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 },
|
||||
{ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 },
|
||||
{ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
|
||||
{ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 },
|
||||
{ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 },
|
||||
{ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 },
|
||||
{ 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
|
||||
{ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 },
|
||||
{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 },
|
||||
{ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 },
|
||||
{ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
|
||||
{ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 },
|
||||
{ 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 },
|
||||
{ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 },
|
||||
};
|
||||
#else
|
||||
# define INSN0 "movdqa %xmm1, %xmm0"
|
||||
# define INSN(N) "vpalignr $" #N ", %xmm0, %xmm1, %xmm0"
|
||||
# define TABLE_ENT_0 INSN0 "\n\tret\n\t"
|
||||
# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t"
|
||||
|
||||
asm(".pushsection .text\n\
|
||||
.balign 16\n\
|
||||
.globl GTM_vpalignr_table\n\
|
||||
.hidden GTM_vpalignr_table\n\
|
||||
.type GTM_vpalignr_table, @function\n\
|
||||
GTM_vpalignr_table:\n\t"
|
||||
TABLE_ENT_0
|
||||
TABLE_ENT(1)
|
||||
TABLE_ENT(2)
|
||||
TABLE_ENT(3)
|
||||
TABLE_ENT(4)
|
||||
TABLE_ENT(5)
|
||||
TABLE_ENT(6)
|
||||
TABLE_ENT(7)
|
||||
TABLE_ENT(8)
|
||||
TABLE_ENT(9)
|
||||
TABLE_ENT(10)
|
||||
TABLE_ENT(11)
|
||||
TABLE_ENT(12)
|
||||
TABLE_ENT(13)
|
||||
TABLE_ENT(14)
|
||||
TABLE_ENT(15)
|
||||
".balign 8\n\
|
||||
.size GTM_vpalignr_table, .-GTM_vpalignr_table\n\
|
||||
.popsection");
|
||||
|
||||
# undef INSN0
|
||||
# undef INSN
|
||||
# undef TABLE_ENT_0
|
||||
# undef TABLE_ENT
|
||||
#endif
|
||||
|
@ -41,82 +41,3 @@ _ITM_LM128 (const _ITM_TYPE_M128 *ptr)
|
||||
{
|
||||
GTM::GTM_LB (ptr, sizeof (*ptr));
|
||||
}
|
||||
|
||||
// Helpers for re-aligning two 128-bit values.
|
||||
#ifdef __SSSE3__
|
||||
# define INSN0 "movdqa %xmm1, %xmm0"
|
||||
# define INSN(N) "palignr $" #N ", %xmm1, %xmm0"
|
||||
# define TABLE_ENT_0 INSN0 "\n\tret\n\t"
|
||||
# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t"
|
||||
|
||||
asm(".pushsection .text\n\
|
||||
.balign 16\n\
|
||||
.globl GTM_palignr_table\n\
|
||||
.hidden GTM_palignr_table\n\
|
||||
.type GTM_palignr_table, @function\n\
|
||||
GTM_palignr_table:\n\t"
|
||||
TABLE_ENT_0
|
||||
TABLE_ENT(1)
|
||||
TABLE_ENT(2)
|
||||
TABLE_ENT(3)
|
||||
TABLE_ENT(4)
|
||||
TABLE_ENT(5)
|
||||
TABLE_ENT(6)
|
||||
TABLE_ENT(7)
|
||||
TABLE_ENT(8)
|
||||
TABLE_ENT(9)
|
||||
TABLE_ENT(10)
|
||||
TABLE_ENT(11)
|
||||
TABLE_ENT(12)
|
||||
TABLE_ENT(13)
|
||||
TABLE_ENT(14)
|
||||
TABLE_ENT(15)
|
||||
".balign 8\n\
|
||||
.size GTM_palignr_table, .-GTM_palignr_table\n\
|
||||
.popsection");
|
||||
|
||||
# undef INSN0
|
||||
# undef INSN
|
||||
# undef TABLE_ENT_0
|
||||
# undef TABLE_ENT
|
||||
#elif defined(__SSE2__)
|
||||
# define INSNS_8 "punpcklqdq %xmm1, %xmm0"
|
||||
# define INSNS(N) "psrldq $"#N", %xmm0\n\t" \
|
||||
"pslldq $(16-"#N"), %xmm1\n\t" \
|
||||
"por %xmm1, %xmm0"
|
||||
# define TABLE_ENT_0 "ret\n\t"
|
||||
# define TABLE_ENT_8 ".balign 16\n\t" INSNS_8 "\n\tret\n\t"
|
||||
# define TABLE_ENT(N) ".balign 16\n\t" INSNS(N) "\n\tret\n\t"
|
||||
|
||||
asm(".pushsection .text\n\
|
||||
.balign 16\n\
|
||||
.globl GTM_pshift_table\n\
|
||||
.hidden GTM_pshift_table\n\
|
||||
.type GTM_pshift_table, @function\n\
|
||||
GTM_pshift_table:\n\t"
|
||||
TABLE_ENT_0
|
||||
TABLE_ENT(1)
|
||||
TABLE_ENT(2)
|
||||
TABLE_ENT(3)
|
||||
TABLE_ENT(4)
|
||||
TABLE_ENT(5)
|
||||
TABLE_ENT(6)
|
||||
TABLE_ENT(7)
|
||||
TABLE_ENT_8
|
||||
TABLE_ENT(9)
|
||||
TABLE_ENT(10)
|
||||
TABLE_ENT(11)
|
||||
TABLE_ENT(12)
|
||||
TABLE_ENT(13)
|
||||
TABLE_ENT(14)
|
||||
TABLE_ENT(15)
|
||||
".balign 8\n\
|
||||
.size GTM_pshift_table, .-GTM_pshift_table\n\
|
||||
.popsection");
|
||||
|
||||
# undef INSNS_8
|
||||
# undef INSNS
|
||||
# undef TABLE_ENT_0
|
||||
# undef TABLE_ENT_8
|
||||
# undef TABLE_ENT
|
||||
#endif
|
||||
|
@ -78,7 +78,6 @@ enum gtm_restart_reason
|
||||
#include "rwlock.h"
|
||||
#include "aatree.h"
|
||||
#include "cacheline.h"
|
||||
#include "cachepage.h"
|
||||
#include "stmlock.h"
|
||||
#include "dispatch.h"
|
||||
#include "containers.h"
|
||||
|
365
libitm/memcpy.cc
365
libitm/memcpy.cc
@ -1,365 +0,0 @@
|
||||
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libitm_i.h"
|
||||
|
||||
using namespace GTM;
|
||||
|
||||
static void
|
||||
do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size,
|
||||
abi_dispatch::lock_type W, abi_dispatch::lock_type R)
|
||||
{
|
||||
abi_dispatch *disp = abi_disp();
|
||||
// The position in the destination cacheline where *IDST starts.
|
||||
uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
|
||||
// The position in the source cacheline where *ISRC starts.
|
||||
uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
|
||||
const gtm_cacheline *src
|
||||
= reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
|
||||
gtm_cacheline *dst
|
||||
= reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
|
||||
const gtm_cacheline *sline;
|
||||
abi_dispatch::mask_pair dpair;
|
||||
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
// If both SRC and DST data start at the same position in the cachelines,
|
||||
// we can easily copy the data in tandem, cacheline by cacheline...
|
||||
if (dofs == sofs)
|
||||
{
|
||||
// We copy the data in three stages:
|
||||
|
||||
// (a) Copy stray bytes at the beginning that are smaller than a
|
||||
// cacheline.
|
||||
if (sofs != 0)
|
||||
{
|
||||
size_t sleft = CACHELINE_SIZE - sofs;
|
||||
size_t min = (size <= sleft ? size : sleft);
|
||||
|
||||
dpair = disp->write_lock(dst, W);
|
||||
sline = disp->read_lock(src, R);
|
||||
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs;
|
||||
memcpy (&dpair.line->b[sofs], &sline->b[sofs], min);
|
||||
dst++;
|
||||
src++;
|
||||
size -= min;
|
||||
}
|
||||
|
||||
// (b) Copy subsequent cacheline sized chunks.
|
||||
while (size >= CACHELINE_SIZE)
|
||||
{
|
||||
dpair = disp->write_lock(dst, W);
|
||||
sline = disp->read_lock(src, R);
|
||||
*dpair.mask = -1;
|
||||
*dpair.line = *sline;
|
||||
dst++;
|
||||
src++;
|
||||
size -= CACHELINE_SIZE;
|
||||
}
|
||||
|
||||
// (c) Copy anything left over.
|
||||
if (size != 0)
|
||||
{
|
||||
dpair = disp->write_lock(dst, W);
|
||||
sline = disp->read_lock(src, R);
|
||||
*dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
|
||||
memcpy (dpair.line, sline, size);
|
||||
}
|
||||
}
|
||||
// ... otherwise, we must copy the data in disparate hunks using
|
||||
// temporary storage.
|
||||
else
|
||||
{
|
||||
gtm_cacheline c;
|
||||
size_t sleft = CACHELINE_SIZE - sofs;
|
||||
|
||||
sline = disp->read_lock(src, R);
|
||||
|
||||
// As above, we copy the data in three stages:
|
||||
|
||||
// (a) Copy stray bytes at the beginning that are smaller than a
|
||||
// cacheline.
|
||||
if (dofs != 0)
|
||||
{
|
||||
size_t dleft = CACHELINE_SIZE - dofs;
|
||||
size_t min = (size <= dleft ? size : dleft);
|
||||
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
|
||||
|
||||
// If what's left in the source cacheline will fit in the
|
||||
// rest of the destination cacheline, straight up copy it.
|
||||
if (min <= sleft)
|
||||
{
|
||||
memcpy (&dpair.line->b[dofs], &sline->b[sofs], min);
|
||||
sofs += min;
|
||||
}
|
||||
// Otherwise, we need more bits from the source cacheline
|
||||
// that are available. Piece together what we need from
|
||||
// contiguous (source) cachelines, into temp space, and copy
|
||||
// it over.
|
||||
else
|
||||
{
|
||||
memcpy (&c, &sline->b[sofs], sleft);
|
||||
sline = disp->read_lock(++src, R);
|
||||
sofs = min - sleft;
|
||||
memcpy (&c.b[sleft], sline, sofs);
|
||||
memcpy (&dpair.line->b[dofs], &c, min);
|
||||
}
|
||||
sleft = CACHELINE_SIZE - sofs;
|
||||
|
||||
dst++;
|
||||
size -= min;
|
||||
}
|
||||
|
||||
// (b) Copy subsequent cacheline sized chunks.
|
||||
while (size >= CACHELINE_SIZE)
|
||||
{
|
||||
// We have a full (destination) cacheline where to put the
|
||||
// data, but to get to the corresponding cacheline sized
|
||||
// chunk in the source, we have to piece together two
|
||||
// contiguous source cachelines.
|
||||
|
||||
memcpy (&c, &sline->b[sofs], sleft);
|
||||
sline = disp->read_lock(++src, R);
|
||||
memcpy (&c.b[sleft], sline, sofs);
|
||||
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask = -1;
|
||||
*dpair.line = c;
|
||||
|
||||
dst++;
|
||||
size -= CACHELINE_SIZE;
|
||||
}
|
||||
|
||||
// (c) Copy anything left over.
|
||||
if (size != 0)
|
||||
{
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
|
||||
// If what's left to copy is entirely in the remaining
|
||||
// source cacheline, do it.
|
||||
if (size <= sleft)
|
||||
memcpy (dpair.line, &sline->b[sofs], size);
|
||||
// Otherwise, piece together the remaining bits, and copy.
|
||||
else
|
||||
{
|
||||
memcpy (&c, &sline->b[sofs], sleft);
|
||||
sline = disp->read_lock(++src, R);
|
||||
memcpy (&c.b[sleft], sline, size - sleft);
|
||||
memcpy (dpair.line, &c, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
do_memmove (uintptr_t idst, uintptr_t isrc, size_t size,
|
||||
abi_dispatch::lock_type W, abi_dispatch::lock_type R)
|
||||
{
|
||||
abi_dispatch *disp = abi_disp();
|
||||
uintptr_t dleft, sleft, sofs, dofs;
|
||||
const gtm_cacheline *sline;
|
||||
abi_dispatch::mask_pair dpair;
|
||||
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
/* The co-aligned memmove below doesn't work for DST == SRC, so filter
|
||||
that out. It's tempting to just return here, as this is a no-op move.
|
||||
However, our caller has the right to expect the locks to be acquired
|
||||
as advertized. */
|
||||
if (__builtin_expect (idst == isrc, 0))
|
||||
{
|
||||
/* If the write lock is already acquired, nothing to do. */
|
||||
if (W == abi_dispatch::WaW)
|
||||
return;
|
||||
/* If the destination is protected, acquire a write lock. */
|
||||
if (W != abi_dispatch::NOLOCK)
|
||||
R = abi_dispatch::RfW;
|
||||
/* Notice serial mode, where we don't acquire locks at all. */
|
||||
if (R == abi_dispatch::NOLOCK)
|
||||
return;
|
||||
|
||||
idst = isrc + size;
|
||||
for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE)
|
||||
disp->read_lock(reinterpret_cast<const gtm_cacheline *>(isrc), R);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Fall back to memcpy if the implementation above can handle it. */
|
||||
if (idst < isrc || isrc + size <= idst)
|
||||
{
|
||||
do_memcpy (idst, isrc, size, W, R);
|
||||
return;
|
||||
}
|
||||
|
||||
/* What remains requires a backward copy from the end of the blocks. */
|
||||
idst += size;
|
||||
isrc += size;
|
||||
dofs = idst & (CACHELINE_SIZE - 1);
|
||||
sofs = isrc & (CACHELINE_SIZE - 1);
|
||||
dleft = CACHELINE_SIZE - dofs;
|
||||
sleft = CACHELINE_SIZE - sofs;
|
||||
|
||||
gtm_cacheline *dst
|
||||
= reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
|
||||
const gtm_cacheline *src
|
||||
= reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
|
||||
if (dofs == 0)
|
||||
dst--;
|
||||
if (sofs == 0)
|
||||
src--;
|
||||
|
||||
if (dofs == sofs)
|
||||
{
|
||||
/* Since DST and SRC are co-aligned, and we didn't use the memcpy
|
||||
optimization above, that implies that SIZE > CACHELINE_SIZE. */
|
||||
if (sofs != 0)
|
||||
{
|
||||
dpair = disp->write_lock(dst, W);
|
||||
sline = disp->read_lock(src, R);
|
||||
*dpair.mask |= ((gtm_cacheline_mask)1 << sleft) - 1;
|
||||
memcpy (dpair.line, sline, sleft);
|
||||
dst--;
|
||||
src--;
|
||||
size -= sleft;
|
||||
}
|
||||
|
||||
while (size >= CACHELINE_SIZE)
|
||||
{
|
||||
dpair = disp->write_lock(dst, W);
|
||||
sline = disp->read_lock(src, R);
|
||||
*dpair.mask = -1;
|
||||
*dpair.line = *sline;
|
||||
dst--;
|
||||
src--;
|
||||
size -= CACHELINE_SIZE;
|
||||
}
|
||||
|
||||
if (size != 0)
|
||||
{
|
||||
size_t ofs = CACHELINE_SIZE - size;
|
||||
dpair = disp->write_lock(dst, W);
|
||||
sline = disp->read_lock(src, R);
|
||||
*dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs;
|
||||
memcpy (&dpair.line->b[ofs], &sline->b[ofs], size);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
gtm_cacheline c;
|
||||
|
||||
sline = disp->read_lock(src, R);
|
||||
if (dofs != 0)
|
||||
{
|
||||
size_t min = (size <= dofs ? size : dofs);
|
||||
|
||||
if (min <= sofs)
|
||||
{
|
||||
sofs -= min;
|
||||
memcpy (&c, &sline->b[sofs], min);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t min_ofs = min - sofs;
|
||||
memcpy (&c.b[min_ofs], sline, sofs);
|
||||
sline = disp->read_lock(--src, R);
|
||||
sofs = CACHELINE_SIZE - min_ofs;
|
||||
memcpy (&c, &sline->b[sofs], min_ofs);
|
||||
}
|
||||
|
||||
dofs = dleft - min;
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
|
||||
memcpy (&dpair.line->b[dofs], &c, min);
|
||||
|
||||
sleft = CACHELINE_SIZE - sofs;
|
||||
dst--;
|
||||
size -= min;
|
||||
}
|
||||
|
||||
while (size >= CACHELINE_SIZE)
|
||||
{
|
||||
memcpy (&c.b[sleft], sline, sofs);
|
||||
sline = disp->read_lock(--src, R);
|
||||
memcpy (&c, &sline->b[sofs], sleft);
|
||||
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask = -1;
|
||||
*dpair.line = c;
|
||||
|
||||
dst--;
|
||||
size -= CACHELINE_SIZE;
|
||||
}
|
||||
|
||||
if (size != 0)
|
||||
{
|
||||
dofs = CACHELINE_SIZE - size;
|
||||
|
||||
memcpy (&c.b[sleft], sline, sofs);
|
||||
if (sleft > dofs)
|
||||
{
|
||||
sline = disp->read_lock(--src, R);
|
||||
memcpy (&c, &sline->b[sofs], sleft);
|
||||
}
|
||||
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask |= (gtm_cacheline_mask)-1 << dofs;
|
||||
memcpy (&dpair.line->b[dofs], &c.b[dofs], size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#define ITM_MEM_DEF(NAME, READ, WRITE) \
|
||||
void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size) \
|
||||
{ \
|
||||
do_memcpy ((uintptr_t)dst, (uintptr_t)src, size, \
|
||||
abi_dispatch::WRITE, abi_dispatch::READ); \
|
||||
} \
|
||||
void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \
|
||||
{ \
|
||||
do_memmove ((uintptr_t)dst, (uintptr_t)src, size, \
|
||||
abi_dispatch::WRITE, abi_dispatch::READ); \
|
||||
}
|
||||
|
||||
ITM_MEM_DEF(RnWt, NOLOCK, W)
|
||||
ITM_MEM_DEF(RnWtaR, NOLOCK, WaR)
|
||||
ITM_MEM_DEF(RnWtaW, NOLOCK, WaW)
|
||||
|
||||
ITM_MEM_DEF(RtWn, R, NOLOCK)
|
||||
ITM_MEM_DEF(RtWt, R, W)
|
||||
ITM_MEM_DEF(RtWtaR, R, WaR)
|
||||
ITM_MEM_DEF(RtWtaW, R, WaW)
|
||||
|
||||
ITM_MEM_DEF(RtaRWn, RaR, NOLOCK)
|
||||
ITM_MEM_DEF(RtaRWt, RaR, W)
|
||||
ITM_MEM_DEF(RtaRWtaR, RaR, WaR)
|
||||
ITM_MEM_DEF(RtaRWtaW, RaR, WaW)
|
||||
|
||||
ITM_MEM_DEF(RtaWWn, RaW, NOLOCK)
|
||||
ITM_MEM_DEF(RtaWWt, RaW, W)
|
||||
ITM_MEM_DEF(RtaWWtaR, RaW, WaR)
|
||||
ITM_MEM_DEF(RtaWWtaW, RaW, WaW)
|
@ -1,78 +0,0 @@
|
||||
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libitm_i.h"
|
||||
|
||||
using namespace GTM;
|
||||
|
||||
static void
|
||||
do_memset(uintptr_t idst, int c, size_t size, abi_dispatch::lock_type W)
|
||||
{
|
||||
abi_dispatch *disp = abi_disp();
|
||||
uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
|
||||
abi_dispatch::mask_pair dpair;
|
||||
gtm_cacheline *dst
|
||||
= reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
|
||||
|
||||
if (size == 0)
|
||||
return;
|
||||
|
||||
if (dofs != 0)
|
||||
{
|
||||
size_t dleft = CACHELINE_SIZE - dofs;
|
||||
size_t min = (size <= dleft ? size : dleft);
|
||||
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
|
||||
memset (&dpair.line->b[dofs], c, min);
|
||||
dst++;
|
||||
size -= min;
|
||||
}
|
||||
|
||||
while (size >= CACHELINE_SIZE)
|
||||
{
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask = -1;
|
||||
memset (dpair.line, c, CACHELINE_SIZE);
|
||||
dst++;
|
||||
size -= CACHELINE_SIZE;
|
||||
}
|
||||
|
||||
if (size != 0)
|
||||
{
|
||||
dpair = disp->write_lock(dst, W);
|
||||
*dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
|
||||
memset (dpair.line, c, size);
|
||||
}
|
||||
}
|
||||
|
||||
#define ITM_MEM_DEF(WRITE) \
|
||||
void ITM_REGPARM _ITM_memset##WRITE(void *dst, int c, size_t size) \
|
||||
{ \
|
||||
do_memset ((uintptr_t)dst, c, size, abi_dispatch::WRITE); \
|
||||
}
|
||||
|
||||
ITM_MEM_DEF(W)
|
||||
ITM_MEM_DEF(WaR)
|
||||
ITM_MEM_DEF(WaW)
|
@ -1,628 +0,0 @@
|
||||
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
|
||||
Contributed by Richard Henderson <rth@redhat.com>.
|
||||
|
||||
This file is part of the GNU Transactional Memory Library (libitm).
|
||||
|
||||
Libitm is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
||||
more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "libitm_i.h"
|
||||
|
||||
namespace {
|
||||
|
||||
using namespace GTM;
|
||||
|
||||
class wbetl_dispatch : public abi_dispatch
|
||||
{
|
||||
private:
|
||||
static const size_t RW_SET_SIZE = 4096;
|
||||
|
||||
struct r_entry
|
||||
{
|
||||
gtm_version version;
|
||||
gtm_stmlock *lock;
|
||||
};
|
||||
|
||||
r_entry *m_rset_entries;
|
||||
size_t m_rset_nb_entries;
|
||||
size_t m_rset_size;
|
||||
|
||||
struct w_entry
|
||||
{
|
||||
/* There's a hashtable where the locks are held, so multiple
|
||||
cachelines can hash to a given bucket. This link points to the
|
||||
possible next cacheline that also hashes to this bucket. */
|
||||
struct w_entry *next;
|
||||
|
||||
/* Every entry in this bucket (accessed by NEXT) has the same LOCK
|
||||
address below. */
|
||||
gtm_stmlock *lock;
|
||||
|
||||
gtm_cacheline *addr;
|
||||
gtm_cacheline *value;
|
||||
gtm_version version;
|
||||
};
|
||||
|
||||
w_entry *m_wset_entries;
|
||||
size_t m_wset_nb_entries;
|
||||
size_t m_wset_size;
|
||||
bool m_wset_reallocate;
|
||||
|
||||
gtm_version m_start;
|
||||
gtm_version m_end;
|
||||
|
||||
gtm_cacheline_page *m_cache_page;
|
||||
unsigned m_n_cache_page;
|
||||
|
||||
private:
|
||||
bool local_w_entry_p (w_entry *w);
|
||||
bool has_read (gtm_stmlock *lock);
|
||||
bool validate();
|
||||
bool extend();
|
||||
|
||||
gtm_cacheline *do_write_lock(gtm_cacheline *);
|
||||
gtm_cacheline *do_after_write_lock(gtm_cacheline *);
|
||||
const gtm_cacheline *do_read_lock(const gtm_cacheline *, bool);
|
||||
|
||||
public:
|
||||
wbetl_dispatch();
|
||||
|
||||
virtual const gtm_cacheline *read_lock(const gtm_cacheline *, ls_modifier);
|
||||
virtual mask_pair write_lock(gtm_cacheline *, ls_modifier);
|
||||
|
||||
virtual bool trycommit();
|
||||
virtual void rollback();
|
||||
virtual void reinit();
|
||||
virtual void fini();
|
||||
virtual bool trydropreference (void *, size_t);
|
||||
};
|
||||
|
||||
/* Check if W is one of our write locks. */
|
||||
|
||||
inline bool
|
||||
wbetl_dispatch::local_w_entry_p (w_entry *w)
|
||||
{
|
||||
return (m_wset_entries <= w && w < m_wset_entries + m_wset_nb_entries);
|
||||
}
|
||||
|
||||
/* Check if stripe has been read previously. */
|
||||
|
||||
inline bool
|
||||
wbetl_dispatch::has_read (gtm_stmlock *lock)
|
||||
{
|
||||
// ??? Consider using an AA tree to lookup the r_set entries.
|
||||
size_t n = m_rset_nb_entries;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
if (m_rset_entries[i].lock == lock)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Validate read set, i.e. check if all read addresses are still valid now. */
|
||||
|
||||
bool
|
||||
wbetl_dispatch::validate ()
|
||||
{
|
||||
__sync_synchronize ();
|
||||
|
||||
size_t n = m_rset_nb_entries;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
r_entry *r = &m_rset_entries[i];
|
||||
gtm_stmlock l = *r->lock;
|
||||
|
||||
if (gtm_stmlock_owned_p (l))
|
||||
{
|
||||
w_entry *w = (w_entry *) gtm_stmlock_get_addr (l);
|
||||
|
||||
// If someone has locked us, it better be by someone in the
|
||||
// current thread.
|
||||
if (!local_w_entry_p (w))
|
||||
return false;
|
||||
}
|
||||
else if (gtm_stmlock_get_version (l) != r->version)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Extend the snapshot range. */
|
||||
|
||||
bool
|
||||
wbetl_dispatch::extend ()
|
||||
{
|
||||
gtm_version now = gtm_get_clock ();
|
||||
|
||||
if (validate ())
|
||||
{
|
||||
m_end = now;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Acquire a write lock on ADDR. */
|
||||
|
||||
gtm_cacheline *
|
||||
wbetl_dispatch::do_write_lock(gtm_cacheline *addr)
|
||||
{
|
||||
gtm_stmlock *lock;
|
||||
gtm_stmlock l, l2;
|
||||
gtm_version version;
|
||||
w_entry *w, *prev = NULL;
|
||||
|
||||
lock = gtm_get_stmlock (addr);
|
||||
l = *lock;
|
||||
|
||||
restart_no_load:
|
||||
if (gtm_stmlock_owned_p (l))
|
||||
{
|
||||
w = (w_entry *) gtm_stmlock_get_addr (l);
|
||||
|
||||
/* Did we previously write the same address? */
|
||||
if (local_w_entry_p (w))
|
||||
{
|
||||
prev = w;
|
||||
while (1)
|
||||
{
|
||||
if (addr == prev->addr)
|
||||
return prev->value;
|
||||
if (prev->next == NULL)
|
||||
break;
|
||||
prev = prev->next;
|
||||
}
|
||||
|
||||
/* Get version from previous entry write set. */
|
||||
version = prev->version;
|
||||
|
||||
/* If there's not enough entries, we must reallocate the array,
|
||||
which invalidates all pointers to write set entries, which
|
||||
means we have to restart the transaction. */
|
||||
if (m_wset_nb_entries == m_wset_size)
|
||||
{
|
||||
m_wset_size *= 2;
|
||||
m_wset_reallocate = true;
|
||||
gtm_tx()->restart (RESTART_REALLOCATE);
|
||||
}
|
||||
|
||||
w = &m_wset_entries[m_wset_nb_entries];
|
||||
goto do_write;
|
||||
}
|
||||
|
||||
gtm_tx()->restart (RESTART_LOCKED_WRITE);
|
||||
}
|
||||
else
|
||||
{
|
||||
version = gtm_stmlock_get_version (l);
|
||||
|
||||
/* We might have read an older version previously. */
|
||||
if (version > m_end)
|
||||
{
|
||||
if (has_read (lock))
|
||||
gtm_tx()->restart (RESTART_VALIDATE_WRITE);
|
||||
}
|
||||
|
||||
/* Extend write set, aborting to reallocate write set entries. */
|
||||
if (m_wset_nb_entries == m_wset_size)
|
||||
{
|
||||
m_wset_size *= 2;
|
||||
m_wset_reallocate = true;
|
||||
gtm_tx()->restart (RESTART_REALLOCATE);
|
||||
}
|
||||
|
||||
/* Acquire the lock. */
|
||||
w = &m_wset_entries[m_wset_nb_entries];
|
||||
l2 = gtm_stmlock_set_owned (w);
|
||||
l = __sync_val_compare_and_swap (lock, l, l2);
|
||||
if (l != l2)
|
||||
goto restart_no_load;
|
||||
}
|
||||
|
||||
do_write:
|
||||
m_wset_nb_entries++;
|
||||
if (prev != NULL)
|
||||
prev->next = w;
|
||||
w->next = 0;
|
||||
w->lock = lock;
|
||||
w->addr = addr;
|
||||
w->version = version;
|
||||
|
||||
gtm_cacheline_page *page = m_cache_page;
|
||||
unsigned index = m_n_cache_page;
|
||||
|
||||
if (page == NULL || index == gtm_cacheline_page::LINES)
|
||||
{
|
||||
gtm_cacheline_page *npage = new gtm_cacheline_page;
|
||||
npage->prev = page;
|
||||
m_cache_page = page = npage;
|
||||
m_n_cache_page = 1;
|
||||
index = 0;
|
||||
}
|
||||
else
|
||||
m_n_cache_page = index + 1;
|
||||
|
||||
gtm_cacheline *line = &page->lines[index];
|
||||
w->value = line;
|
||||
page->masks[index] = 0;
|
||||
*line = *addr;
|
||||
|
||||
return line;
|
||||
}
|
||||
|
||||
gtm_cacheline *
|
||||
wbetl_dispatch::do_after_write_lock (gtm_cacheline *addr)
|
||||
{
|
||||
gtm_stmlock *lock;
|
||||
gtm_stmlock l;
|
||||
w_entry *w;
|
||||
|
||||
lock = gtm_get_stmlock (addr);
|
||||
l = *lock;
|
||||
assert (gtm_stmlock_owned_p (l));
|
||||
|
||||
w = (w_entry *) gtm_stmlock_get_addr (l);
|
||||
assert (local_w_entry_p (w));
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (addr == w->addr)
|
||||
return w->value;
|
||||
w = w->next;
|
||||
}
|
||||
}
|
||||
|
||||
/* Acquire a read lock on ADDR. */
|
||||
|
||||
const gtm_cacheline *
|
||||
wbetl_dispatch::do_read_lock (const gtm_cacheline *addr, bool after_read)
|
||||
{
|
||||
gtm_stmlock *lock;
|
||||
gtm_stmlock l, l2;
|
||||
gtm_version version;
|
||||
w_entry *w;
|
||||
|
||||
lock = gtm_get_stmlock (addr);
|
||||
l = *lock;
|
||||
|
||||
restart_no_load:
|
||||
if (gtm_stmlock_owned_p (l))
|
||||
{
|
||||
w = (w_entry *) gtm_stmlock_get_addr (l);
|
||||
|
||||
/* Did we previously write the same address? */
|
||||
if (local_w_entry_p (w))
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
if (addr == w->addr)
|
||||
return w->value;
|
||||
if (w->next == NULL)
|
||||
return addr;
|
||||
w = w->next;
|
||||
}
|
||||
}
|
||||
|
||||
gtm_tx()->restart (RESTART_LOCKED_READ);
|
||||
}
|
||||
|
||||
version = gtm_stmlock_get_version (l);
|
||||
|
||||
/* If version is no longer valid, re-validate the read set. */
|
||||
if (version > m_end)
|
||||
{
|
||||
if (!extend ())
|
||||
gtm_tx()->restart (RESTART_VALIDATE_READ);
|
||||
|
||||
if (!after_read)
|
||||
{
|
||||
// Verify that the version has not yet been overwritten. The read
|
||||
// value has not yet been added to read set and may not have been
|
||||
// checked during the extend.
|
||||
//
|
||||
// ??? This only makes sense if we're actually reading the value
|
||||
// and returning it now -- which I believe the original TinySTM
|
||||
// did. This doesn't make a whole lot of sense when we're
|
||||
// manipulating cachelines as we are now. Do we need some other
|
||||
// form of lock verification here, or is the validate call in
|
||||
// trycommit sufficient?
|
||||
|
||||
__sync_synchronize ();
|
||||
l2 = *lock;
|
||||
if (l != l2)
|
||||
{
|
||||
l = l2;
|
||||
goto restart_no_load;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!after_read)
|
||||
{
|
||||
r_entry *r;
|
||||
|
||||
/* Add the address and version to the read set. */
|
||||
if (m_rset_nb_entries == m_rset_size)
|
||||
{
|
||||
m_rset_size *= 2;
|
||||
|
||||
m_rset_entries = (r_entry *)
|
||||
xrealloc (m_rset_entries, m_rset_size * sizeof(r_entry));
|
||||
}
|
||||
r = &m_rset_entries[m_rset_nb_entries++];
|
||||
r->version = version;
|
||||
r->lock = lock;
|
||||
}
|
||||
|
||||
return addr;
|
||||
}
|
||||
|
||||
const gtm_cacheline *
|
||||
wbetl_dispatch::read_lock (const gtm_cacheline *addr, ls_modifier ltype)
|
||||
{
|
||||
switch (ltype)
|
||||
{
|
||||
case NONTXNAL:
|
||||
return addr;
|
||||
case R:
|
||||
return do_read_lock (addr, false);
|
||||
case RaR:
|
||||
return do_read_lock (addr, true);
|
||||
case RaW:
|
||||
return do_after_write_lock (const_cast<gtm_cacheline *>(addr));
|
||||
case RfW:
|
||||
return do_write_lock (const_cast<gtm_cacheline *>(addr));
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
abi_dispatch::mask_pair
|
||||
wbetl_dispatch::write_lock (gtm_cacheline *addr, ls_modifier ltype)
|
||||
{
|
||||
gtm_cacheline *line;
|
||||
|
||||
switch (ltype)
|
||||
{
|
||||
case NONTXNAL:
|
||||
return mask_pair (addr, &mask_sink);
|
||||
case W:
|
||||
case WaR:
|
||||
line = do_write_lock (addr);
|
||||
break;
|
||||
case WaW:
|
||||
line = do_after_write_lock (addr);
|
||||
break;
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
||||
return mask_pair (line, gtm_cacheline_page::mask_for_page_line (line));
|
||||
}
|
||||
|
||||
/* Commit the transaction. */
|
||||
|
||||
bool
|
||||
wbetl_dispatch::trycommit ()
|
||||
{
|
||||
const size_t n = m_wset_nb_entries;
|
||||
if (n != 0)
|
||||
{
|
||||
/* Get commit timestamp. */
|
||||
gtm_version t = gtm_inc_clock ();
|
||||
|
||||
/* Validate only if a concurrent transaction has started since. */
|
||||
if (m_start != t - 1 && !validate ())
|
||||
return false;
|
||||
|
||||
/* Install new versions. */
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
w_entry *w = &m_wset_entries[i];
|
||||
gtm_cacheline_mask mask
|
||||
= *gtm_cacheline_page::mask_for_page_line (w->value);
|
||||
|
||||
/* Filter out any updates that overlap the libitm stack. */
|
||||
mask = gtm_mask_stack (w->addr, mask);
|
||||
|
||||
gtm_cacheline::copy_mask (w->addr, w->value, mask);
|
||||
}
|
||||
|
||||
/* Only emit barrier after all cachelines are copied. */
|
||||
gtm_cacheline::copy_mask_wb ();
|
||||
|
||||
/* Drop locks. */
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
w_entry *w = &m_wset_entries[i];
|
||||
|
||||
/* Every link along the chain has the same lock, but only
|
||||
bother dropping the lock once per bucket (at the end). */
|
||||
if (w->next == NULL)
|
||||
*w->lock = gtm_stmlock_set_version (t);
|
||||
}
|
||||
}
|
||||
|
||||
__sync_synchronize ();
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
wbetl_dispatch::rollback ()
|
||||
{
|
||||
/* Drop locks. */
|
||||
const size_t n = m_wset_nb_entries;
|
||||
for (size_t i = 0; i < n; ++i)
|
||||
{
|
||||
w_entry *w = &m_wset_entries[i];
|
||||
|
||||
/* Every link along the chain has the same lock, but only
|
||||
bother dropping the lock once per bucket (at the end). */
|
||||
if (w->next == NULL)
|
||||
*w->lock = gtm_stmlock_set_version (w->version);
|
||||
}
|
||||
|
||||
__sync_synchronize ();
|
||||
}
|
||||
|
||||
void
|
||||
wbetl_dispatch::reinit ()
|
||||
{
|
||||
gtm_cacheline_page *page;
|
||||
|
||||
m_rset_nb_entries = 0;
|
||||
m_wset_nb_entries = 0;
|
||||
|
||||
if (m_wset_reallocate)
|
||||
{
|
||||
m_wset_reallocate = 0;
|
||||
m_wset_entries = (w_entry *)
|
||||
xrealloc (m_wset_entries, m_wset_size * sizeof(w_entry));
|
||||
}
|
||||
|
||||
page = m_cache_page;
|
||||
if (page)
|
||||
{
|
||||
/* Release all but one of the pages of cachelines. */
|
||||
gtm_cacheline_page *prev = page->prev;
|
||||
if (prev)
|
||||
{
|
||||
page->prev = 0;
|
||||
delete prev;
|
||||
}
|
||||
|
||||
/* Start the next cacheline allocation from the beginning. */
|
||||
m_n_cache_page = 0;
|
||||
}
|
||||
|
||||
m_start = m_end = gtm_get_clock ();
|
||||
}
|
||||
|
||||
void
|
||||
wbetl_dispatch::fini ()
|
||||
{
|
||||
delete m_cache_page;
|
||||
free (m_rset_entries);
|
||||
free (m_wset_entries);
|
||||
delete this;
|
||||
}
|
||||
|
||||
/* Attempt to drop any internal references to PTR. Return TRUE if successful.
|
||||
|
||||
This is an adaptation of the transactional memcpy function.
|
||||
|
||||
What we do here is flush out the current transactional content of
|
||||
PTR to real memory, and remove the write mask bits associated with
|
||||
it so future commits will ignore this piece of memory. */
|
||||
|
||||
bool
|
||||
wbetl_dispatch::trydropreference (void *ptr, size_t size)
|
||||
{
|
||||
if (size == 0)
|
||||
return true;
|
||||
|
||||
if (!validate ())
|
||||
return false;
|
||||
|
||||
uintptr_t isrc = (uintptr_t)ptr;
|
||||
// The position in the source cacheline where *PTR starts.
|
||||
uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
|
||||
gtm_cacheline *src
|
||||
= reinterpret_cast<gtm_cacheline *>(isrc & -CACHELINE_SIZE);
|
||||
unsigned char *dst = (unsigned char *)ptr;
|
||||
abi_dispatch::mask_pair pair;
|
||||
|
||||
// If we're trying to drop a reference, we should already have a
|
||||
// write lock on it. If we don't have one, there's no work to do.
|
||||
if (!gtm_stmlock_owned_p (*gtm_get_stmlock (src)))
|
||||
return true;
|
||||
|
||||
// We copy the data in three stages:
|
||||
|
||||
// (a) Copy stray bytes at the beginning that are smaller than a
|
||||
// cacheline.
|
||||
if (sofs != 0)
|
||||
{
|
||||
size_t sleft = CACHELINE_SIZE - sofs;
|
||||
size_t min = (size <= sleft ? size : sleft);
|
||||
|
||||
// WaW will give us the current locked entry.
|
||||
pair = this->write_lock (src, WaW);
|
||||
|
||||
// *jedi mind wave*...these aren't the droids you're looking for.
|
||||
*pair.mask &= ~((((gtm_cacheline_mask)1 << min) - 1) << sofs);
|
||||
|
||||
memcpy (dst, &pair.line->b[sofs], min);
|
||||
dst += min;
|
||||
src++;
|
||||
size -= min;
|
||||
}
|
||||
|
||||
// (b) Copy subsequent cacheline sized chunks.
|
||||
while (size >= CACHELINE_SIZE)
|
||||
{
|
||||
pair = this->write_lock(src, WaW);
|
||||
*pair.mask = 0;
|
||||
memcpy (dst, pair.line, CACHELINE_SIZE);
|
||||
dst += CACHELINE_SIZE;
|
||||
src++;
|
||||
size -= CACHELINE_SIZE;
|
||||
}
|
||||
|
||||
// (c) Copy anything left over.
|
||||
if (size != 0)
|
||||
{
|
||||
pair = this->write_lock(src, WaW);
|
||||
*pair.mask &= ~(((gtm_cacheline_mask)1 << size) - 1);
|
||||
memcpy (dst, pair.line, size);
|
||||
}
|
||||
|
||||
// No need to drop locks, since we're going to abort the transaction
|
||||
// anyhow.
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
wbetl_dispatch::wbetl_dispatch ()
|
||||
: abi_dispatch (false, false)
|
||||
{
|
||||
m_rset_entries = (r_entry *) xmalloc (RW_SET_SIZE * sizeof(r_entry));
|
||||
m_rset_nb_entries = 0;
|
||||
m_rset_size = RW_SET_SIZE;
|
||||
|
||||
m_wset_entries = (w_entry *) xmalloc (RW_SET_SIZE * sizeof(w_entry));
|
||||
m_wset_nb_entries = 0;
|
||||
m_wset_size = RW_SET_SIZE;
|
||||
m_wset_reallocate = false;
|
||||
|
||||
m_start = m_end = gtm_get_clock ();
|
||||
|
||||
m_cache_page = 0;
|
||||
m_n_cache_page = 0;
|
||||
}
|
||||
|
||||
} // anon namespace
|
||||
|
||||
abi_dispatch *
|
||||
GTM::dispatch_wbetl ()
|
||||
{
|
||||
return new wbetl_dispatch ();
|
||||
}
|
@ -38,6 +38,7 @@ subdir = testsuite
|
||||
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
|
||||
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
|
||||
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
|
||||
$(top_srcdir)/../config/asmcfi.m4 \
|
||||
$(top_srcdir)/../config/depstand.m4 \
|
||||
$(top_srcdir)/../config/enable.m4 \
|
||||
$(top_srcdir)/../config/futex.m4 \
|
||||
@ -90,8 +91,6 @@ ECHO_N = @ECHO_N@
|
||||
ECHO_T = @ECHO_T@
|
||||
EGREP = @EGREP@
|
||||
EXEEXT = @EXEEXT@
|
||||
FC = @FC@
|
||||
FCFLAGS = @FCFLAGS@
|
||||
FGREP = @FGREP@
|
||||
GREP = @GREP@
|
||||
INSTALL = @INSTALL@
|
||||
@ -142,7 +141,6 @@ abs_top_srcdir = @abs_top_srcdir@
|
||||
ac_ct_CC = @ac_ct_CC@
|
||||
ac_ct_CXX = @ac_ct_CXX@
|
||||
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
|
||||
ac_ct_FC = @ac_ct_FC@
|
||||
am__include = @am__include@
|
||||
am__leading_dot = @am__leading_dot@
|
||||
am__quote = @am__quote@
|
||||
|
Loading…
Reference in New Issue
Block a user