libitm: Remove unused code.

In particular, unused code that's presenting portability problems.

From-SVN: r181241
Richard Henderson 2011-11-09 14:54:55 -08:00 committed by Richard Henderson
parent cb8010f922
commit 79b1edb6b5
22 changed files with 41 additions and 2669 deletions

ChangeLog

@ -1,3 +1,26 @@
2011-11-09  Richard Henderson  <rth@redhat.com>

	* barrier.tpl, memcpy.cc, memset.cc, method-wbetl.cc: Remove file.
	* config/alpha/unaligned.h: Remove file.
	* config/generic/unaligned.h: Remove file.
	* config/x86/unaligned.h: Remove file.
	* config/generic/cachepage.h: Remove file.
	* config/posix/cachepage.cc: Remove file.
	* config/generic/cacheline.cc: Remove file.
	* config/x86/cacheline.cc: Remove file.
	* config/generic/cacheline.h (gtm_cacheline): Remove the
	store_mask, copy_mask, copy_mask_wb methods.
	* config/x86/cacheline.h: Likewise.
	* config/alpha/cacheline.h: Fall back to generic after setting size.
	* config/generic/tls.cc (gtm_mask_stack): Remove.
	* config/x86/x86_avx.cc (GTM_vpperm_shift): Remove.
	(GTM_vpalignr_table): Remove.
	* config/x86/x86_sse.cc (GTM_palignr_table): Remove.
	(GTM_pshift_table): Remove.
	* config/libitm_i.h: Don't include cachepage.h.
	* Makefile.am (libitm_la_SOURCES): Remove cacheline.cc, cachepage.cc
	* Makefile.in, testsuite/Makefile.in: Rebuild.

2011-11-09  Richard Henderson  <rth@redhat.com>

	* config/x86/cacheline.h (gtm_cacheline::store_mask): Use .byte

Makefile.am

@ -41,7 +41,7 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \
libitm_la_SOURCES = \
aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc barrier.cc beginend.cc \
clone.cc cacheline.cc cachepage.cc eh_cpp.cc local.cc \
clone.cc eh_cpp.cc local.cc \
query.cc retry.cc rwlock.cc useraction.cc util.cc \
sjlj.S tls.cc method-serial.cc method-gl.cc

Makefile.in

@ -48,6 +48,7 @@ DIST_COMMON = $(am__configure_deps) $(srcdir)/../config.guess \
$(top_srcdir)/configure ChangeLog
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/asmcfi.m4 \
$(top_srcdir)/../config/depstand.m4 \
$(top_srcdir)/../config/enable.m4 \
$(top_srcdir)/../config/futex.m4 \
@ -94,17 +95,17 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
libitm_la_LIBADD =
am__libitm_la_SOURCES_DIST = aatree.cc alloc.cc alloc_c.cc \
alloc_cpp.cc barrier.cc beginend.cc clone.cc cacheline.cc \
cachepage.cc eh_cpp.cc local.cc query.cc retry.cc rwlock.cc \
useraction.cc util.cc sjlj.S tls.cc method-serial.cc \
method-gl.cc x86_sse.cc x86_avx.cc futex.cc
alloc_cpp.cc barrier.cc beginend.cc clone.cc eh_cpp.cc \
local.cc query.cc retry.cc rwlock.cc useraction.cc util.cc \
sjlj.S tls.cc method-serial.cc method-gl.cc x86_sse.cc \
x86_avx.cc futex.cc
@ARCH_X86_TRUE@am__objects_1 = x86_sse.lo x86_avx.lo
@ARCH_FUTEX_TRUE@am__objects_2 = futex.lo
am_libitm_la_OBJECTS = aatree.lo alloc.lo alloc_c.lo alloc_cpp.lo \
barrier.lo beginend.lo clone.lo cacheline.lo cachepage.lo \
eh_cpp.lo local.lo query.lo retry.lo rwlock.lo useraction.lo \
util.lo sjlj.lo tls.lo method-serial.lo method-gl.lo \
$(am__objects_1) $(am__objects_2)
barrier.lo beginend.lo clone.lo eh_cpp.lo local.lo query.lo \
retry.lo rwlock.lo useraction.lo util.lo sjlj.lo tls.lo \
method-serial.lo method-gl.lo $(am__objects_1) \
$(am__objects_2)
libitm_la_OBJECTS = $(am_libitm_la_OBJECTS)
DEFAULT_INCLUDES = -I.@am__isrc@
depcomp = $(SHELL) $(top_srcdir)/../depcomp
@ -234,8 +235,6 @@ ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FC = @FC@
FCFLAGS = @FCFLAGS@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
@ -286,7 +285,6 @@ abs_top_srcdir = @abs_top_srcdir@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
ac_ct_FC = @ac_ct_FC@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@
@ -371,10 +369,9 @@ libitm_la_LDFLAGS = $(libitm_version_info) $(libitm_version_script) \
-no-undefined
libitm_la_SOURCES = aatree.cc alloc.cc alloc_c.cc alloc_cpp.cc \
barrier.cc beginend.cc clone.cc cacheline.cc cachepage.cc \
eh_cpp.cc local.cc query.cc retry.cc rwlock.cc useraction.cc \
util.cc sjlj.S tls.cc method-serial.cc method-gl.cc \
$(am__append_1) $(am__append_2)
barrier.cc beginend.cc clone.cc eh_cpp.cc local.cc query.cc \
retry.cc rwlock.cc useraction.cc util.cc sjlj.S tls.cc \
method-serial.cc method-gl.cc $(am__append_1) $(am__append_2)
# Automake Documentation:
# If your package has Texinfo files in many directories, you can use the
@ -500,8 +497,6 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/alloc_cpp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/barrier.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/beginend.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cacheline.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cachepage.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/clone.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/eh_cpp.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/futex.Plo@am__quote@

barrier.tpl

@ -1,170 +0,0 @@
/* -*- c++ -*- */
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "unaligned.h"
namespace {
using namespace GTM;
template<typename T>
T do_read (const T *ptr, abi_dispatch::lock_type lock)
{
//
// Find the cacheline that holds the current value of *PTR.
//
abi_dispatch *disp = abi_disp();
uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
// Normalize PTR by chopping off the bottom bits so we can search
// for PTR in the cacheline hash.
uintptr_t iline = iptr & -CACHELINE_SIZE;
// The position in the resulting cacheline where *PTR is actually stored.
uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
const gtm_cacheline *pline = reinterpret_cast<const gtm_cacheline *>(iline);
// Search for the actual cacheline that holds the current value of *PTR.
const gtm_cacheline *line = disp->read_lock(pline, lock);
// Point to the position in the cacheline where *PTR is stored.
ptr = reinterpret_cast<const T *>(&line->b[iofs]);
// Straight up loads, because we're either aligned, or we don't care
// about alignment.
//
// If we require alignment on type T, do a straight load if we're
// aligned. Otherwise do a straight load IFF the load fits entirely
// in this cacheline. That is, it won't span multiple cachelines.
if (__builtin_expect (strict_alignment<T>::value
? (iofs & (sizeof (T) - 1)) == 0
: iofs + sizeof(T) <= CACHELINE_SIZE, 1))
{
do_normal_load:
return *ptr;
}
// If alignment on T is necessary, but we're unaligned, yet we fit
// entirely in this cacheline... do the unaligned load dance.
else if (__builtin_expect (strict_alignment<T>::value
&& iofs + sizeof(T) <= CACHELINE_SIZE, 1))
{
do_unaligned_load:
return unaligned_load<T>(ptr);
}
// Otherwise, this load will span multiple cachelines.
else
{
// Get the following cacheline for the rest of the data.
const gtm_cacheline *line2 = disp->read_lock(pline + 1, lock);
// If the two cachelines are adjacent, just load it all in one
// swoop.
if (line2 == line + 1)
{
if (!strict_alignment<T>::value)
goto do_normal_load;
else
goto do_unaligned_load;
}
else
{
// Otherwise, ask the backend to load from two different
// cachelines.
return unaligned_load2<T>(line, line2, iofs);
}
}
}
template<typename T>
void do_write (T *ptr, T val, abi_dispatch::lock_type lock)
{
// Note: See comments for do_read() above for hints on this
// function. Ideally we should abstract out a lot out of these two
// functions, and avoid all this duplication.
abi_dispatch *disp = abi_disp();
uintptr_t iptr = reinterpret_cast<uintptr_t>(ptr);
uintptr_t iline = iptr & -CACHELINE_SIZE;
uintptr_t iofs = iptr & (CACHELINE_SIZE - 1);
gtm_cacheline *pline = reinterpret_cast<gtm_cacheline *>(iline);
gtm_cacheline_mask m = ((gtm_cacheline_mask)2 << (sizeof(T) - 1)) - 1;
abi_dispatch::mask_pair pair = disp->write_lock(pline, lock);
ptr = reinterpret_cast<T *>(&pair.line->b[iofs]);
if (__builtin_expect (strict_alignment<T>::value
? (iofs & (sizeof (val) - 1)) == 0
: iofs + sizeof(val) <= CACHELINE_SIZE, 1))
{
*pair.mask |= m << iofs;
do_normal_store:
*ptr = val;
}
else if (__builtin_expect (strict_alignment<T>::value
&& iofs + sizeof(val) <= CACHELINE_SIZE, 1))
{
*pair.mask |= m << iofs;
do_unaligned_store:
unaligned_store<T>(ptr, val);
}
else
{
*pair.mask |= m << iofs;
abi_dispatch::mask_pair pair2 = disp->write_lock(pline + 1, lock);
uintptr_t ileft = CACHELINE_SIZE - iofs;
*pair2.mask |= m >> ileft;
if (pair2.line == pair.line + 1)
{
if (!strict_alignment<T>::value)
goto do_normal_store;
else
goto do_unaligned_store;
}
else
unaligned_store2<T>(pair.line, pair2.line, iofs, val);
}
}
} /* anonymous namespace */
#define ITM_READ(T, LOCK) \
_ITM_TYPE_##T ITM_REGPARM _ITM_##LOCK##T (const _ITM_TYPE_##T *ptr) \
{ \
return do_read (ptr, abi_dispatch::LOCK); \
}
#define ITM_WRITE(T, LOCK) \
void ITM_REGPARM _ITM_##LOCK##T (_ITM_TYPE_##T *ptr, _ITM_TYPE_##T val) \
{ \
do_write (ptr, val, abi_dispatch::LOCK); \
}
#define ITM_BARRIERS(T) \
ITM_READ(T, R) \
ITM_READ(T, RaR) \
ITM_READ(T, RaW) \
ITM_READ(T, RfW) \
ITM_WRITE(T, W) \
ITM_WRITE(T, WaR) \
ITM_WRITE(T, WaW)
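The removed barrier template above derives every _ITM_ read and write barrier from the cacheline layout: it rounds the address down to a 64-byte line, acquires that line through the dispatch table, takes a second line when the access straddles a boundary, and for writes builds a byte mask covering sizeof(T) bytes at the line offset. A minimal standalone sketch of just that index-and-mask arithmetic, with invented names and no dispatch or locking, to make the addressing scheme concrete:

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative only: mirrors the address/mask arithmetic of the removed
// do_read/do_write templates, without any dispatch or locking.
static const uintptr_t LINE_SIZE = 64;            // CACHELINE_SIZE in libitm
typedef uint64_t line_mask;                       // one bit per byte of a line

struct access_info
{
  uintptr_t line;   // address of the cacheline holding the first byte
  uintptr_t ofs;    // offset of the access within that line
  bool spans;       // true if the access continues into the next line
  line_mask mask;   // modified-byte mask for the first line
};

static access_info classify (const void *p, size_t size)
{
  uintptr_t ip = (uintptr_t) p;
  access_info a;
  a.line = ip & -LINE_SIZE;                       // chop off the low bits
  a.ofs = ip & (LINE_SIZE - 1);
  a.spans = a.ofs + size > LINE_SIZE;
  // Low `size` bits set, then shifted to the byte offset; for size < 64 this
  // equals the ((gtm_cacheline_mask)2 << (sizeof(T) - 1)) - 1 form used by
  // the removed do_write.  Bits that shift past the line belong to line + 1.
  line_mask m = size >= 64 ? ~(line_mask) 0 : ((line_mask) 1 << size) - 1;
  a.mask = m << a.ofs;
  return a;
}

int main ()
{
  access_info a = classify ((void *) (uintptr_t) 0x1003e, 4); // 4 bytes at ofs 62
  std::printf ("line=%#lx ofs=%lu spans=%d mask=%#lx\n",
	       (unsigned long) a.line, (unsigned long) a.ofs,
	       (int) a.spans, (unsigned long) a.mask);
  return 0;
}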

config/alpha/cacheline.h

@ -33,90 +33,6 @@
// modification mask, below.
#define CACHELINE_SIZE 64
#ifdef __alpha_bwx__
# include "config/generic/cacheline.h"
#else
// If we don't have byte-word stores, then we'll never be able to
// adjust *all* of the byte loads/stores to be truely atomic. So
// only guarantee 4-byte aligned values atomicly stored, exactly
// like the native system. Use byte zap instructions to accelerate
// sub-word masked stores.
#include "config/generic/cacheline.h"
namespace GTM HIDDEN {
// A gtm_cacheline_mask stores a modified bit for every modified byte
// in the cacheline with which it is associated.
typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
union gtm_cacheline
{
// Byte access to the cacheline.
unsigned char b[CACHELINE_SIZE] __attribute__((aligned(CACHELINE_SIZE)));
// Larger sized access to the cacheline.
uint16_t u16[CACHELINE_SIZE / sizeof(uint16_t)];
uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
// Store S into D, but only the bytes specified by M.
static void store_mask(uint32_t *d, uint32_t s, uint8_t m);
static void store_mask(uint64_t *d, uint64_t s, uint8_t m);
// Copy S to D, but only the bytes specified by M.
static void copy_mask (gtm_cacheline * __restrict d,
const gtm_cacheline * __restrict s,
gtm_cacheline_mask m);
// A write barrier to emit after (a series of) copy_mask.
static void copy_mask_wb () { atomic_write_barrier(); }
};
inline void ALWAYS_INLINE
gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m)
{
const uint8_t tm = (1 << sizeof(uint32_t)) - 1;
m &= tm;
if (__builtin_expect (m, tm))
{
if (__builtin_expect (m == tm, 1))
*d = s;
else
*d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m);
}
}
inline void ALWAYS_INLINE
gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m)
{
if (__builtin_expect (m, 0xff))
{
if (__builtin_expect (m == 0xff, 1))
*d = s;
else
{
typedef uint32_t *p32 __attribute__((may_alias));
p32 d32 = reinterpret_cast<p32>(d);
if ((m & 0x0f) == 0x0f)
{
d32[0] = s;
m &= 0xf0;
}
else if ((m & 0xf0) == 0xf0)
{
d32[1] = s >> 32;
m &= 0x0f;
}
if (m)
*d = __builtin_alpha_zap (*d, m) | __builtin_alpha_zapnot (s, m);
}
}
}
} // namespace GTM
#endif // __alpha_bwx__
#endif // LIBITM_ALPHA_CACHELINE_H
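On pre-BWX Alpha the store_mask above leans on the zap/zapnot builtins to merge the selected bytes of the new value into the old word in a single pass. For comparison, a portable byte loop doing the same masked merge could look like the sketch below; byte_merge and the fixed uint64_t width are assumptions for illustration, not libitm interfaces, and the bit-i-selects-byte-i mapping assumes a little-endian layout as on Alpha:

#include <cstdint>
#include <cstring>

// Portable illustration of a byte-masked store: for each set bit i in m,
// byte i of *d is replaced with byte i of s -- the effect the removed code
// gets from __builtin_alpha_zap / __builtin_alpha_zapnot.
static inline void byte_merge (uint64_t *d, uint64_t s, uint8_t m)
{
  if (m == 0xff) { *d = s; return; }    // fast path: full store
  if (m == 0) return;                   // nothing selected
  unsigned char db[8], sb[8];
  std::memcpy (db, d, 8);
  std::memcpy (sb, &s, 8);
  for (int i = 0; i < 8; ++i)
    if (m & (1u << i))
      db[i] = sb[i];
  std::memcpy (d, db, 8);
}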

config/alpha/unaligned.h

@ -1,118 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef LIBITM_ALPHA_UNALIGNED_H
#define LIBITM_ALPHA_UNALIGNED_H 1
#define HAVE_ARCH_UNALIGNED_LOAD2_U2 1
#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1
#ifndef __alpha_bwx__
#define HAVE_ARCH_UNALIGNED_STORE2_U2 1
#endif
#define HAVE_ARCH_UNALIGNED_STORE2_U4 1
#define HAVE_ARCH_UNALIGNED_STORE2_U8 1
#include "config/generic/unaligned.h"
namespace GTM HIDDEN {
template<>
inline uint16_t ALWAYS_INLINE
unaligned_load2<uint16_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
uint64_t v2 = c2->u64[0];
return __builtin_alpha_extwl (v1, ofs) | __builtin_alpha_extwh (v2, ofs);
}
template<>
inline uint32_t ALWAYS_INLINE
unaligned_load2<uint32_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
uint64_t v2 = c2->u64[0];
return __builtin_alpha_extll (v1, ofs) + __builtin_alpha_extlh (v2, ofs);
}
template<>
inline uint64_t ALWAYS_INLINE
unaligned_load2<uint64_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
uint64_t v2 = c2->u64[0];
return __builtin_alpha_extql (v1, ofs) | __builtin_alpha_extqh (v2, ofs);
}
#ifndef __alpha_bwx__
template<>
inline void
unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2,
size_t ofs, uint16_t val)
{
uint32_t vl = (uint32_t)val << 24, vh = val >> 8;
gtm_cacheline::store_mask (&c1->u32[CACHELINE_SIZE / 4 - 1], vl, 4);
gtm_cacheline::store_mask (&c2->u32[0], vh, 1);
}
#endif
template<>
inline void
unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2,
size_t ofs, uint32_t val)
{
uint64_t vl = __builtin_alpha_insll (val, ofs);
uint64_t ml = __builtin_alpha_insll (~0u, ofs);
uint64_t vh = __builtin_alpha_inslh (val, ofs);
uint64_t mh = __builtin_alpha_inslh (~0u, ofs);
gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml);
gtm_cacheline::store_mask (&c2->u64[0], vh, mh);
}
template<>
inline void
unaligned_store2<uint64_t>(gtm_cacheline *c1, gtm_cacheline *c2,
size_t ofs, uint64_t val)
{
uint64_t vl = __builtin_alpha_insql (val, ofs);
uint64_t ml = __builtin_alpha_insql (~0u, ofs);
uint64_t vh = __builtin_alpha_insqh (val, ofs);
uint64_t mh = __builtin_alpha_insqh (~0u, ofs);
gtm_cacheline::store_mask (&c1->u64[CACHELINE_SIZE / 8 - 1], vl, ml);
gtm_cacheline::store_mask (&c2->u64[0], vh, mh);
}
} // namespace GTM
#endif // LIBITM_ALPHA_UNALIGNED_H

config/generic/cacheline.cc

@ -1,49 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libitm_i.h"
namespace GTM HIDDEN {
void
gtm_cacheline::copy_mask (gtm_cacheline * __restrict d,
const gtm_cacheline * __restrict s,
gtm_cacheline_mask m)
{
const size_t n = sizeof (gtm_word);
if (m == (gtm_cacheline_mask) -1)
{
*d = *s;
return;
}
if (__builtin_expect (m == 0, 0))
return;
for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n)
store_mask (&d->w[i], s->w[i], m);
}
} // namespace GTM

config/generic/cacheline.h

@ -51,57 +51,8 @@ union gtm_cacheline
uint32_t u32[CACHELINE_SIZE / sizeof(uint32_t)];
uint64_t u64[CACHELINE_SIZE / sizeof(uint64_t)];
gtm_word w[CACHELINE_SIZE / sizeof(gtm_word)];
// Store S into D, but only the bytes specified by M.
template<typename T> static void store_mask (T *d, T s, uint8_t m);
// Copy S to D, but only the bytes specified by M.
static void copy_mask (gtm_cacheline * __restrict d,
const gtm_cacheline * __restrict s,
gtm_cacheline_mask m);
// A write barrier to emit after (a series of) copy_mask.
// When we're emitting non-temporal stores, the normal strong
// ordering of the machine doesn't apply.
static void copy_mask_wb () { atomic_write_barrier(); }
};
template<typename T>
inline void
gtm_cacheline::store_mask (T *d, T s, uint8_t m)
{
const uint8_t tm = (1 << sizeof(T)) - 1;
if (__builtin_expect (m & tm, tm))
{
if (__builtin_expect ((m & tm) == tm, 1))
*d = s;
else
{
const int half = sizeof(T) / 2;
typedef typename sized_integral<half>::type half_t;
half_t *dhalf = reinterpret_cast<half_t *>(d);
half_t s1, s2;
if (WORDS_BIGENDIAN)
s1 = s >> half*8, s2 = s;
else
s1 = s, s2 = s >> half*8;
store_mask (dhalf, s1, m);
store_mask (dhalf + 1, s2, m >> half);
}
}
}
template<>
inline void ALWAYS_INLINE
gtm_cacheline::store_mask<uint8_t> (uint8_t *d, uint8_t s, uint8_t m)
{
if (m & 1)
*d = s;
}
} // namespace GTM
#endif // LIBITM_CACHELINE_H

config/generic/cachepage.h

@ -1,77 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef LIBITM_CACHEPAGE_H
#define LIBITM_CACHEPAGE_H 1
namespace GTM HIDDEN {
// A "page" worth of saved cachelines plus modification masks. This
// arrangement is intended to minimize the overhead of alignment. The
// PAGE_SIZE defined by the target must be a constant for this to work,
// which means that this definition may not be the same as the real
// system page size. An additional define of FIXED_PAGE_SIZE by the
// target indicates that PAGE_SIZE exactly matches the system page size.
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif
struct gtm_cacheline_page
{
static const size_t LINES
= ((PAGE_SIZE - sizeof(gtm_cacheline_page *))
/ (CACHELINE_SIZE + sizeof(gtm_cacheline_mask)));
gtm_cacheline lines[LINES] __attribute__((aligned(PAGE_SIZE)));
gtm_cacheline_mask masks[LINES];
gtm_cacheline_page *prev;
static gtm_cacheline_page *
page_for_line (gtm_cacheline *c)
{
return (gtm_cacheline_page *)((uintptr_t)c & -PAGE_SIZE);
}
gtm_cacheline_mask *
mask_for_line (gtm_cacheline *c)
{
size_t index = c - &this->lines[0];
return &this->masks[index];
}
static gtm_cacheline_mask *
mask_for_page_line (gtm_cacheline *c)
{
gtm_cacheline_page *p = page_for_line (c);
return p->mask_for_line (c);
}
static void *operator new (size_t);
static void operator delete (void *);
};
} // namespace GTM
#endif // LIBITM_CACHEPAGE_H
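The removed gtm_cacheline_page packs its cachelines, their modification masks, and a free-list link into one PAGE_SIZE-aligned block, so page_for_line can recover the owning page by masking the low address bits and mask_for_line turns a pointer difference into an array index. A reduced sketch of that layout arithmetic with simplified, invented types (no allocator; only valid when the object really is allocated page-aligned, as the original operator new guarantees):

#include <cstddef>
#include <cstdint>

// Simplified stand-in for the removed gtm_cacheline_page layout.
struct line { unsigned char b[64]; };
typedef uint64_t line_mask;

static const size_t PAGE = 4096;
static const size_t NLINES
  = (PAGE - sizeof (void *)) / (sizeof (line) + sizeof (line_mask));

struct page
{
  line lines[NLINES] __attribute__((aligned (4096)));
  line_mask masks[NLINES];
  page *prev;					// free-list link, as in the original

  static page *page_for_line (line *c)		// owning page = address with low bits cleared
  { return (page *) ((uintptr_t) c & -(uintptr_t) PAGE); }

  line_mask *mask_for_line (line *c)		// index = pointer difference into lines[]
  { return &masks[c - &lines[0]]; }
};

With 64-byte lines and 8-byte masks this gives 56 lines per 4 KiB page; the remainder of the page is simply unused.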

config/generic/tls.cc

@ -30,51 +30,4 @@ namespace GTM HIDDEN {
__thread gtm_thread_tls _gtm_thr_tls;
#endif
// Filter out any updates that overlap the libitm stack, as defined by
// TOP (entry point to library) and BOT (below current function). This
// definition should be fine for all stack-grows-down architectures.
gtm_cacheline_mask __attribute__((noinline))
gtm_mask_stack(gtm_cacheline *line, gtm_cacheline_mask mask)
{
void *top = gtm_thr()->jb.cfa;
void *bot = __builtin_dwarf_cfa();
// We must have come through an entry point that set TOP.
assert (top != NULL);
if (line + 1 < bot)
{
// Since we don't have the REAL stack boundaries for this thread,
// we cannot know if this is a dead write to a stack address below
// the current function or if it is write to another VMA. In either
// case allowing the write should not affect correctness.
}
else if (line >= top)
{
// A valid write to an address in an outer stack frame, or a write
// to another VMA.
}
else
{
uintptr_t diff = (uintptr_t)top - (uintptr_t)line;
if (diff >= CACHELINE_SIZE)
{
// The write is either fully within the proscribed area, or the tail
// of the cacheline overlaps the proscribed area. Assume that all
// stacks are at least cacheline aligned and declare the head of the
// cacheline dead.
mask = 0;
}
else
{
// The head of the cacheline is within the proscribed area, but the
// tail of the cacheline is live. Eliminate the dead writes.
mask &= (gtm_cacheline_mask)-1 << diff;
}
}
return mask;
}
} // namespace GTM
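gtm_mask_stack, deleted above, clears write-mask bits for bytes that fall inside the libitm stack region between the transaction entry point (top) and the current frame (bot). The subtle step is the final masking: when the protected region begins diff bytes into the cacheline, only mask bits at offsets >= diff survive. A tiny illustration of that bit arithmetic (values invented for the example):

#include <cstdint>
#include <cstdio>

typedef uint64_t line_mask;   // bit i <=> byte i of a 64-byte cacheline

// Keep only the bytes at offset >= diff, as the removed gtm_mask_stack does
// when the head of the cacheline lies inside the protected stack area.
static inline line_mask drop_head (line_mask mask, unsigned diff)
{
  return diff >= 64 ? 0 : mask & ((line_mask) -1 << diff);
}

int main ()
{
  // A write touched bytes 0..15 of the line (mask 0xffff), but the first
  // 12 bytes overlap the libitm stack: only bytes 12..15 stay marked.
  std::printf ("%#lx\n", (unsigned long) drop_head (0xffff, 12));  // 0xf000
  return 0;
}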

config/generic/unaligned.h

@ -1,228 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef LIBITM_UNALIGNED_H
#define LIBITM_UNALIGNED_H 1
namespace GTM HIDDEN {
#ifndef STRICT_ALIGNMENT
#define STRICT_ALIGNMENT 1
#endif
// A type trait for whether type T requires strict alignment.
// The generic types are assumed to all be the same; specializations
// for target-specific types should be done in config/cpu/unaligned.h.
template<typename T>
struct strict_alignment
: public std::integral_constant<bool, STRICT_ALIGNMENT>
{ };
// A helper template for accessing an integral type the same size as T
template<typename T>
struct make_integral
: public sized_integral<sizeof(T)>
{ };
// A helper class for accessing T as an unaligned value.
template<typename T>
struct __attribute__((packed)) unaligned_helper
{ T x; };
// A helper class for view-converting T as an integer.
template<typename T>
union view_convert_helper
{
typedef T type;
typedef make_integral<T> itype;
type t;
itype i;
};
// Generate an unaligned load sequence.
// The compiler knows how to do this for any specific type.
template<typename T>
inline T ALWAYS_INLINE
unaligned_load(const void *t)
{
typedef unaligned_helper<T> UT;
const UT *ut = reinterpret_cast<const UT *>(t);
return ut->x;
}
// Generate an unaligned store sequence.
template<typename T>
inline void ALWAYS_INLINE
unaligned_store(void *t, T val)
{
typedef unaligned_helper<T> UT;
UT *ut = reinterpret_cast<UT *>(t);
ut->x = val;
}
// Generate an unaligned load from two different cachelines.
// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
template<typename T>
inline T ALWAYS_INLINE
unaligned_load2(const gtm_cacheline *c1, const gtm_cacheline *c2, size_t ofs)
{
size_t left = CACHELINE_SIZE - ofs;
T ret;
memcpy (&ret, &c1->b[ofs], left);
memcpy ((char *)&ret + ofs, c2, sizeof(T) - left);
return ret;
}
// Generate an unaligned store into two different cachelines.
// It is known that OFS + SIZEOF(T) > CACHELINE_SIZE.
template<typename T>
inline void ALWAYS_INLINE
unaligned_store2(gtm_cacheline *c1, gtm_cacheline *c2, size_t ofs, T val)
{
size_t left = CACHELINE_SIZE - ofs;
memcpy (&c1->b[ofs], &val, left);
memcpy (c2, (char *)&val + left, sizeof(T) - left);
}
#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U2
template<>
inline uint16_t ALWAYS_INLINE
unaligned_load2<uint16_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
uint16_t v1 = c1->b[CACHELINE_SIZE - 1];
uint16_t v2 = c2->b[0];
if (WORDS_BIGENDIAN)
return v1 << 8 | v2;
else
return v2 << 8 | v1;
}
#endif
#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U4
template<>
inline uint32_t ALWAYS_INLINE
unaligned_load2<uint32_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
uint32_t v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
uint32_t v2 = c2->u32[0];
int s2 = (ofs & (sizeof(uint32_t) - 1)) * 8;
int s1 = sizeof(uint32_t) * 8 - s2;
if (WORDS_BIGENDIAN)
return v1 << s2 | v2 >> s1;
else
return v2 << s2 | v1 >> s1;
}
#endif
#ifndef HAVE_ARCH_UNALIGNED_LOAD2_U8
template<>
inline uint64_t ALWAYS_INLINE
unaligned_load2<uint64_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
uint64_t v1 = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
uint64_t v2 = c2->u64[0];
int s2 = (ofs & (sizeof(uint64_t) - 1)) * 8;
int s1 = sizeof(uint64_t) * 8 - s2;
if (WORDS_BIGENDIAN)
return v1 << s2 | v2 >> s1;
else
return v2 << s2 | v1 >> s1;
}
#endif
template<>
inline float ALWAYS_INLINE
unaligned_load2<float>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
typedef view_convert_helper<float> VC; VC vc;
vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
return vc.t;
}
template<>
inline double ALWAYS_INLINE
unaligned_load2<double>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
typedef view_convert_helper<double> VC; VC vc;
vc.i = unaligned_load2<VC::itype>(c1, c2, ofs);
return vc.t;
}
#ifndef HAVE_ARCH_UNALIGNED_STORE2_U2
template<>
inline void ALWAYS_INLINE
unaligned_store2<uint16_t>(gtm_cacheline *c1, gtm_cacheline *c2,
size_t ofs, uint16_t val)
{
uint8_t vl = val, vh = val >> 8;
if (WORDS_BIGENDIAN)
{
c1->b[CACHELINE_SIZE - 1] = vh;
c2->b[0] = vl;
}
else
{
c1->b[CACHELINE_SIZE - 1] = vl;
c2->b[0] = vh;
}
}
#endif
#if 0
#ifndef HAVE_ARCH_UNALIGNED_STORE2_U4
template<>
inline void ALWAYS_INLINE
unaligned_store2<uint32_t>(gtm_cacheline *c1, gtm_cacheline *c2,
size_t ofs, uint32_t val)
{
// ??? We could reuse the store_mask stuff here.
}
#endif
template<>
inline void ALWAYS_INLINE
unaligned_store2<float>(gtm_cacheline *c1, gtm_cacheline *c2,
size_t ofs, float val)
{
typedef view_convert_helper<float> VC; VC vc;
vc.t = val;
unaligned_store2(c1, c2, ofs, vc.i);
}
#endif
} // namespace GTM
#endif // LIBITM_UNALIGNED_H
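The generic unaligned_load2 above splices a value from the tail of one cacheline and the head of the next, either with two memcpy calls or, in the integer specializations, with a shift-and-or. A usage-style sketch of the same splice outside libitm, with plain arrays standing in for gtm_cacheline and names invented for the example:

#include <cstdint>
#include <cstdio>
#include <cstring>

static const size_t LINE = 64;

// Splice a T whose bytes straddle two adjacent 64-byte blocks, in the style
// of the removed generic unaligned_load2; requires ofs + sizeof(T) > LINE.
template<typename T>
static T load_across (const unsigned char *line1, const unsigned char *line2,
		      size_t ofs)
{
  size_t left = LINE - ofs;			// bytes available in line1
  T ret;
  std::memcpy (&ret, line1 + ofs, left);
  std::memcpy ((char *) &ret + left, line2, sizeof (T) - left);
  return ret;
}

int main ()
{
  unsigned char a[LINE], b[LINE];
  for (size_t i = 0; i < LINE; ++i)
    {
      a[i] = i;
      b[i] = 0x80 + i;
    }
  // A 4-byte value starting at offset 62: bytes 62 and 63 of the first
  // block followed by bytes 0x80 and 0x81 of the second.
  std::printf ("%#x\n", load_across<uint32_t> (a, b, 62));
  return 0;
}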

config/posix/cachepage.cc

@ -1,183 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libitm_i.h"
#include <pthread.h>
//
// We have three possibilities for alloction: mmap, memalign, posix_memalign
//
#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
#include <sys/mman.h>
#include <fcntl.h>
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
namespace GTM HIDDEN {
#if defined(HAVE_MMAP_ANON)
# if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
# define MAP_ANONYMOUS MAP_ANON
# endif
# define dev_zero -1
#elif defined(HAVE_MMAP_DEV_ZERO)
# ifndef MAP_ANONYMOUS
# define MAP_ANONYMOUS 0
# endif
static int dev_zero = -1;
#endif
#if defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
/* If we get here, we've already opened /dev/zero and verified that
PAGE_SIZE is valid for the system. */
static gtm_cacheline_page * alloc_mmap (void) UNUSED;
static gtm_cacheline_page *
alloc_mmap (void)
{
gtm_cacheline_page *r;
r = (gtm_cacheline_page *) mmap (NULL, PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, dev_zero, 0);
if (r == (gtm_cacheline_page *) MAP_FAILED)
abort ();
return r;
}
#endif /* MMAP_ANON | MMAP_DEV_ZERO */
#ifdef HAVE_MEMALIGN
static gtm_cacheline_page * alloc_memalign (void) UNUSED;
static gtm_cacheline_page *
alloc_memalign (void)
{
gtm_cacheline_page *r;
r = (gtm_cacheline_page *) memalign (PAGE_SIZE, PAGE_SIZE);
if (r == NULL)
abort ();
return r;
}
#endif /* MEMALIGN */
#ifdef HAVE_POSIX_MEMALIGN
static gtm_cacheline_page *alloc_posix_memalign (void) UNUSED;
static gtm_cacheline_page *
alloc_posix_memalign (void)
{
void *r;
if (posix_memalign (&r, PAGE_SIZE, PAGE_SIZE))
abort ();
return (gtm_cacheline_page *) r;
}
#endif /* POSIX_MEMALIGN */
#if defined(HAVE_MMAP_ANON) && defined(FIXED_PAGE_SIZE)
# define alloc_page alloc_mmap
#elif defined(HAVE_MMAP_DEV_ZERO) && defined(FIXED_PAGE_SIZE)
static gtm_cacheline_page *
alloc_page (void)
{
if (dev_zero < 0)
{
dev_zero = open ("/dev/zero", O_RDWR);
assert (dev_zero >= 0);
}
return alloc_mmap ();
}
#elif defined(HAVE_MMAP_ANON) || defined(HAVE_MMAP_DEV_ZERO)
static gtm_cacheline_page * (*alloc_page) (void);
static void __attribute__((constructor))
init_alloc_page (void)
{
size_t page_size = getpagesize ();
if (page_size <= PAGE_SIZE && PAGE_SIZE % page_size == 0)
{
# ifndef HAVE_MMAP_ANON
dev_zero = open ("/dev/zero", O_RDWR);
assert (dev_zero >= 0);
# endif
alloc_page = alloc_mmap;
return;
}
# ifdef HAVE_MEMALIGN
alloc_page = alloc_memalign;
# elif defined(HAVE_POSIX_MEMALIGN)
alloc_page = alloc_posix_memalign;
# else
# error "No fallback aligned memory allocation method"
# endif
}
#elif defined(HAVE_MEMALIGN)
# define alloc_page alloc_memalign
#elif defined(HAVE_POSIX_MEMALIGN)
# define alloc_page alloc_posix_memalign
#else
# error "No aligned memory allocation method"
#endif
static gtm_cacheline_page *free_pages;
static pthread_mutex_t free_page_lock = PTHREAD_MUTEX_INITIALIZER;
void *
gtm_cacheline_page::operator new (size_t size)
{
assert (size == sizeof (gtm_cacheline_page));
assert (size <= PAGE_SIZE);
pthread_mutex_lock(&free_page_lock);
gtm_cacheline_page *r = free_pages;
free_pages = r ? r->prev : NULL;
pthread_mutex_unlock(&free_page_lock);
if (r == NULL)
r = alloc_page ();
return r;
}
void
gtm_cacheline_page::operator delete (void *xhead)
{
gtm_cacheline_page *head = static_cast<gtm_cacheline_page *>(xhead);
gtm_cacheline_page *tail;
if (head == 0)
return;
/* ??? We should eventually really free some of these. */
for (tail = head; tail->prev != 0; tail = tail->prev)
continue;
pthread_mutex_lock(&free_page_lock);
tail->prev = free_pages;
free_pages = head;
pthread_mutex_unlock(&free_page_lock);
}
} // namespace GTM
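The deleted cachepage.cc chooses an allocation primitive at startup (mmap when the configured PAGE_SIZE is compatible with the system page size, otherwise memalign or posix_memalign) and recycles pages through a mutex-protected free list. A heavily reduced sketch of the same recycle-or-allocate pattern, using only posix_memalign and std::mutex; this is an assumed simplification for illustration, not the removed implementation:

#include <cstdio>
#include <cstdlib>
#include <mutex>

static const size_t PAGE_SZ = 4096;

struct page { page *prev; /* ... cachelines and masks ... */ };

static page *free_pages;
static std::mutex free_lock;

static page *get_page ()
{
  {
    std::lock_guard<std::mutex> g (free_lock);
    if (page *r = free_pages)		// reuse a previously released page
      {
	free_pages = r->prev;
	return r;
      }
  }
  void *p;
  if (posix_memalign (&p, PAGE_SZ, PAGE_SZ) != 0)   // fresh aligned page
    std::abort ();
  return static_cast<page *> (p);
}

static void put_page (page *p)		// push back onto the free list
{
  std::lock_guard<std::mutex> g (free_lock);
  p->prev = free_pages;
  free_pages = p;
}

Unlike this sketch, the original operator delete pushes a whole chain of pages back at once and, as its own comment admits, never actually returns memory to the system.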

config/x86/cacheline.cc

@ -1,73 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libitm_i.h"
namespace GTM HIDDEN {
uint32_t const gtm_bit_to_byte_mask[16] =
{
0x00000000,
0x000000ff,
0x0000ff00,
0x0000ffff,
0x00ff0000,
0x00ff00ff,
0x00ffff00,
0x00ffffff,
0xff000000,
0xff0000ff,
0xff00ff00,
0xff00ffff,
0xffff0000,
0xffff00ff,
0xffffff00,
0xffffffff
};
#ifdef __SSE2__
# define MEMBER m128i
#else
# define MEMBER w
#endif
void
gtm_cacheline::copy_mask (gtm_cacheline * __restrict d,
const gtm_cacheline * __restrict s,
gtm_cacheline_mask m)
{
if (m == (gtm_cacheline_mask)-1)
{
*d = *s;
return;
}
if (__builtin_expect (m == 0, 0))
return;
size_t n = sizeof(d->MEMBER[0]);
for (size_t i = 0; i < CACHELINE_SIZE / n; ++i, m >>= n)
store_mask (&d->MEMBER[i], s->MEMBER[i], m);
}
} // namespace GTM
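gtm_bit_to_byte_mask above expands a 4-bit byte-selection nibble into a 32-bit mask holding 0xff in each selected byte; copy_mask then walks the cacheline one word (or one __m128i) at a time, consuming mask bits as it goes. A quick standalone check of how each table entry follows from its index (illustrative, not libitm code):

#include <cstdint>
#include <cstdio>

// Each bit of the 4-bit index selects one byte of a 32-bit word; the table
// entry has 0xff in exactly those byte positions.
static uint32_t bit_to_byte_mask (unsigned nibble)
{
  uint32_t m = 0;
  for (unsigned i = 0; i < 4; ++i)
    if (nibble & (1u << i))
      m |= (uint32_t) 0xff << (8 * i);
  return m;
}

int main ()
{
  for (unsigned i = 0; i < 16; ++i)
    std::printf ("0x%08x%s", bit_to_byte_mask (i), i == 15 ? "\n" : ", ");
  return 0;	// prints the same 16 values as the removed table
}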

config/x86/cacheline.h

@ -40,8 +40,6 @@ namespace GTM HIDDEN {
// in the cacheline with which it is associated.
typedef sized_integral<CACHELINE_SIZE / 8>::type gtm_cacheline_mask;
extern uint32_t const gtm_bit_to_byte_mask[16];
union gtm_cacheline
{
// Byte access to the cacheline.
@ -67,23 +65,6 @@ union gtm_cacheline
__m256i m256i[CACHELINE_SIZE / sizeof(__m256i)];
#endif
// Store S into D, but only the bytes specified by M.
static void store_mask (uint32_t *d, uint32_t s, uint8_t m);
static void store_mask (uint64_t *d, uint64_t s, uint8_t m);
#ifdef __SSE2__
static void store_mask (__m128i *d, __m128i s, uint16_t m);
#endif
// Copy S to D, but only the bytes specified by M.
static void copy_mask (gtm_cacheline * __restrict d,
const gtm_cacheline * __restrict s,
gtm_cacheline_mask m);
// A write barrier to emit after (a series of) copy_mask.
// When we're emitting non-temporal stores, the normal strong
// ordering of the machine doesn't apply.
static void copy_mask_wb ();
#if defined(__SSE__) || defined(__AVX__)
// Copy S to D; only bother defining if we can do this more efficiently
// than the compiler-generated default implementation.
@ -91,14 +72,6 @@ union gtm_cacheline
#endif // SSE, AVX
};
inline void
gtm_cacheline::copy_mask_wb ()
{
#ifdef __SSE2__
_mm_sfence ();
#endif
}
#if defined(__SSE__) || defined(__AVX__)
inline gtm_cacheline& ALWAYS_INLINE
gtm_cacheline::operator= (const gtm_cacheline & __restrict s)
@ -141,104 +114,12 @@ gtm_cacheline::operator= (const gtm_cacheline & __restrict s)
}
return *this;
#undef CP
#undef TYPE
}
#endif
// Support masked integer stores more efficiently with an unlocked cmpxchg
// insn. My reasoning is that while we write to locations that we do not wish
// to modify, we do it in an uninterruptable insn, and so we either truely
// write back the original data or the insn fails -- unlike with a
// load/and/or/write sequence which can be interrupted either by a kernel
// task switch or an unlucky cacheline steal by another processor. Avoiding
// the LOCK prefix improves performance by a factor of 10, and we don't need
// the memory barrier semantics implied by that prefix.
inline void ALWAYS_INLINE
gtm_cacheline::store_mask (uint32_t *d, uint32_t s, uint8_t m)
{
gtm_cacheline_mask tm = (1 << sizeof (s)) - 1;
if (__builtin_expect (m & tm, tm))
{
if (__builtin_expect ((m & tm) == tm, 1))
*d = s;
else
{
gtm_cacheline_mask bm = gtm_bit_to_byte_mask[m & 15];
gtm_word n, o = *d;
__asm("\n0:\t"
"mov %[o], %[n]\n\t"
"and %[m], %[n]\n\t"
"or %[s], %[n]\n\t"
"cmpxchg %[n], %[d]\n\t"
".byte 0x2e\n\t" // predict not-taken, aka jnz,pn
"jnz 0b"
: [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o)
: [s] "r" (s & bm), [m] "r" (~bm));
}
}
}
inline void ALWAYS_INLINE
gtm_cacheline::store_mask (uint64_t *d, uint64_t s, uint8_t m)
{
gtm_cacheline_mask tm = (1 << sizeof (s)) - 1;
if (__builtin_expect (m & tm, tm))
{
if (__builtin_expect ((m & tm) == tm, 1))
*d = s;
else
{
#ifdef __x86_64__
uint32_t bl = gtm_bit_to_byte_mask[m & 15];
uint32_t bh = gtm_bit_to_byte_mask[(m >> 4) & 15];
gtm_cacheline_mask bm = bl | ((gtm_cacheline_mask)bh << 31 << 1);
uint64_t n, o = *d;
__asm("\n0:\t"
"mov %[o], %[n]\n\t"
"and %[m], %[n]\n\t"
"or %[s], %[n]\n\t"
"cmpxchg %[n], %[d]\n\t"
".byte 0x2e\n\t" // predict not-taken, aka jnz,pn
"jnz 0b"
: [d] "+m"(*d), [n] "=&r" (n), [o] "+a"(o)
: [s] "r" (s & bm), [m] "r" (~bm));
#else
/* ??? While it's possible to perform this operation with
cmpxchg8b, the sequence requires all 7 general registers
and thus cannot be performed with -fPIC. Don't even try. */
uint32_t *d32 = reinterpret_cast<uint32_t *>(d);
store_mask (d32, s, m);
store_mask (d32 + 1, s >> 32, m >> 4);
#endif
}
}
}
#ifdef __SSE2__
inline void ALWAYS_INLINE
gtm_cacheline::store_mask (__m128i *d, __m128i s, uint16_t m)
{
if (__builtin_expect (m == 0, 0))
return;
if (__builtin_expect (m == 0xffff, 1))
*d = s;
else
{
__m128i bm0, bm1, bm2, bm3;
bm0 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
bm1 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
bm2 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
bm3 = _mm_set_epi32 (0, 0, 0, gtm_bit_to_byte_mask[m & 15]); m >>= 4;
bm0 = _mm_unpacklo_epi32 (bm0, bm1);
bm2 = _mm_unpacklo_epi32 (bm2, bm3);
bm0 = _mm_unpacklo_epi64 (bm0, bm2);
_mm_maskmoveu_si128 (s, bm0, (char *)d);
}
}
#endif // SSE2
} // namespace GTM
#endif // LIBITM_CACHELINE_H
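The long comment in the removed x86 store_mask explains the design: the read-modify-write of the unselected bytes must be a single instruction, so that a preempted load/and/or/store sequence cannot write back stale data, while the LOCK prefix and its barrier semantics are deliberately avoided. Compiler intrinsics cannot express an unlocked cmpxchg, so the closest portable rendering below uses the (locked, hence slower) __atomic builtins; treat it purely as an illustration of the byte-merge logic, not as an equivalent of the removed code:

#include <cstdint>

// bm holds 0xff in every byte selected by the 4-bit mask m (see the
// gtm_bit_to_byte_mask table above); merge those bytes of s into *d.
static inline void store_mask_u32 (uint32_t *d, uint32_t s, uint32_t bm)
{
  uint32_t o = __atomic_load_n (d, __ATOMIC_RELAXED);
  uint32_t n;
  do
    n = (o & ~bm) | (s & bm);	// keep unselected bytes, insert the new ones
  while (!__atomic_compare_exchange_n (d, &o, n, true,
				       __ATOMIC_RELAXED, __ATOMIC_RELAXED));
}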

config/x86/unaligned.h

@ -1,237 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef LIBITM_X86_UNALIGNED_H
#define LIBITM_X86_UNALIGNED_H 1
#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1
#include "config/generic/unaligned.h"
namespace GTM HIDDEN {
template<>
inline uint32_t
unaligned_load2<uint32_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
uint32_t r, lo, hi;
lo = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
hi = c2->u32[0];
asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo));
return r;
}
template<>
inline uint64_t
unaligned_load2<uint64_t>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
#ifdef __x86_64__
uint64_t r, lo, hi;
lo = c1->u64[CACHELINE_SIZE / sizeof(uint64_t) - 1];
hi = c2->u64[0];
asm("shrd %b2, %1, %0" : "=r"(r) : "r"(hi), "c"((ofs & 3) * 8), "0"(lo));
return r;
#else
uint32_t v0, v1, v2;
uint64_t r;
if (ofs < CACHELINE_SIZE - 4)
{
v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 2];
v1 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
v2 = c2->u32[0];
}
else
{
v0 = c1->u32[CACHELINE_SIZE / sizeof(uint32_t) - 1];
v1 = c2->u32[0];
v2 = c2->u32[1];
}
ofs = (ofs & 3) * 8;
asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]"
: "=A"(r) : "c"(ofs), [v0] "a"(v0), [v1] "d"(v1), [v2] "r"(v2));
return r;
#endif
}
#if defined(__SSE2__) || defined(__MMX__)
template<>
inline _ITM_TYPE_M64
unaligned_load2<_ITM_TYPE_M64>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
# ifdef __x86_64__
__m128i lo = _mm_movpi64_epi64 (c1->m64[CACHELINE_SIZE / 8 - 1]);
__m128i hi = _mm_movpi64_epi64 (c2->m64[0]);
ofs = (ofs & 7) * 8;
lo = _mm_srli_epi64 (lo, ofs);
hi = _mm_slli_epi64 (hi, 64 - ofs);
lo = lo | hi;
return _mm_movepi64_pi64 (lo);
# else
// On 32-bit we're about to return the result in an MMX register, so go
// ahead and do the computation in that unit, even if SSE2 is available.
__m64 lo = c1->m64[CACHELINE_SIZE / 8 - 1];
__m64 hi = c2->m64[0];
ofs = (ofs & 7) * 8;
lo = _mm_srli_si64 (lo, ofs);
hi = _mm_slli_si64 (hi, 64 - ofs);
return lo | hi;
# endif
}
#endif // SSE2 or MMX
// The SSE types are strictly aligned.
#ifdef __SSE__
template<>
struct strict_alignment<_ITM_TYPE_M128>
: public std::true_type
{ };
// Expand the unaligned SSE move instructions.
template<>
inline _ITM_TYPE_M128
unaligned_load<_ITM_TYPE_M128>(const void *t)
{
return _mm_loadu_ps (static_cast<const float *>(t));
}
template<>
inline void
unaligned_store<_ITM_TYPE_M128>(void *t, _ITM_TYPE_M128 val)
{
_mm_storeu_ps (static_cast<float *>(t), val);
}
#endif // SSE
#ifdef __AVX__
// The AVX types are strictly aligned when it comes to vmovaps vs vmovups.
template<>
struct strict_alignment<_ITM_TYPE_M256>
: public std::true_type
{ };
template<>
inline _ITM_TYPE_M256
unaligned_load<_ITM_TYPE_M256>(const void *t)
{
return _mm256_loadu_ps (static_cast<const float *>(t));
}
template<>
inline void
unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val)
{
_mm256_storeu_ps (static_cast<float *>(t), val);
}
#endif // AVX
#ifdef __XOP__
# define HAVE_ARCH_REALIGN_M128I 1
extern const __v16qi GTM_vpperm_shift[16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]);
}
#elif defined(__AVX__)
# define HAVE_ARCH_REALIGN_M128I 1
extern "C" const uint64_t GTM_vpalignr_table[16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
register __m128i xmm0 __asm__("xmm0") = hi;
register __m128i xmm1 __asm__("xmm1") = lo;
__asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
"r"(&GTM_vpalignr_table[byte_count]));
return xmm0;
}
#elif defined(__SSSE3__)
# define HAVE_ARCH_REALIGN_M128I 1
extern "C" const uint64_t GTM_palignr_table[16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
register __m128i xmm0 __asm__("xmm0") = hi;
register __m128i xmm1 __asm__("xmm1") = lo;
__asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
"r"(&GTM_palignr_table[byte_count]));
return xmm0;
}
#elif defined(__SSE2__)
# define HAVE_ARCH_REALIGN_M128I 1
extern "C" const char GTM_pshift_table[16 * 16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
register __m128i xmm0 __asm__("xmm0") = lo;
register __m128i xmm1 __asm__("xmm1") = hi;
__asm("call *%2" : "+x"(xmm0), "+x"(xmm1)
: "r"(GTM_pshift_table + byte_count*16));
return xmm0;
}
#endif // XOP, AVX, SSSE3, SSE2
#ifdef HAVE_ARCH_REALIGN_M128I
template<>
inline _ITM_TYPE_M128
unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
return (_ITM_TYPE_M128)
realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1],
c2->m128i[0], ofs & 15);
}
#endif // HAVE_ARCH_REALIGN_M128I
#ifdef __AVX__
template<>
inline _ITM_TYPE_M256
unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1,
const gtm_cacheline *c2, size_t ofs)
{
__m128i v0, v1;
__m256i r;
v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs);
if (ofs < CACHELINE_SIZE - 16)
v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]);
else
v1 = _mm_loadu_si128((const __m128i *)&c2->b[ofs + 16 - CACHELINE_SIZE]);
r = _mm256_castsi128_si256 ((__m128i)v0);
r = _mm256_insertf128_si256 (r, (__m128i)v1, 1);
return (_ITM_TYPE_M256) r;
}
#endif // AVX
} // namespace GTM
#endif // LIBITM_X86_UNALIGNED_H
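realign_m128i above works around palignr/vpalignr taking their byte count as an immediate: rather than expanding sixteen intrinsic calls inline, the removed code makes an indirect call into a table of one-instruction stubs (defined in x86_sse.cc and x86_avx.cc below) indexed by the runtime byte count. Where SSSE3 is available, a more conventional alternative is a switch that instantiates each immediate; the sketch below shows that approach and is not what the library used:

#ifdef __SSSE3__
#include <tmmintrin.h>

// Concatenate hi:lo and shift right by byte_count bytes, like the removed
// realign_m128i, but via a switch because _mm_alignr_epi8 needs a
// compile-time immediate.
static inline __m128i
realign_switch (__m128i lo, __m128i hi, unsigned byte_count)
{
  switch (byte_count & 15)
    {
    default: return lo;
    case 1:  return _mm_alignr_epi8 (hi, lo, 1);
    case 2:  return _mm_alignr_epi8 (hi, lo, 2);
    case 3:  return _mm_alignr_epi8 (hi, lo, 3);
    case 4:  return _mm_alignr_epi8 (hi, lo, 4);
    case 5:  return _mm_alignr_epi8 (hi, lo, 5);
    case 6:  return _mm_alignr_epi8 (hi, lo, 6);
    case 7:  return _mm_alignr_epi8 (hi, lo, 7);
    case 8:  return _mm_alignr_epi8 (hi, lo, 8);
    case 9:  return _mm_alignr_epi8 (hi, lo, 9);
    case 10: return _mm_alignr_epi8 (hi, lo, 10);
    case 11: return _mm_alignr_epi8 (hi, lo, 11);
    case 12: return _mm_alignr_epi8 (hi, lo, 12);
    case 13: return _mm_alignr_epi8 (hi, lo, 13);
    case 14: return _mm_alignr_epi8 (hi, lo, 14);
    case 15: return _mm_alignr_epi8 (hi, lo, 15);
    }
}
#endif // __SSSE3__

Either way the byte count selects one of sixteen code paths; the removed stub tables simply keep each path down to a single instruction plus ret.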

config/x86/x86_avx.cc

@ -34,62 +34,3 @@ _ITM_LM256 (const _ITM_TYPE_M256 *ptr)
{
GTM::GTM_LB (ptr, sizeof (*ptr));
}
// Helpers for re-aligning two 128-bit values.
#ifdef __XOP__
const __v16qi GTM::GTM_vpperm_shift[16] =
{
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17 },
{ 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 },
{ 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 },
{ 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20 },
{ 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21 },
{ 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22 },
{ 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 },
{ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 },
{ 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25 },
{ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26 },
{ 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27 },
{ 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28 },
{ 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29 },
{ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30 },
};
#else
# define INSN0 "movdqa %xmm1, %xmm0"
# define INSN(N) "vpalignr $" #N ", %xmm0, %xmm1, %xmm0"
# define TABLE_ENT_0 INSN0 "\n\tret\n\t"
# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t"
asm(".pushsection .text\n\
.balign 16\n\
.globl GTM_vpalignr_table\n\
.hidden GTM_vpalignr_table\n\
.type GTM_vpalignr_table, @function\n\
GTM_vpalignr_table:\n\t"
TABLE_ENT_0
TABLE_ENT(1)
TABLE_ENT(2)
TABLE_ENT(3)
TABLE_ENT(4)
TABLE_ENT(5)
TABLE_ENT(6)
TABLE_ENT(7)
TABLE_ENT(8)
TABLE_ENT(9)
TABLE_ENT(10)
TABLE_ENT(11)
TABLE_ENT(12)
TABLE_ENT(13)
TABLE_ENT(14)
TABLE_ENT(15)
".balign 8\n\
.size GTM_vpalignr_table, .-GTM_vpalignr_table\n\
.popsection");
# undef INSN0
# undef INSN
# undef TABLE_ENT_0
# undef TABLE_ENT
#endif

config/x86/x86_sse.cc

@ -41,82 +41,3 @@ _ITM_LM128 (const _ITM_TYPE_M128 *ptr)
{
GTM::GTM_LB (ptr, sizeof (*ptr));
}
// Helpers for re-aligning two 128-bit values.
#ifdef __SSSE3__
# define INSN0 "movdqa %xmm1, %xmm0"
# define INSN(N) "palignr $" #N ", %xmm1, %xmm0"
# define TABLE_ENT_0 INSN0 "\n\tret\n\t"
# define TABLE_ENT(N) ".balign 8\n\t" INSN(N) "\n\tret\n\t"
asm(".pushsection .text\n\
.balign 16\n\
.globl GTM_palignr_table\n\
.hidden GTM_palignr_table\n\
.type GTM_palignr_table, @function\n\
GTM_palignr_table:\n\t"
TABLE_ENT_0
TABLE_ENT(1)
TABLE_ENT(2)
TABLE_ENT(3)
TABLE_ENT(4)
TABLE_ENT(5)
TABLE_ENT(6)
TABLE_ENT(7)
TABLE_ENT(8)
TABLE_ENT(9)
TABLE_ENT(10)
TABLE_ENT(11)
TABLE_ENT(12)
TABLE_ENT(13)
TABLE_ENT(14)
TABLE_ENT(15)
".balign 8\n\
.size GTM_palignr_table, .-GTM_palignr_table\n\
.popsection");
# undef INSN0
# undef INSN
# undef TABLE_ENT_0
# undef TABLE_ENT
#elif defined(__SSE2__)
# define INSNS_8 "punpcklqdq %xmm1, %xmm0"
# define INSNS(N) "psrldq $"#N", %xmm0\n\t" \
"pslldq $(16-"#N"), %xmm1\n\t" \
"por %xmm1, %xmm0"
# define TABLE_ENT_0 "ret\n\t"
# define TABLE_ENT_8 ".balign 16\n\t" INSNS_8 "\n\tret\n\t"
# define TABLE_ENT(N) ".balign 16\n\t" INSNS(N) "\n\tret\n\t"
asm(".pushsection .text\n\
.balign 16\n\
.globl GTM_pshift_table\n\
.hidden GTM_pshift_table\n\
.type GTM_pshift_table, @function\n\
GTM_pshift_table:\n\t"
TABLE_ENT_0
TABLE_ENT(1)
TABLE_ENT(2)
TABLE_ENT(3)
TABLE_ENT(4)
TABLE_ENT(5)
TABLE_ENT(6)
TABLE_ENT(7)
TABLE_ENT_8
TABLE_ENT(9)
TABLE_ENT(10)
TABLE_ENT(11)
TABLE_ENT(12)
TABLE_ENT(13)
TABLE_ENT(14)
TABLE_ENT(15)
".balign 8\n\
.size GTM_pshift_table, .-GTM_pshift_table\n\
.popsection");
# undef INSNS_8
# undef INSNS
# undef TABLE_ENT_0
# undef TABLE_ENT_8
# undef TABLE_ENT
#endif

libitm_i.h

@ -78,7 +78,6 @@ enum gtm_restart_reason
#include "rwlock.h"
#include "aatree.h"
#include "cacheline.h"
#include "cachepage.h"
#include "stmlock.h"
#include "dispatch.h"
#include "containers.h"

memcpy.cc

@ -1,365 +0,0 @@
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libitm_i.h"
using namespace GTM;
static void
do_memcpy (uintptr_t idst, uintptr_t isrc, size_t size,
abi_dispatch::lock_type W, abi_dispatch::lock_type R)
{
abi_dispatch *disp = abi_disp();
// The position in the destination cacheline where *IDST starts.
uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
// The position in the source cacheline where *ISRC starts.
uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
const gtm_cacheline *src
= reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
gtm_cacheline *dst
= reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
const gtm_cacheline *sline;
abi_dispatch::mask_pair dpair;
if (size == 0)
return;
// If both SRC and DST data start at the same position in the cachelines,
// we can easily copy the data in tandem, cacheline by cacheline...
if (dofs == sofs)
{
// We copy the data in three stages:
// (a) Copy stray bytes at the beginning that are smaller than a
// cacheline.
if (sofs != 0)
{
size_t sleft = CACHELINE_SIZE - sofs;
size_t min = (size <= sleft ? size : sleft);
dpair = disp->write_lock(dst, W);
sline = disp->read_lock(src, R);
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << sofs;
memcpy (&dpair.line->b[sofs], &sline->b[sofs], min);
dst++;
src++;
size -= min;
}
// (b) Copy subsequent cacheline sized chunks.
while (size >= CACHELINE_SIZE)
{
dpair = disp->write_lock(dst, W);
sline = disp->read_lock(src, R);
*dpair.mask = -1;
*dpair.line = *sline;
dst++;
src++;
size -= CACHELINE_SIZE;
}
// (c) Copy anything left over.
if (size != 0)
{
dpair = disp->write_lock(dst, W);
sline = disp->read_lock(src, R);
*dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
memcpy (dpair.line, sline, size);
}
}
// ... otherwise, we must copy the data in disparate hunks using
// temporary storage.
else
{
gtm_cacheline c;
size_t sleft = CACHELINE_SIZE - sofs;
sline = disp->read_lock(src, R);
// As above, we copy the data in three stages:
// (a) Copy stray bytes at the beginning that are smaller than a
// cacheline.
if (dofs != 0)
{
size_t dleft = CACHELINE_SIZE - dofs;
size_t min = (size <= dleft ? size : dleft);
dpair = disp->write_lock(dst, W);
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
// If what's left in the source cacheline will fit in the
// rest of the destination cacheline, straight up copy it.
if (min <= sleft)
{
memcpy (&dpair.line->b[dofs], &sline->b[sofs], min);
sofs += min;
}
// Otherwise, we need more bits from the source cacheline
// that are available. Piece together what we need from
// contiguous (source) cachelines, into temp space, and copy
// it over.
else
{
memcpy (&c, &sline->b[sofs], sleft);
sline = disp->read_lock(++src, R);
sofs = min - sleft;
memcpy (&c.b[sleft], sline, sofs);
memcpy (&dpair.line->b[dofs], &c, min);
}
sleft = CACHELINE_SIZE - sofs;
dst++;
size -= min;
}
// (b) Copy subsequent cacheline sized chunks.
while (size >= CACHELINE_SIZE)
{
// We have a full (destination) cacheline where to put the
// data, but to get to the corresponding cacheline sized
// chunk in the source, we have to piece together two
// contiguous source cachelines.
memcpy (&c, &sline->b[sofs], sleft);
sline = disp->read_lock(++src, R);
memcpy (&c.b[sleft], sline, sofs);
dpair = disp->write_lock(dst, W);
*dpair.mask = -1;
*dpair.line = c;
dst++;
size -= CACHELINE_SIZE;
}
// (c) Copy anything left over.
if (size != 0)
{
dpair = disp->write_lock(dst, W);
*dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
// If what's left to copy is entirely in the remaining
// source cacheline, do it.
if (size <= sleft)
memcpy (dpair.line, &sline->b[sofs], size);
// Otherwise, piece together the remaining bits, and copy.
else
{
memcpy (&c, &sline->b[sofs], sleft);
sline = disp->read_lock(++src, R);
memcpy (&c.b[sleft], sline, size - sleft);
memcpy (dpair.line, &c, size);
}
}
}
}
static void
do_memmove (uintptr_t idst, uintptr_t isrc, size_t size,
abi_dispatch::lock_type W, abi_dispatch::lock_type R)
{
abi_dispatch *disp = abi_disp();
uintptr_t dleft, sleft, sofs, dofs;
const gtm_cacheline *sline;
abi_dispatch::mask_pair dpair;
if (size == 0)
return;
/* The co-aligned memmove below doesn't work for DST == SRC, so filter
that out. It's tempting to just return here, as this is a no-op move.
However, our caller has the right to expect the locks to be acquired
as advertized. */
if (__builtin_expect (idst == isrc, 0))
{
/* If the write lock is already acquired, nothing to do. */
if (W == abi_dispatch::WaW)
return;
/* If the destination is protected, acquire a write lock. */
if (W != abi_dispatch::NOLOCK)
R = abi_dispatch::RfW;
/* Notice serial mode, where we don't acquire locks at all. */
if (R == abi_dispatch::NOLOCK)
return;
idst = isrc + size;
for (isrc &= -CACHELINE_SIZE; isrc < idst; isrc += CACHELINE_SIZE)
disp->read_lock(reinterpret_cast<const gtm_cacheline *>(isrc), R);
return;
}
/* Fall back to memcpy if the implementation above can handle it. */
if (idst < isrc || isrc + size <= idst)
{
do_memcpy (idst, isrc, size, W, R);
return;
}
/* What remains requires a backward copy from the end of the blocks. */
idst += size;
isrc += size;
dofs = idst & (CACHELINE_SIZE - 1);
sofs = isrc & (CACHELINE_SIZE - 1);
dleft = CACHELINE_SIZE - dofs;
sleft = CACHELINE_SIZE - sofs;
gtm_cacheline *dst
= reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
const gtm_cacheline *src
= reinterpret_cast<const gtm_cacheline *>(isrc & -CACHELINE_SIZE);
if (dofs == 0)
dst--;
if (sofs == 0)
src--;
if (dofs == sofs)
{
/* Since DST and SRC are co-aligned, and we didn't use the memcpy
optimization above, that implies that SIZE > CACHELINE_SIZE. */
if (sofs != 0)
{
dpair = disp->write_lock(dst, W);
sline = disp->read_lock(src, R);
*dpair.mask |= ((gtm_cacheline_mask)1 << sleft) - 1;
memcpy (dpair.line, sline, sleft);
dst--;
src--;
size -= sleft;
}
while (size >= CACHELINE_SIZE)
{
dpair = disp->write_lock(dst, W);
sline = disp->read_lock(src, R);
*dpair.mask = -1;
*dpair.line = *sline;
dst--;
src--;
size -= CACHELINE_SIZE;
}
if (size != 0)
{
size_t ofs = CACHELINE_SIZE - size;
dpair = disp->write_lock(dst, W);
sline = disp->read_lock(src, R);
*dpair.mask |= (((gtm_cacheline_mask)1 << size) - 1) << ofs;
memcpy (&dpair.line->b[ofs], &sline->b[ofs], size);
}
}
else
{
gtm_cacheline c;
sline = disp->read_lock(src, R);
if (dofs != 0)
{
size_t min = (size <= dofs ? size : dofs);
if (min <= sofs)
{
sofs -= min;
memcpy (&c, &sline->b[sofs], min);
}
else
{
size_t min_ofs = min - sofs;
memcpy (&c.b[min_ofs], sline, sofs);
sline = disp->read_lock(--src, R);
sofs = CACHELINE_SIZE - min_ofs;
memcpy (&c, &sline->b[sofs], min_ofs);
}
dofs = dleft - min;
dpair = disp->write_lock(dst, W);
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
memcpy (&dpair.line->b[dofs], &c, min);
sleft = CACHELINE_SIZE - sofs;
dst--;
size -= min;
}
while (size >= CACHELINE_SIZE)
{
memcpy (&c.b[sleft], sline, sofs);
sline = disp->read_lock(--src, R);
memcpy (&c, &sline->b[sofs], sleft);
dpair = disp->write_lock(dst, W);
*dpair.mask = -1;
*dpair.line = c;
dst--;
size -= CACHELINE_SIZE;
}
if (size != 0)
{
dofs = CACHELINE_SIZE - size;
memcpy (&c.b[sleft], sline, sofs);
if (sleft > dofs)
{
sline = disp->read_lock(--src, R);
memcpy (&c, &sline->b[sofs], sleft);
}
dpair = disp->write_lock(dst, W);
*dpair.mask |= (gtm_cacheline_mask)-1 << dofs;
memcpy (&dpair.line->b[dofs], &c.b[dofs], size);
}
}
}
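// Illustrative sketch, not part of the original file: the direction test used
// at the top of do_memmove.  A forward (memcpy-style) copy is safe unless the
// destination begins inside the source range [isrc, isrc + size).
#include <cstddef>
#include <cstdint>
static inline bool
forward_copy_ok (uintptr_t idst, uintptr_t isrc, size_t size)
{
  return idst < isrc || isrc + size <= idst;
}
// When this returns false, do_memmove copies backward from the ends of the
// two blocks instead.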
#define ITM_MEM_DEF(NAME, READ, WRITE) \
void ITM_REGPARM _ITM_memcpy##NAME(void *dst, const void *src, size_t size) \
{ \
do_memcpy ((uintptr_t)dst, (uintptr_t)src, size, \
abi_dispatch::WRITE, abi_dispatch::READ); \
} \
void ITM_REGPARM _ITM_memmove##NAME(void *dst, const void *src, size_t size) \
{ \
do_memmove ((uintptr_t)dst, (uintptr_t)src, size, \
abi_dispatch::WRITE, abi_dispatch::READ); \
}
ITM_MEM_DEF(RnWt, NOLOCK, W)
ITM_MEM_DEF(RnWtaR, NOLOCK, WaR)
ITM_MEM_DEF(RnWtaW, NOLOCK, WaW)
ITM_MEM_DEF(RtWn, R, NOLOCK)
ITM_MEM_DEF(RtWt, R, W)
ITM_MEM_DEF(RtWtaR, R, WaR)
ITM_MEM_DEF(RtWtaW, R, WaW)
ITM_MEM_DEF(RtaRWn, RaR, NOLOCK)
ITM_MEM_DEF(RtaRWt, RaR, W)
ITM_MEM_DEF(RtaRWtaR, RaR, WaR)
ITM_MEM_DEF(RtaRWtaW, RaR, WaW)
ITM_MEM_DEF(RtaWWn, RaW, NOLOCK)
ITM_MEM_DEF(RtaWWt, RaW, W)
ITM_MEM_DEF(RtaWWtaR, RaW, WaR)
ITM_MEM_DEF(RtaWWtaW, RaW, WaW)
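// Illustrative note, not part of the original file: each ITM_MEM_DEF line
// above expands to a pair of ABI entry points.  For example,
// ITM_MEM_DEF(RtWt, R, W) produces roughly:
//
//   void ITM_REGPARM _ITM_memcpyRtWt (void *dst, const void *src, size_t size)
//   {
//     do_memcpy ((uintptr_t)dst, (uintptr_t)src, size,
//                abi_dispatch::W, abi_dispatch::R);
//   }
//   void ITM_REGPARM _ITM_memmoveRtWt (void *dst, const void *src, size_t size)
//   {
//     do_memmove ((uintptr_t)dst, (uintptr_t)src, size,
//                 abi_dispatch::W, abi_dispatch::R);
//   }
//
// with the suffix naming the read (Rn/Rt/RtaR/RtaW) and write
// (Wn/Wt/WtaR/WtaW) lock types requested from the dispatch layer.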

View File

@ -1,78 +0,0 @@
/* Copyright (C) 2008, 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libitm_i.h"
using namespace GTM;
static void
do_memset(uintptr_t idst, int c, size_t size, abi_dispatch::lock_type W)
{
abi_dispatch *disp = abi_disp();
uintptr_t dofs = idst & (CACHELINE_SIZE - 1);
abi_dispatch::mask_pair dpair;
gtm_cacheline *dst
= reinterpret_cast<gtm_cacheline *>(idst & -CACHELINE_SIZE);
if (size == 0)
return;
if (dofs != 0)
{
size_t dleft = CACHELINE_SIZE - dofs;
size_t min = (size <= dleft ? size : dleft);
dpair = disp->write_lock(dst, W);
*dpair.mask |= (((gtm_cacheline_mask)1 << min) - 1) << dofs;
memset (&dpair.line->b[dofs], c, min);
dst++;
size -= min;
}
while (size >= CACHELINE_SIZE)
{
dpair = disp->write_lock(dst, W);
*dpair.mask = -1;
memset (dpair.line, c, CACHELINE_SIZE);
dst++;
size -= CACHELINE_SIZE;
}
if (size != 0)
{
dpair = disp->write_lock(dst, W);
*dpair.mask |= ((gtm_cacheline_mask)1 << size) - 1;
memset (dpair.line, c, size);
}
}
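// Illustrative sketch, not part of the original file: do_memset above splits
// the destination range into a partial head line, whole cachelines, and a
// partial tail.  The 64-byte line size is an assumption.
#include <cstddef>
#include <cstdint>
struct line_split { size_t head, full_lines, tail; };
static line_split
split_range (uintptr_t idst, size_t size, size_t line_size = 64)
{
  line_split s = { 0, 0, 0 };
  size_t dofs = idst & (line_size - 1);
  if (dofs != 0)
    {
      size_t dleft = line_size - dofs;
      s.head = (size <= dleft ? size : dleft);
      size -= s.head;
    }
  s.full_lines = size / line_size;
  s.tail = size % line_size;
  return s;
}
// The head and tail are written under partial masks; the middle lines get a
// full mask (-1), exactly as in the three stages above.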
#define ITM_MEM_DEF(WRITE) \
void ITM_REGPARM _ITM_memset##WRITE(void *dst, int c, size_t size) \
{ \
do_memset ((uintptr_t)dst, c, size, abi_dispatch::WRITE); \
}
ITM_MEM_DEF(W)
ITM_MEM_DEF(WaR)
ITM_MEM_DEF(WaW)

View File

@ -1,628 +0,0 @@
/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Richard Henderson <rth@redhat.com>.
This file is part of the GNU Transactional Memory Library (libitm).
Libitm is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for
more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#include "libitm_i.h"
namespace {
using namespace GTM;
class wbetl_dispatch : public abi_dispatch
{
private:
static const size_t RW_SET_SIZE = 4096;
struct r_entry
{
gtm_version version;
gtm_stmlock *lock;
};
r_entry *m_rset_entries;
size_t m_rset_nb_entries;
size_t m_rset_size;
struct w_entry
{
/* The locks live in a hashtable, so multiple cachelines can hash
   to a given bucket.  This link points to the next cacheline (if
   any) that also hashes to this bucket.  */
struct w_entry *next;
/* Every entry in this bucket (accessed by NEXT) has the same LOCK
address below. */
gtm_stmlock *lock;
gtm_cacheline *addr;
gtm_cacheline *value;
gtm_version version;
};
w_entry *m_wset_entries;
size_t m_wset_nb_entries;
size_t m_wset_size;
bool m_wset_reallocate;
gtm_version m_start;
gtm_version m_end;
gtm_cacheline_page *m_cache_page;
unsigned m_n_cache_page;
private:
bool local_w_entry_p (w_entry *w);
bool has_read (gtm_stmlock *lock);
bool validate();
bool extend();
gtm_cacheline *do_write_lock(gtm_cacheline *);
gtm_cacheline *do_after_write_lock(gtm_cacheline *);
const gtm_cacheline *do_read_lock(const gtm_cacheline *, bool);
public:
wbetl_dispatch();
virtual const gtm_cacheline *read_lock(const gtm_cacheline *, ls_modifier);
virtual mask_pair write_lock(gtm_cacheline *, ls_modifier);
virtual bool trycommit();
virtual void rollback();
virtual void reinit();
virtual void fini();
virtual bool trydropreference (void *, size_t);
};
/* Check if W is one of our write locks. */
inline bool
wbetl_dispatch::local_w_entry_p (w_entry *w)
{
return (m_wset_entries <= w && w < m_wset_entries + m_wset_nb_entries);
}
/* Check if stripe has been read previously. */
inline bool
wbetl_dispatch::has_read (gtm_stmlock *lock)
{
// ??? Consider using an AA tree to look up the r_set entries.
size_t n = m_rset_nb_entries;
for (size_t i = 0; i < n; ++i)
if (m_rset_entries[i].lock == lock)
return true;
return false;
}
/* Validate read set, i.e. check if all read addresses are still valid now. */
bool
wbetl_dispatch::validate ()
{
__sync_synchronize ();
size_t n = m_rset_nb_entries;
for (size_t i = 0; i < n; ++i)
{
r_entry *r = &m_rset_entries[i];
gtm_stmlock l = *r->lock;
if (gtm_stmlock_owned_p (l))
{
w_entry *w = (w_entry *) gtm_stmlock_get_addr (l);
// If someone has locked us, it better be by someone in the
// current thread.
if (!local_w_entry_p (w))
return false;
}
else if (gtm_stmlock_get_version (l) != r->version)
return false;
}
return true;
}
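// Illustrative sketch, not part of the original file: the gtm_stmlock helpers
// used in validate() are declared in libitm's internal headers.  The encoding
// below (low bit = owned, remaining bits = version or owner pointer) is only
// an assumption used to make the checks above concrete.
#include <cstdint>
typedef uintptr_t stmlock_example;                          // hypothetical
static inline bool ex_owned_p (stmlock_example l)           { return l & 1; }
static inline void *ex_get_addr (stmlock_example l)         { return (void *) (l & ~(uintptr_t) 1); }
static inline uintptr_t ex_get_version (stmlock_example l)  { return l >> 1; }
static inline stmlock_example ex_set_owned (void *w)        { return (uintptr_t) w | 1; }
static inline stmlock_example ex_set_version (uintptr_t v)  { return v << 1; }
// validate() then reads: an owned lock must point at one of our own write
// set entries; an unowned lock must still carry the version we recorded.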
/* Extend the snapshot range. */
bool
wbetl_dispatch::extend ()
{
gtm_version now = gtm_get_clock ();
if (validate ())
{
m_end = now;
return true;
}
return false;
}
/* Acquire a write lock on ADDR. */
gtm_cacheline *
wbetl_dispatch::do_write_lock(gtm_cacheline *addr)
{
gtm_stmlock *lock;
gtm_stmlock l, l2;
gtm_version version;
w_entry *w, *prev = NULL;
lock = gtm_get_stmlock (addr);
l = *lock;
restart_no_load:
if (gtm_stmlock_owned_p (l))
{
w = (w_entry *) gtm_stmlock_get_addr (l);
/* Did we previously write the same address? */
if (local_w_entry_p (w))
{
prev = w;
while (1)
{
if (addr == prev->addr)
return prev->value;
if (prev->next == NULL)
break;
prev = prev->next;
}
/* Get the version from the previous write-set entry.  */
version = prev->version;
/* If there aren't enough entries, we must reallocate the array,
   which invalidates all pointers to write set entries, and that
   means we have to restart the transaction.  */
if (m_wset_nb_entries == m_wset_size)
{
m_wset_size *= 2;
m_wset_reallocate = true;
gtm_tx()->restart (RESTART_REALLOCATE);
}
w = &m_wset_entries[m_wset_nb_entries];
goto do_write;
}
gtm_tx()->restart (RESTART_LOCKED_WRITE);
}
else
{
version = gtm_stmlock_get_version (l);
/* We might have read an older version previously. */
if (version > m_end)
{
if (has_read (lock))
gtm_tx()->restart (RESTART_VALIDATE_WRITE);
}
/* If the write set is full, restart the transaction so the write
   set entries can be reallocated.  */
if (m_wset_nb_entries == m_wset_size)
{
m_wset_size *= 2;
m_wset_reallocate = true;
gtm_tx()->restart (RESTART_REALLOCATE);
}
/* Acquire the lock. */
w = &m_wset_entries[m_wset_nb_entries];
l2 = gtm_stmlock_set_owned (w);
l = __sync_val_compare_and_swap (lock, l, l2);
if (l != l2)
goto restart_no_load;
}
do_write:
m_wset_nb_entries++;
if (prev != NULL)
prev->next = w;
w->next = 0;
w->lock = lock;
w->addr = addr;
w->version = version;
gtm_cacheline_page *page = m_cache_page;
unsigned index = m_n_cache_page;
if (page == NULL || index == gtm_cacheline_page::LINES)
{
gtm_cacheline_page *npage = new gtm_cacheline_page;
npage->prev = page;
m_cache_page = page = npage;
m_n_cache_page = 1;
index = 0;
}
else
m_n_cache_page = index + 1;
gtm_cacheline *line = &page->lines[index];
w->value = line;
page->masks[index] = 0;
*line = *addr;
return line;
}
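// Illustrative sketch, not part of the original file: the private cacheline
// copies handed out at the end of do_write_lock come from a bump allocator
// over a linked list of pages.  The page shape here (LINES per page plus a
// prev pointer) mirrors the gtm_cacheline_page use above but is an assumption.
#include <cstddef>
template<typename Line, size_t LINES>
struct bump_page_alloc
{
  struct page { Line lines[LINES]; page *prev; };
  page *cur = nullptr;          // most recently allocated page
  size_t next = 0;              // next free line index within CUR
  Line *alloc_line ()
  {
    if (cur == nullptr || next == LINES)
      {
        page *p = new page;     // grab a fresh page when the current is full
        p->prev = cur;
        cur = p;
        next = 0;
      }
    return &cur->lines[next++];
  }
};
// reinit() above keeps a single page alive and restarts the index at zero.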
gtm_cacheline *
wbetl_dispatch::do_after_write_lock (gtm_cacheline *addr)
{
gtm_stmlock *lock;
gtm_stmlock l;
w_entry *w;
lock = gtm_get_stmlock (addr);
l = *lock;
assert (gtm_stmlock_owned_p (l));
w = (w_entry *) gtm_stmlock_get_addr (l);
assert (local_w_entry_p (w));
while (1)
{
if (addr == w->addr)
return w->value;
w = w->next;
}
}
/* Acquire a read lock on ADDR. */
const gtm_cacheline *
wbetl_dispatch::do_read_lock (const gtm_cacheline *addr, bool after_read)
{
gtm_stmlock *lock;
gtm_stmlock l, l2;
gtm_version version;
w_entry *w;
lock = gtm_get_stmlock (addr);
l = *lock;
restart_no_load:
if (gtm_stmlock_owned_p (l))
{
w = (w_entry *) gtm_stmlock_get_addr (l);
/* Did we previously write the same address? */
if (local_w_entry_p (w))
{
while (1)
{
if (addr == w->addr)
return w->value;
if (w->next == NULL)
return addr;
w = w->next;
}
}
gtm_tx()->restart (RESTART_LOCKED_READ);
}
version = gtm_stmlock_get_version (l);
/* If version is no longer valid, re-validate the read set. */
if (version > m_end)
{
if (!extend ())
gtm_tx()->restart (RESTART_VALIDATE_READ);
if (!after_read)
{
// Verify that the version has not yet been overwritten.  The read
// value has not yet been added to the read set and may not have been
// checked during the extend.
//
// ??? This only makes sense if we're actually reading the value
// and returning it now -- which I believe the original TinySTM
// did. This doesn't make a whole lot of sense when we're
// manipulating cachelines as we are now. Do we need some other
// form of lock verification here, or is the validate call in
// trycommit sufficient?
__sync_synchronize ();
l2 = *lock;
if (l != l2)
{
l = l2;
goto restart_no_load;
}
}
}
if (!after_read)
{
r_entry *r;
/* Add the address and version to the read set. */
if (m_rset_nb_entries == m_rset_size)
{
m_rset_size *= 2;
m_rset_entries = (r_entry *)
xrealloc (m_rset_entries, m_rset_size * sizeof(r_entry));
}
r = &m_rset_entries[m_rset_nb_entries++];
r->version = version;
r->lock = lock;
}
return addr;
}
const gtm_cacheline *
wbetl_dispatch::read_lock (const gtm_cacheline *addr, ls_modifier ltype)
{
switch (ltype)
{
case NONTXNAL:
return addr;
case R:
return do_read_lock (addr, false);
case RaR:
return do_read_lock (addr, true);
case RaW:
return do_after_write_lock (const_cast<gtm_cacheline *>(addr));
case RfW:
return do_write_lock (const_cast<gtm_cacheline *>(addr));
default:
abort ();
}
}
abi_dispatch::mask_pair
wbetl_dispatch::write_lock (gtm_cacheline *addr, ls_modifier ltype)
{
gtm_cacheline *line;
switch (ltype)
{
case NONTXNAL:
return mask_pair (addr, &mask_sink);
case W:
case WaR:
line = do_write_lock (addr);
break;
case WaW:
line = do_after_write_lock (addr);
break;
default:
abort ();
}
return mask_pair (line, gtm_cacheline_page::mask_for_page_line (line));
}
/* Commit the transaction. */
bool
wbetl_dispatch::trycommit ()
{
const size_t n = m_wset_nb_entries;
if (n != 0)
{
/* Get commit timestamp. */
gtm_version t = gtm_inc_clock ();
/* Validate only if a concurrent transaction has started since. */
if (m_start != t - 1 && !validate ())
return false;
/* Install new versions. */
for (size_t i = 0; i < n; ++i)
{
w_entry *w = &m_wset_entries[i];
gtm_cacheline_mask mask
= *gtm_cacheline_page::mask_for_page_line (w->value);
/* Filter out any updates that overlap the libitm stack. */
mask = gtm_mask_stack (w->addr, mask);
gtm_cacheline::copy_mask (w->addr, w->value, mask);
}
/* Only emit barrier after all cachelines are copied. */
gtm_cacheline::copy_mask_wb ();
/* Drop locks. */
for (size_t i = 0; i < n; ++i)
{
w_entry *w = &m_wset_entries[i];
/* Every link along the chain has the same lock, but only
bother dropping the lock once per bucket (at the end). */
if (w->next == NULL)
*w->lock = gtm_stmlock_set_version (t);
}
}
__sync_synchronize ();
return true;
}
void
wbetl_dispatch::rollback ()
{
/* Drop locks. */
const size_t n = m_wset_nb_entries;
for (size_t i = 0; i < n; ++i)
{
w_entry *w = &m_wset_entries[i];
/* Every link along the chain has the same lock, but only
bother dropping the lock once per bucket (at the end). */
if (w->next == NULL)
*w->lock = gtm_stmlock_set_version (w->version);
}
__sync_synchronize ();
}
void
wbetl_dispatch::reinit ()
{
gtm_cacheline_page *page;
m_rset_nb_entries = 0;
m_wset_nb_entries = 0;
if (m_wset_reallocate)
{
m_wset_reallocate = 0;
m_wset_entries = (w_entry *)
xrealloc (m_wset_entries, m_wset_size * sizeof(w_entry));
}
page = m_cache_page;
if (page)
{
/* Release all but one of the pages of cachelines. */
gtm_cacheline_page *prev = page->prev;
if (prev)
{
page->prev = 0;
delete prev;
}
/* Start the next cacheline allocation from the beginning. */
m_n_cache_page = 0;
}
m_start = m_end = gtm_get_clock ();
}
void
wbetl_dispatch::fini ()
{
delete m_cache_page;
free (m_rset_entries);
free (m_wset_entries);
delete this;
}
/* Attempt to drop any internal references to PTR. Return TRUE if successful.
This is an adaptation of the transactional memcpy function.
What we do here is flush out the current transactional content of
PTR to real memory, and remove the write mask bits associated with
it so future commits will ignore this piece of memory. */
bool
wbetl_dispatch::trydropreference (void *ptr, size_t size)
{
if (size == 0)
return true;
if (!validate ())
return false;
uintptr_t isrc = (uintptr_t)ptr;
// The position in the source cacheline where *PTR starts.
uintptr_t sofs = isrc & (CACHELINE_SIZE - 1);
gtm_cacheline *src
= reinterpret_cast<gtm_cacheline *>(isrc & -CACHELINE_SIZE);
unsigned char *dst = (unsigned char *)ptr;
abi_dispatch::mask_pair pair;
// If we're trying to drop a reference, we should already have a
// write lock on it. If we don't have one, there's no work to do.
if (!gtm_stmlock_owned_p (*gtm_get_stmlock (src)))
return true;
// We copy the data in three stages:
// (a) Copy stray bytes at the beginning that are smaller than a
// cacheline.
if (sofs != 0)
{
size_t sleft = CACHELINE_SIZE - sofs;
size_t min = (size <= sleft ? size : sleft);
// WaW will give us the current locked entry.
pair = this->write_lock (src, WaW);
// *jedi mind wave*...these aren't the droids you're looking for.
*pair.mask &= ~((((gtm_cacheline_mask)1 << min) - 1) << sofs);
memcpy (dst, &pair.line->b[sofs], min);
dst += min;
src++;
size -= min;
}
// (b) Copy subsequent cacheline sized chunks.
while (size >= CACHELINE_SIZE)
{
pair = this->write_lock(src, WaW);
*pair.mask = 0;
memcpy (dst, pair.line, CACHELINE_SIZE);
dst += CACHELINE_SIZE;
src++;
size -= CACHELINE_SIZE;
}
// (c) Copy anything left over.
if (size != 0)
{
pair = this->write_lock(src, WaW);
*pair.mask &= ~(((gtm_cacheline_mask)1 << size) - 1);
memcpy (dst, pair.line, size);
}
// No need to drop locks, since we're going to abort the transaction
// anyhow.
return true;
}
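// Illustrative sketch, not part of the original file: how stage (a) above
// drops MIN bytes starting at byte SOFS from a write mask so a later commit
// ignores them.  A 64-bit, one-bit-per-byte mask is assumed; MIN stays below
// the line size whenever SOFS is nonzero, so the shift is well defined.
#include <cstddef>
#include <cstdint>
typedef uint64_t cacheline_mask_example;        // hypothetical stand-in
static inline cacheline_mask_example
clear_byte_range (cacheline_mask_example mask, size_t sofs, size_t min)
{
  return mask & ~((((cacheline_mask_example) 1 << min) - 1) << sofs);
}
// e.g. clear_byte_range (~0ULL, 4, 8) zeroes bits 4..11 and leaves every
// other byte bit set.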
wbetl_dispatch::wbetl_dispatch ()
: abi_dispatch (false, false)
{
m_rset_entries = (r_entry *) xmalloc (RW_SET_SIZE * sizeof(r_entry));
m_rset_nb_entries = 0;
m_rset_size = RW_SET_SIZE;
m_wset_entries = (w_entry *) xmalloc (RW_SET_SIZE * sizeof(w_entry));
m_wset_nb_entries = 0;
m_wset_size = RW_SET_SIZE;
m_wset_reallocate = false;
m_start = m_end = gtm_get_clock ();
m_cache_page = 0;
m_n_cache_page = 0;
}
} // anon namespace
abi_dispatch *
GTM::dispatch_wbetl ()
{
return new wbetl_dispatch ();
}

View File

@ -38,6 +38,7 @@ subdir = testsuite
DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \
$(top_srcdir)/../config/asmcfi.m4 \
$(top_srcdir)/../config/depstand.m4 \
$(top_srcdir)/../config/enable.m4 \
$(top_srcdir)/../config/futex.m4 \
@ -90,8 +91,6 @@ ECHO_N = @ECHO_N@
ECHO_T = @ECHO_T@
EGREP = @EGREP@
EXEEXT = @EXEEXT@
FC = @FC@
FCFLAGS = @FCFLAGS@
FGREP = @FGREP@
GREP = @GREP@
INSTALL = @INSTALL@
@ -142,7 +141,6 @@ abs_top_srcdir = @abs_top_srcdir@
ac_ct_CC = @ac_ct_CC@
ac_ct_CXX = @ac_ct_CXX@
ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
ac_ct_FC = @ac_ct_FC@
am__include = @am__include@
am__leading_dot = @am__leading_dot@
am__quote = @am__quote@