From 5bbf7664c3e26292702d9d7b37d6a1f872f8b5ea Mon Sep 17 00:00:00 2001 From: Michael Collison Date: Sun, 23 Jul 2017 22:46:34 +0000 Subject: [PATCH] Add optimized implementation of mersenne twister for aarch64 2017-07-23 Michael Collison Add optimized implementation of mersenne twister for aarch64 * config/cpu/aarch64/opt/ext/opt_random.h: New file. (__aarch64_recursion): New function. (__aarch64_lsr_128): New function. (__aarch64_lsl_128): New function. (operator==): New function. (simd_fast_mersenne_twister_engine): Implement method _M_gen_rand. * config/cpu/aarch64/opt/bits/opt_random.h: New file. * include/ext/random: (simd_fast_mersenne_twister_engine): Add _M_state private array. From-SVN: r250464 --- libstdc++-v3/ChangeLog | 14 ++ .../config/cpu/aarch64/opt/bits/opt_random.h | 47 +++++ .../config/cpu/aarch64/opt/ext/opt_random.h | 180 ++++++++++++++++++ libstdc++-v3/include/ext/random | 5 + 4 files changed, 246 insertions(+) create mode 100644 libstdc++-v3/config/cpu/aarch64/opt/bits/opt_random.h create mode 100644 libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index edd6f3ffbac1..0a8759717717 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,17 @@ +2017-07-23 Michael Collison + + Add optimized implementation of mersenne twister for aarch64 + * config/cpu/aarch64/opt/ext/opt_random.h: New file. + (__aarch64_recursion): New function. + (__aarch64_lsr_128): New function. + (__aarch64_lsl_128): New function. + (operator==): New function. + (simd_fast_mersenne_twister_engine): Implement + method _M_gen_rand. + * config/cpu/aarch64/opt/bits/opt_random.h: New file. + * include/ext/random: (simd_fast_mersenne_twister_engine): + Add _M_state private array. 
+ 2017-07-23 François Dumont PR libstdc++/81064 diff --git a/libstdc++-v3/config/cpu/aarch64/opt/bits/opt_random.h b/libstdc++-v3/config/cpu/aarch64/opt/bits/opt_random.h new file mode 100644 index 000000000000..fba7ea812adb --- /dev/null +++ b/libstdc++-v3/config/cpu/aarch64/opt/bits/opt_random.h @@ -0,0 +1,47 @@ +// Optimizations for random number functions, aarch64 version -*- C++ -*- + +// Copyright (C) 2017 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file bits/opt_random.h + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. 
@headername{random} + */ + +#ifndef _BITS_OPT_RANDOM_H +#define _BITS_OPT_RANDOM_H 1 + +#pragma GCC system_header + + +namespace std _GLIBCXX_VISIBILITY (default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + + + + +_GLIBCXX_END_NAMESPACE_VERSION +} // namespace + + +#endif // _BITS_OPT_RANDOM_H diff --git a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h new file mode 100644 index 000000000000..330050fc7f79 --- /dev/null +++ b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h @@ -0,0 +1,180 @@ +// Optimizations for random number extensions, aarch64 version -*- C++ -*- + +// Copyright (C) 2017 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. + +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. + +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. + +/** @file ext/opt_random.h + * This is an internal header file, included by other library headers. + * Do not attempt to use it directly. 
@headername{ext/random} + */ + +#ifndef _EXT_OPT_RANDOM_H +#define _EXT_OPT_RANDOM_H 1 + +#pragma GCC system_header + +#ifdef __ARM_NEON +// __VEXT (_A, _B, _C): shuffle picking 16 consecutive bytes from two vectors; indices depend on endianness. +#ifdef __AARCH64EB__ +# define __VEXT(_A,_B,_C) __builtin_shuffle (_A, _B, (__Uint8x16_t) \ + {16-_C, 17-_C, 18-_C, 19-_C, 20-_C, 21-_C, 22-_C, 23-_C, \ + 24-_C, 25-_C, 26-_C, 27-_C, 28-_C, 29-_C, 30-_C, 31-_C}) +#else +# define __VEXT(_A,_B,_C) __builtin_shuffle (_B, _A, (__Uint8x16_t) \ + {_C, _C+1, _C+2, _C+3, _C+4, _C+5, _C+6, _C+7, \ + _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15}) +#endif + +namespace __gnu_cxx _GLIBCXX_VISIBILITY (default) +{ +_GLIBCXX_BEGIN_NAMESPACE_VERSION + + namespace { + // Logical shift right of the 128-bit vector __a by __c bytes (__c * 8 bits), filling with zero bytes. + + __extension__ extern __inline __Uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) + __aarch64_lsr_128 (__Uint8x16_t __a, __const int __c) + { + const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + + return (__Uint32x4_t) __VEXT (__zero, __a, __c); + } + + // Logical shift left of the 128-bit vector __a by __c bytes (__c * 8 bits), filling with zero bytes. + + __extension__ extern __inline __Uint32x4_t + __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) + __aarch64_lsl_128 (__Uint8x16_t __a, __const int __c) + { + const __Uint8x16_t __zero = {0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0}; + + return (__Uint32x4_t) __VEXT (__a, __zero, 16 - __c); + } + // Returns a ^ lsl_128 (a, sl2) ^ ((b >> sr1) & e) ^ lsr_128 (c, sr2) ^ (d << sl1). + template<size_t __sl1, size_t __sl2, size_t __sr1, size_t __sr2> + inline __Uint32x4_t __aarch64_recursion (__Uint32x4_t __a, + __Uint32x4_t __b, + __Uint32x4_t __c, + __Uint32x4_t __d, + __Uint32x4_t __e) + { + __Uint32x4_t __y = (__b >> __sr1); + __Uint32x4_t __z = __aarch64_lsr_128 ((__Uint8x16_t) __c, __sr2); + + __Uint32x4_t __v = __d << __sl1; + + __z = __z ^ __a; + __z = __z ^ __v; + + __Uint32x4_t __x = __aarch64_lsl_128 ((__Uint8x16_t) __a, __sl2); + + __y = __y & __e; + __z = __z ^ __x; + return __z ^ __y; + } +} + // NEON _M_gen_rand: recomputes all _M_nstate vectors of _M_state in place, then sets _M_pos to 0. +#define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_GEN_READ 1 + template<typename _UIntType, size_t __m, + size_t __pos1, size_t __sl1, size_t __sl2, + size_t __sr1, size_t __sr2, + uint32_t __msk1, uint32_t __msk2, + uint32_t __msk3, uint32_t __msk4, + uint32_t __parity1, uint32_t __parity2, + uint32_t __parity3, uint32_t __parity4> + void simd_fast_mersenne_twister_engine<_UIntType, __m, + __pos1, __sl1, 
__sl2, __sr1, __sr2, + __msk1, __msk2, __msk3, __msk4, + __parity1, __parity2, __parity3, + __parity4>:: + _M_gen_rand (void) + { + __Uint32x4_t __r1 = _M_state[_M_nstate - 2]; + __Uint32x4_t __r2 = _M_state[_M_nstate - 1]; + + __Uint32x4_t __aData = {__msk1, __msk2, __msk3, __msk4}; + + size_t __i; + for (__i = 0; __i < _M_nstate - __pos1; ++__i) + { + __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2> + (_M_state[__i], _M_state[__i + __pos1], __r1, __r2, __aData); + + _M_state[__i] = __r; + + __r1 = __r2; + __r2 = __r; + } + for (; __i < _M_nstate; ++__i) + { + __Uint32x4_t __r = __aarch64_recursion<__sl1, __sl2, __sr1, __sr2> + (_M_state[__i], _M_state[__i + __pos1 - _M_nstate], __r1, __r2, + __aData); + + _M_state[__i] = __r; + + __r1 = __r2; + __r2 = __r; + } + + _M_pos = 0; + } + + // Equal iff _M_pos matches and the XOR of every pair of state vectors is all-zero. +#define _GLIBCXX_OPT_HAVE_RANDOM_SFMT_OPERATOREQUAL 1 + template<typename _UIntType, size_t __m, + size_t __pos1, size_t __sl1, size_t __sl2, + size_t __sr1, size_t __sr2, + uint32_t __msk1, uint32_t __msk2, + uint32_t __msk3, uint32_t __msk4, + uint32_t __parity1, uint32_t __parity2, + uint32_t __parity3, uint32_t __parity4> + bool + operator==(const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType, + __m, __pos1, __sl1, __sl2, __sr1, __sr2, + __msk1, __msk2, __msk3, __msk4, + __parity1, __parity2, __parity3, __parity4>& __lhs, + const __gnu_cxx::simd_fast_mersenne_twister_engine<_UIntType, + __m, __pos1, __sl1, __sl2, __sr1, __sr2, + __msk1, __msk2, __msk3, __msk4, + __parity1, __parity2, __parity3, __parity4>& __rhs) + { + if (__lhs._M_pos != __rhs._M_pos) + return false; + + __Uint32x4_t __res = __lhs._M_state[0] ^ __rhs._M_state[0]; + + for (size_t __i = 1; __i < __lhs._M_nstate; ++__i) + __res |= __lhs._M_state[__i] ^ __rhs._M_state[__i]; + + return (__int128) __res == 0; + } + +_GLIBCXX_END_NAMESPACE_VERSION + } // namespace + +#endif // __ARM_NEON + +#endif // _EXT_OPT_RANDOM_H diff --git a/libstdc++-v3/include/ext/random b/libstdc++-v3/include/ext/random index c991345886f7..3665b285de7a 100644 --- a/libstdc++-v3/include/ext/random +++ b/libstdc++-v3/include/ext/random @@ -183,6 +183,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { #ifdef __SSE2__ __m128i _M_state[_M_nstate]; +#endif +#ifdef __ARM_NEON +#ifdef 
__aarch64__ + __Uint32x4_t _M_state[_M_nstate]; +#endif #endif uint32_t _M_state32[_M_nstate32]; result_type _M_stateT[state_size];