libstdc++: Limit memory allocation in stable_sort/inplace_merge (PR 83938)

Reduce memory allocation in stable_sort/inplace_merge algorithms to what is needed
by the implementation.

Co-authored-by: John Chang  <john.chang@samba.tv>

libstdc++-v3/ChangeLog:

	PR libstdc++/83938
	* include/bits/stl_tempbuf.h (get_temporary_buffer): Change __len
	computation in the loop to avoid truncation.
	* include/bits/stl_algo.h:
	(__inplace_merge): Take temporary buffer length from smallest range.
	(__stable_sort): Limit temporary buffer length.
	* testsuite/25_algorithms/inplace_merge/1.cc (test4): New.
	* testsuite/performance/25_algorithms/stable_sort.cc: Test stable_sort
	under different heap memory conditions.
	* testsuite/performance/25_algorithms/inplace_merge.cc: New test.
This commit is contained in:
François Dumont 2020-01-22 17:55:54 +01:00
parent 9e071b6e5e
commit ba23e045fc
5 changed files with 413 additions and 24 deletions

View File

@ -2523,6 +2523,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
_ValueType;
typedef typename iterator_traits<_BidirectionalIterator>::difference_type
_DistanceType;
typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf;
if (__first == __middle || __middle == __last)
return;
@ -2530,8 +2531,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
const _DistanceType __len1 = std::distance(__first, __middle);
const _DistanceType __len2 = std::distance(__middle, __last);
typedef _Temporary_buffer<_BidirectionalIterator, _ValueType> _TmpBuf;
_TmpBuf __buf(__first, __len1 + __len2);
// __merge_adaptive will use a buffer for the smaller of
// [first,middle) and [middle,last).
_TmpBuf __buf(__first, std::min(__len1, __len2));
if (__buf.begin() == 0)
std::__merge_without_buffer
@ -2740,6 +2742,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::__merge_sort_with_buffer(__first, __middle, __buffer, __comp);
std::__merge_sort_with_buffer(__middle, __last, __buffer, __comp);
}
std::__merge_adaptive(__first, __middle, __last,
_Distance(__middle - __first),
_Distance(__last - __middle),
@ -5005,9 +5008,14 @@ _GLIBCXX_BEGIN_NAMESPACE_ALGO
_ValueType;
typedef typename iterator_traits<_RandomAccessIterator>::difference_type
_DistanceType;
typedef _Temporary_buffer<_RandomAccessIterator, _ValueType> _TmpBuf;
_TmpBuf __buf(__first, std::distance(__first, __last));
if (__first == __last)
return;
// __stable_sort_adaptive sorts the range in two halves,
// so the buffer only needs to fit half the range at once.
_TmpBuf __buf(__first, (__last - __first + 1) / 2);
if (__buf.begin() == 0)
std::__inplace_stable_sort(__first, __last, __comp);

View File

@ -110,7 +110,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
std::nothrow));
if (__tmp != 0)
return std::pair<_Tp*, ptrdiff_t>(__tmp, __len);
__len /= 2;
__len = __len == 1 ? 0 : ((__len + 1) / 2);
}
return std::pair<_Tp*, ptrdiff_t>(static_cast<_Tp*>(0), 0);
}

View File

@ -28,7 +28,7 @@ using std::inplace_merge;
typedef test_container<int, bidirectional_iterator_wrapper> container;
void
void
test1()
{
int array[] = { 1 };
@ -39,7 +39,7 @@ test1()
inplace_merge(con2.begin(), con2.begin(), con2.end());
}
void
void
test2()
{
int array[] = { 0, 2, 4, 1, 3, 5 };
@ -86,6 +86,36 @@ test3()
VERIFY( s[0].b == 0 && s[1].b == 4 && s[2].b == 1 && s[3].b == 5 );
}
void
test4()
{
S s[8];
for (int pivot_idx = 0; pivot_idx < 8; ++pivot_idx)
{
int bval = 0;
for (int i = 0; i != pivot_idx; ++i)
{
s[i].a = i;
s[i].b = bval++;
}
for (int i = pivot_idx; i != 8; ++i)
{
s[i].a = i - pivot_idx;
s[i].b = bval++;
}
inplace_merge(s, s + pivot_idx, s + 8);
for (int i = 1; i < 8; ++i)
{
VERIFY( !(s[i] < s[i - 1]) );
if (s[i - 1].a == s[i].a)
VERIFY( s[i - 1].b < s[i].b );
}
}
}
int
main()
{
@ -95,12 +125,15 @@ main()
__gnu_test::set_new_limit(sizeof(S) * 4);
test3();
test4();
__gnu_test::set_new_limit(sizeof(S));
test3();
test4();
__gnu_test::set_new_limit(0);
test3();
test4();
return 0;
}

View File

@ -0,0 +1,290 @@
// Copyright (C) 2020 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3. If not see
// <http://www.gnu.org/licenses/>.
#include <vector>
#include <algorithm>
#include <cmath>
#include <testsuite_new_operators.h>
#include <testsuite_performance.h>
const int max_size = 10000000;
const int small_size = 200000;
const int front_pivot_idx = 10000;
int middle_pivot_idx = max_size / 2;
int back_pivot_idx = max_size - front_pivot_idx;
void bench(int mem_threshold, int pivot_index,
std::vector<int> revv,
std::vector<int> fwdv,
std::vector<int> wstv,
std::vector<int> rndv)
{
using namespace __gnu_test;
time_counter time;
resource_counter resource;
set_new_limit(mem_threshold);
start_counters(time, resource);
std::inplace_merge(revv.begin(), revv.begin() + pivot_index, revv.end());
stop_counters(time, resource);
set_new_limit(~size_t(0));
report_performance(__FILE__, "reverse", time, resource);
clear_counters(time, resource);
set_new_limit(mem_threshold);
start_counters(time, resource);
std::inplace_merge(fwdv.begin(), fwdv.begin() + pivot_index, fwdv.end());
stop_counters(time, resource);
set_new_limit(~size_t(0));
report_performance(__FILE__, "forward", time, resource);
clear_counters(time, resource);
set_new_limit(mem_threshold);
start_counters(time, resource);
std::inplace_merge(wstv.begin(), wstv.begin() + pivot_index, wstv.end());
stop_counters(time, resource);
set_new_limit(~size_t(0));
report_performance(__FILE__, "worst", time, resource);
clear_counters(time, resource);
set_new_limit(mem_threshold);
start_counters(time, resource);
std::inplace_merge(rndv.begin(), rndv.begin() + pivot_index, rndv.end());
stop_counters(time, resource);
set_new_limit(~size_t(0));
report_performance(__FILE__, "random", time, resource);
}
void mem_bench(double mem_ratio,
const std::vector<int>& front_revv,
const std::vector<int>& middle_revv,
const std::vector<int>& back_revv,
const std::vector<int>& fwdv,
const std::vector<int>& front_wstv,
const std::vector<int>& middle_wstv,
const std::vector<int>& back_wstv,
const std::vector<int>& front_rndv,
const std::vector<int>& middle_rndv,
const std::vector<int>& back_rndv)
{
using namespace __gnu_test;
time_counter time;
resource_counter resource;
int max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int);
start_counters(time, resource);
bench(max_mem, front_pivot_idx, front_revv, fwdv, front_wstv, front_rndv);
stop_counters(time, resource);
report_performance(__FILE__, "front pivot", time, resource);
clear_counters(time, resource);
max_mem = (int)std::ceil(middle_pivot_idx * mem_ratio) * sizeof(int);
start_counters(time, resource);
bench(max_mem, middle_pivot_idx, middle_revv, fwdv, middle_wstv, middle_rndv);
stop_counters(time, resource);
report_performance(__FILE__, "middle pivot", time, resource);
clear_counters(time, resource);
max_mem = (int)std::ceil(front_pivot_idx * mem_ratio) * sizeof(int);
start_counters(time, resource);
bench(max_mem, back_pivot_idx, back_revv, fwdv, back_wstv, back_rndv);
stop_counters(time, resource);
report_performance(__FILE__, "back pivot", time, resource);
}
void init_reverse(std::vector<int>& v, size_t pivot_index)
{
int val = 0;
for (size_t i = pivot_index; i != v.size(); ++i)
v[i] = val++;
for (size_t i = 0; i != pivot_index; ++i)
v[i] = val++;
}
void init_forward(std::vector<int>& v)
{
int val = 0;
for (size_t i = 0; i != v.size(); ++i)
v[i] = val++;
}
void init_worst(std::vector<int>& v, size_t pivot_index)
{
int val = 0;
if (pivot_index + 1 > v.size() / 2)
{
for (size_t i = 0; i != pivot_index; val += 2, ++i)
v[i] = val;
val = 1;
}
else
{
for (size_t i = pivot_index; i != v.size(); val += 2, ++i)
v[i] = val;
val -= pivot_index * 2 + 1;
}
if (pivot_index + 1 > v.size() / 2)
for (size_t i = pivot_index; i != v.size(); val += 2, ++i)
v[i] = val;
else
for (size_t i = 0; i != pivot_index; val += 2, ++i)
v[i] = val;
}
void init_random(std::vector<int>& v)
{
// a simple pseudo-random series which does not rely on rand() and friends
v[0] = 0;
for (size_t i = 1; i != v.size(); ++i)
v[i] = (v[i-1] + 110211473) * 745988807;
}
void reduce_size(std::vector<int>& front_v,
std::vector<int>& middle_v,
std::vector<int>& back_v)
{
front_v.erase(front_v.begin() + front_pivot_idx,
front_v.end() - back_pivot_idx);
middle_v.erase(middle_v.begin() + small_size / 2,
middle_v.end() - small_size / 2);
back_v.erase(back_v.begin() + back_pivot_idx,
back_v.end() - front_pivot_idx);
}
int main()
{
using namespace __gnu_test;
// No constraint to build vectors.
set_new_limit(~size_t(0));
std::vector<int> front_revv(max_size);
init_reverse(front_revv, front_pivot_idx);
std::vector<int> middle_revv(max_size);
init_reverse(middle_revv, middle_pivot_idx);
std::vector<int> back_revv(max_size);
init_reverse(back_revv, back_pivot_idx);
std::vector<int> fwdv(max_size);
init_forward(fwdv);
std::vector<int> front_wstv(max_size);
init_worst(front_wstv, front_pivot_idx);
std::vector<int> middle_wstv(max_size);
init_worst(middle_wstv, middle_pivot_idx);
std::vector<int> back_wstv(max_size);
init_worst(back_wstv, back_pivot_idx);
std::vector<int> front_rndv(max_size);
init_random(front_rndv);
std::vector<int> middle_rndv(front_rndv);
std::vector<int> back_rndv(front_rndv);
sort(front_rndv.begin(), front_rndv.begin() + front_pivot_idx);
sort(front_rndv.begin() + front_pivot_idx, front_rndv.end());
sort(middle_rndv.begin(), middle_rndv.begin() + middle_pivot_idx);
sort(middle_rndv.begin() + middle_pivot_idx, middle_rndv.end());
sort(back_rndv.begin(), back_rndv.begin() + back_pivot_idx);
sort(back_rndv.begin() + back_pivot_idx, back_rndv.end());
time_counter time;
resource_counter resource;
start_counters(time, resource);
// No limit.
mem_bench(1.0,
front_revv, middle_revv, back_revv,
fwdv,
front_wstv, middle_wstv, back_wstv,
front_rndv, middle_rndv, back_rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 1 / 1 memory", time, resource);
clear_counters(time, resource);
start_counters(time, resource);
// Limit to the fourth.
mem_bench(1.0 / 4,
front_revv, middle_revv, back_revv,
fwdv,
front_wstv, middle_wstv, back_wstv,
front_rndv, middle_rndv, back_rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 1 / 4 memory", time, resource);
clear_counters(time, resource);
start_counters(time, resource);
// Really limit allocation.
mem_bench(1.0 / 64,
front_revv, middle_revv, back_revv,
fwdv,
front_wstv, middle_wstv, back_wstv,
front_rndv, middle_rndv, back_rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 1 /64 memory", time, resource);
clear_counters(time, resource);
middle_pivot_idx = small_size / 2;
back_pivot_idx = small_size - front_pivot_idx;
reduce_size(front_revv, middle_revv, back_revv);
fwdv.resize(small_size);
reduce_size(front_wstv, middle_wstv, back_wstv);
reduce_size(front_rndv, middle_rndv, back_rndv);
start_counters(time, resource);
// No memory.
mem_bench(0.0,
front_revv, middle_revv, back_revv,
fwdv,
front_wstv, middle_wstv, back_wstv,
front_rndv, middle_rndv, back_rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 0 / 1 memory", time, resource);
return 0;
}

View File

@ -17,49 +17,107 @@
#include <vector>
#include <algorithm>
#include <testsuite_new_operators.h>
#include <testsuite_performance.h>
int main()
const int max_size = 10000000;
const int small_size = 200000;
void bench(size_t mem_threshold,
std::vector<int> revv,
std::vector<int> fwdv,
std::vector<int> rndv)
{
using namespace __gnu_test;
time_counter time;
resource_counter resource;
const int max_size = 10000000;
std::vector<int> v(max_size);
for (int i = 0; i < max_size; ++i)
v[i] = -i;
set_new_limit(mem_threshold);
start_counters(time, resource);
std::stable_sort(v.begin(), v.end());
std::stable_sort(revv.begin(), revv.end());
stop_counters(time, resource);
set_new_limit(~size_t(0));
report_performance(__FILE__, "reverse", time, resource);
clear_counters(time, resource);
for (int i = 0; i < max_size; ++i)
v[i] = i;
set_new_limit(mem_threshold);
start_counters(time, resource);
std::stable_sort(v.begin(), v.end());
std::stable_sort(fwdv.begin(), fwdv.end());
stop_counters(time, resource);
set_new_limit(~size_t(0));
report_performance(__FILE__, "forwards", time, resource);
clear_counters(time, resource);
// a simple psuedo-random series which does not rely on rand() and friends
v[0] = 0;
for (int i = 1; i < max_size; ++i)
v[i] = (v[i-1] + 110211473) * 745988807;
start_counters(time, resource);
std::stable_sort(v.begin(), v.end());
std::stable_sort(rndv.begin(), rndv.end());
stop_counters(time, resource);
set_new_limit(~size_t(0));
report_performance(__FILE__, "random", time, resource);
}
int main()
{
using namespace __gnu_test;
// No memory constraint.
set_new_limit(~size_t(0));
std::vector<int> revv(max_size);
for (int i = 0; i < max_size; ++i)
revv[i] = -i;
std::vector<int> fwdv(max_size);
for (int i = 0; i < max_size; ++i)
fwdv[i] = i;
// a simple pseudo-random series which does not rely on rand() and friends
std::vector<int> rndv(max_size);
rndv[0] = 0;
for (int i = 1; i < max_size; ++i)
rndv[i] = (rndv[i-1] + 110211473) * 745988807;
time_counter time;
resource_counter resource;
start_counters(time, resource);
bench(~size_t(0), revv, fwdv, rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 1 / 1 memory", time, resource);
clear_counters(time, resource);
start_counters(time, resource);
// Limit to fourth the expected size of the sorted array.
bench(max_size * sizeof(int) / 4, revv, fwdv, rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 1 / 4 memory", time, resource);
clear_counters(time, resource);
start_counters(time, resource);
// Limit to 1/64 of range size.
bench(max_size * sizeof(int) / 64, revv, fwdv, rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 1 /64 memory", time, resource);
clear_counters(time, resource);
revv.resize(small_size);
fwdv.resize(small_size);
rndv.resize(small_size);
start_counters(time, resource);
// Forbid any allocation.
bench(0, revv, fwdv, rndv);
stop_counters(time, resource);
report_performance(__FILE__, "bench 0 / 1 memory", time, resource);
return 0;
}