mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2024-12-11 13:04:03 +08:00
dm writecache: improve performance on DDR persistent memory (Optane)
When testing the dm-writecache target on a real DDR persistent memory (Intel Optane), it turned out that explicit cache flushing using the clflushopt instruction performs better than non-temporal stores for block sizes 1k, 2k and 4k. The dm-writecache target is singlethreaded (all the copying is done while holding the writecache lock), so it benefits from clwb, see: http://lore.kernel.org/r/alpine.LRH.2.02.2004160411460.7833@file01.intranet.prod.int.rdu2.redhat.com Add a new function memcpy_flushcache_optimized() that tests if clflushopt is present - and if it is, we use it instead of memcpy_flushcache. Signed-off-by: Mikulas Patocka <mpatocka@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
parent
499c18045e
commit
48338daaa0
@ -1139,6 +1139,42 @@ static int writecache_message(struct dm_target *ti, unsigned argc, char **argv,
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
 * Copy @size bytes from @source to @dest and push the written data out of
 * the CPU cache, choosing between two strategies:
 *
 *  - on x86 CPUs that have clflushopt and a 64-byte cache line, and for
 *    copies of at least 768 bytes, copy one 64-byte chunk at a time with
 *    memcpy() and flush each chunk with clflushopt;
 *  - otherwise defer to memcpy_flushcache().
 *
 * Any tail shorter than 64 bytes that is left after the clflushopt loop is
 * handed to memcpy_flushcache() so that no bytes are silently dropped when
 * @size is not a multiple of 64.
 *
 * NOTE(review): the clflushopt loop flushes only the line containing the
 * start of each 64-byte chunk, so it presumably relies on @dest being
 * 64-byte aligned - confirm against the callers.
 */
static void memcpy_flushcache_optimized(void *dest, void *source, size_t size)
{
	/*
	 * clflushopt performs better with block size 1024, 2048, 4096
	 * non-temporal stores perform better with block size 512
	 *
	 * block size	512		1024		2048		4096
	 * movnti	496 MB/s	642 MB/s	725 MB/s	744 MB/s
	 * clflushopt	373 MB/s	688 MB/s	1.1 GB/s	1.2 GB/s
	 *
	 * We see that movnti performs better for 512-byte blocks, and
	 * clflushopt performs better for 1024-byte and larger blocks. So, we
	 * prefer clflushopt for sizes >= 768.
	 *
	 * NOTE: this happens to be the case now (with dm-writecache's single
	 * threaded model) but re-evaluate this once memcpy_flushcache() is
	 * enabled to use movdir64b which might invalidate this performance
	 * advantage seen with cache-allocating-writes plus flushing.
	 */
#ifdef CONFIG_X86
	if (static_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
	    likely(boot_cpu_data.x86_clflush_size == 64) &&
	    likely(size >= 768)) {
		/* size >= 768 here, so the first iteration is always valid. */
		do {
			memcpy(dest, source, 64);
			clflushopt(dest);
			dest += 64;
			source += 64;
			size -= 64;
		} while (size >= 64);

		/* Common case: size was a multiple of 64, nothing is left. */
		if (likely(!size))
			return;
		/* Otherwise fall through to copy and flush the short tail. */
	}
#endif
	memcpy_flushcache(dest, source, size);
}
|
||||
|
||||
static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data)
|
||||
{
|
||||
void *buf;
|
||||
@ -1164,7 +1200,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
|
||||
}
|
||||
} else {
|
||||
flush_dcache_page(bio_page(bio));
|
||||
memcpy_flushcache(data, buf, size);
|
||||
memcpy_flushcache_optimized(data, buf, size);
|
||||
}
|
||||
|
||||
bvec_kunmap_irq(buf, &flags);
|
||||
|
Loading…
Reference in New Issue
Block a user