linux/arch/riscv/lib/csum.c
Linus Torvalds c150b809f7 RISC-V Patches for the 6.9 Merge Window
* Support for various vector-accelerated crypto routines.
 * Hibernation is now enabled for portable kernel builds.
 * mmap_rnd_bits_max is larger on systems with larger VAs.
 * Support for fast GUP.
 * Support for membarrier-based instruction cache synchronization.
 * Support for the Andes hart-level interrupt controller and PMU.
 * Some cleanups around unaligned access speed probing and Kconfig
   settings.
 * Support for ACPI LPI and CPPC.
 * Various cleanus related to barriers.
 * A handful of fixes.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEKzw3R0RoQ7JKlDp6LhMZ81+7GIkFAmX9icgTHHBhbG1lckBk
 YWJiZWx0LmNvbQAKCRAuExnzX7sYib+UD/4xyL6UMixx6A06BVBL9UT4vOrxRvNr
 JIihG5y5QNMjes9DHWL35mZTMqFtQ0tq94ViWFLmJWloV/8KRVM2C9R9KX7vplf3
 M/OwvP106spxgvNHoeQbycgs42RU1t2mpqT7N1iK2hCjqieP3vLn6hsSLXWTAG0L
 3gQbQw6XCLC3hPyLq+nbFY2i4faeCmpXWmixoy/IvQ5calZQrRU0LNlP6lcMBhVo
 uocjG0uGAhrahw2s81jxcMZcxa3AvUCiplapdD5H5v9rBM85SkYJj2Q9SqdSorkb
 xzuimRnKPI5s47yM3pTfZY0qnQUYHV7PXXuw4WujpCQVQdhaG+Ggq63UUZA61J9t
 IzZK2zdcfHqICrGTtXImUzRT3dcc3oq+IFq4tTY+rEJm29hrXkAtx+qBm5xtMvax
 fJz5feJ/iT0u7MDj4Oq24n+Kpl+Olm+MJaZX3m5Ovi/9V6a9iK9HXqxg9/Fs0fMO
 +J/0kTgd8Vu9CYH7KNWz3uztcO9eMAH3VyzuXuab4BGj1i1Y/9EjpALQi7rDN73S
 OsYQX6NnzMkBV4dvElJVLXiPlvNlMHZZwdak5CqPb48jaJu6iiIZAuvOrG6/naGP
 wnQSLVA2WWWoOkl3AJhxfpa11CLhbMl9E2gYm1VtNvASXoSFIxlAq1Yv3sG8yjty
 4ZT0rYFJOstYiQ==
 =3dL5
 -----END PGP SIGNATURE-----

Merge tag 'riscv-for-linus-6.9-mw2' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux

Pull RISC-V updates from Palmer Dabbelt:

 - Support for various vector-accelerated crypto routines

 - Hibernation is now enabled for portable kernel builds

 - mmap_rnd_bits_max is larger on systems with larger VAs

 - Support for fast GUP

 - Support for membarrier-based instruction cache synchronization

 - Support for the Andes hart-level interrupt controller and PMU

 - Some cleanups around unaligned access speed probing and Kconfig
   settings

 - Support for ACPI LPI and CPPC

 - Various cleanus related to barriers

 - A handful of fixes

* tag 'riscv-for-linus-6.9-mw2' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (66 commits)
  riscv: Fix syscall wrapper for >word-size arguments
  crypto: riscv - add vector crypto accelerated AES-CBC-CTS
  crypto: riscv - parallelize AES-CBC decryption
  riscv: Only flush the mm icache when setting an exec pte
  riscv: Use kcalloc() instead of kzalloc()
  riscv/barrier: Add missing space after ','
  riscv/barrier: Consolidate fence definitions
  riscv/barrier: Define RISCV_FULL_BARRIER
  riscv/barrier: Define __{mb,rmb,wmb}
  RISC-V: defconfig: Enable CONFIG_ACPI_CPPC_CPUFREQ
  cpufreq: Move CPPC configs to common Kconfig and add RISC-V
  ACPI: RISC-V: Add CPPC driver
  ACPI: Enable ACPI_PROCESSOR for RISC-V
  ACPI: RISC-V: Add LPI driver
  cpuidle: RISC-V: Move few functions to arch/riscv
  riscv: Introduce set_compat_task() in asm/compat.h
  riscv: Introduce is_compat_thread() into compat.h
  riscv: add compile-time test into is_compat_task()
  riscv: Replace direct thread flag check with is_compat_task()
  riscv: Improve arch_get_mmap_end() macro
  ...
2024-03-22 10:41:13 -07:00

326 lines
8.4 KiB
C

// SPDX-License-Identifier: GPL-2.0
/*
* Checksum library
*
* Influenced by arch/arm64/lib/csum.c
* Copyright (C) 2023-2024 Rivos Inc.
*/
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/jump_label.h>
#include <linux/kasan-checks.h>
#include <linux/kernel.h>
#include <asm/cpufeature.h>
#include <net/checksum.h>
/* Default version is sufficient for 32 bit */
#ifndef CONFIG_32BIT
__sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto, __wsum csum)
{
unsigned int ulen, uproto;
unsigned long sum = (__force unsigned long)csum;
sum += (__force unsigned long)saddr->s6_addr32[0];
sum += (__force unsigned long)saddr->s6_addr32[1];
sum += (__force unsigned long)saddr->s6_addr32[2];
sum += (__force unsigned long)saddr->s6_addr32[3];
sum += (__force unsigned long)daddr->s6_addr32[0];
sum += (__force unsigned long)daddr->s6_addr32[1];
sum += (__force unsigned long)daddr->s6_addr32[2];
sum += (__force unsigned long)daddr->s6_addr32[3];
ulen = (__force unsigned int)htonl((unsigned int)len);
sum += ulen;
uproto = (__force unsigned int)htonl(proto);
sum += uproto;
/*
* Zbb support saves 4 instructions, so not worth checking without
* alternatives if supported
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
/*
* Zbb is likely available when the kernel is compiled with Zbb
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
:
: no_zbb);
asm(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[sum], 32 \n\
add %[sum], %[fold_temp], %[sum] \n\
srli %[sum], %[sum], 32 \n\
not %[fold_temp], %[sum] \n\
roriw %[sum], %[sum], 16 \n\
subw %[sum], %[fold_temp], %[sum] \n\
.option pop"
: [sum] "+r" (sum), [fold_temp] "=&r" (fold_temp));
return (__force __sum16)(sum >> 16);
}
no_zbb:
sum += ror64(sum, 32);
sum >>= 32;
return csum_fold((__force __wsum)sum);
}
EXPORT_SYMBOL(csum_ipv6_magic);
#endif /* !CONFIG_32BIT */
#ifdef CONFIG_32BIT
#define OFFSET_MASK 3
#elif CONFIG_64BIT
#define OFFSET_MASK 7
#endif
static inline __no_sanitize_address unsigned long
do_csum_common(const unsigned long *ptr, const unsigned long *end,
unsigned long data)
{
unsigned int shift;
unsigned long csum = 0, carry = 0;
/*
* Do 32-bit reads on RV32 and 64-bit reads otherwise. This should be
* faster than doing 32-bit reads on architectures that support larger
* reads.
*/
while (ptr < end) {
csum += data;
carry += csum < data;
data = *(ptr++);
}
/*
* Perform alignment (and over-read) bytes on the tail if any bytes
* leftover.
*/
shift = ((long)ptr - (long)end) * 8;
#ifdef __LITTLE_ENDIAN
data = (data << shift) >> shift;
#else
data = (data >> shift) << shift;
#endif
csum += data;
carry += csum < data;
csum += carry;
csum += csum < carry;
return csum;
}
/*
* Algorithm accounts for buff being misaligned.
* If buff is not aligned, will over-read bytes but not use the bytes that it
* shouldn't. The same thing will occur on the tail-end of the read.
*/
static inline __no_sanitize_address unsigned int
do_csum_with_alignment(const unsigned char *buff, int len)
{
unsigned int offset, shift;
unsigned long csum, data;
const unsigned long *ptr, *end;
/*
* Align address to closest word (double word on rv64) that comes before
* buff. This should always be in the same page and cache line.
* Directly call KASAN with the alignment we will be using.
*/
offset = (unsigned long)buff & OFFSET_MASK;
kasan_check_read(buff, len);
ptr = (const unsigned long *)(buff - offset);
/*
* Clear the most significant bytes that were over-read if buff was not
* aligned.
*/
shift = offset * 8;
data = *(ptr++);
#ifdef __LITTLE_ENDIAN
data = (data >> shift) << shift;
#else
data = (data << shift) >> shift;
#endif
end = (const unsigned long *)(buff + len);
csum = do_csum_common(ptr, end, data);
#ifdef CC_HAS_ASM_GOTO_TIED_OUTPUT
/*
* Zbb support saves 6 instructions, so not worth checking without
* alternatives if supported
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
/*
* Zbb is likely available when the kernel is compiled with Zbb
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
:
: no_zbb);
#ifdef CONFIG_32BIT
asm_goto_output(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 16 \n\
andi %[offset], %[offset], 1 \n\
add %[csum], %[fold_temp], %[csum] \n\
beq %[offset], zero, %l[end] \n\
rev8 %[csum], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
: [offset] "r" (offset)
:
: end);
return (unsigned short)csum;
#else /* !CONFIG_32BIT */
asm_goto_output(".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 32 \n\
add %[csum], %[fold_temp], %[csum] \n\
srli %[csum], %[csum], 32 \n\
roriw %[fold_temp], %[csum], 16 \n\
addw %[csum], %[fold_temp], %[csum] \n\
andi %[offset], %[offset], 1 \n\
beq %[offset], zero, %l[end] \n\
rev8 %[csum], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
: [offset] "r" (offset)
:
: end);
return (csum << 16) >> 48;
#endif /* !CONFIG_32BIT */
end:
return csum >> 16;
}
no_zbb:
#endif /* CC_HAS_ASM_GOTO_TIED_OUTPUT */
#ifndef CONFIG_32BIT
csum += ror64(csum, 32);
csum >>= 32;
#endif
csum = (u32)csum + ror32((u32)csum, 16);
if (offset & 1)
return (u16)swab32(csum);
return csum >> 16;
}
/*
* Does not perform alignment, should only be used if machine has fast
* misaligned accesses, or when buff is known to be aligned.
*/
static inline __no_sanitize_address unsigned int
do_csum_no_alignment(const unsigned char *buff, int len)
{
unsigned long csum, data;
const unsigned long *ptr, *end;
ptr = (const unsigned long *)(buff);
data = *(ptr++);
kasan_check_read(buff, len);
end = (const unsigned long *)(buff + len);
csum = do_csum_common(ptr, end, data);
/*
* Zbb support saves 6 instructions, so not worth checking without
* alternatives if supported
*/
if (IS_ENABLED(CONFIG_RISCV_ISA_ZBB) &&
IS_ENABLED(CONFIG_RISCV_ALTERNATIVE)) {
unsigned long fold_temp;
/*
* Zbb is likely available when the kernel is compiled with Zbb
* support, so nop when Zbb is available and jump when Zbb is
* not available.
*/
asm goto(ALTERNATIVE("j %l[no_zbb]", "nop", 0,
RISCV_ISA_EXT_ZBB, 1)
:
:
:
: no_zbb);
#ifdef CONFIG_32BIT
asm (".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 16 \n\
add %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
:
: );
#else /* !CONFIG_32BIT */
asm (".option push \n\
.option arch,+zbb \n\
rori %[fold_temp], %[csum], 32 \n\
add %[csum], %[fold_temp], %[csum] \n\
srli %[csum], %[csum], 32 \n\
roriw %[fold_temp], %[csum], 16 \n\
addw %[csum], %[fold_temp], %[csum] \n\
.option pop"
: [csum] "+r" (csum), [fold_temp] "=&r" (fold_temp)
:
: );
#endif /* !CONFIG_32BIT */
return csum >> 16;
}
no_zbb:
#ifndef CONFIG_32BIT
csum += ror64(csum, 32);
csum >>= 32;
#endif
csum = (u32)csum + ror32((u32)csum, 16);
return csum >> 16;
}
/*
* Perform a checksum on an arbitrary memory address.
* Will do a light-weight address alignment if buff is misaligned, unless
* cpu supports fast misaligned accesses.
*/
unsigned int do_csum(const unsigned char *buff, int len)
{
if (unlikely(len <= 0))
return 0;
/*
* Significant performance gains can be seen by not doing alignment
* on machines with fast misaligned accesses.
*
* There is some duplicate code between the "with_alignment" and
* "no_alignment" implmentations, but the overlap is too awkward to be
* able to fit in one function without introducing multiple static
* branches. The largest chunk of overlap was delegated into the
* do_csum_common function.
*/
if (has_fast_unaligned_accesses() || (((unsigned long)buff & OFFSET_MASK) == 0))
return do_csum_no_alignment(buff, len);
return do_csum_with_alignment(buff, len);
}