/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
/*
 * string function definitions for NOLIBC
 * Copyright (C) 2017-2021 Willy Tarreau <w@1wt.eu>
 */

#ifndef _NOLIBC_STRING_H
#define _NOLIBC_STRING_H

#include "std.h"

static void *malloc(size_t len);

/*
 * As much as possible, please keep functions alphabetically sorted.
 */
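
/*
 * Compares the first n bytes of s1 and s2 as unsigned chars, as the C
 * standard requires, and returns the difference between the first pair of
 * mismatching bytes, or zero when all n bytes are identical. An int holds
 * the difference because it may range from -255 to +255.
 */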
static __attribute__((unused))
int memcmp(const void *s1, const void *s2, size_t n)
{
	size_t ofs = 0;
	int c1 = 0;

	while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) {
		ofs++;
	}
	return c1;
}
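
/*
 * An architecture can provide its own optimized memmove()/memcpy()/memset()
 * (e.g. x86-64 uses "rep movsb" / "rep stosb") by defining the corresponding
 * NOLIBC_ARCH_HAS_* macro, in which case the generic byte-wise versions
 * below are skipped.
 */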
#ifndef NOLIBC_ARCH_HAS_MEMMOVE
/* might be ignored by the compiler without -ffreestanding, then found as
 * missing.
 */
__attribute__((weak,unused,section(".text.nolibc_memmove")))
void *memmove(void *dst, const void *src, size_t len)
{
	size_t dir, pos;
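
	/* Default to a backward copy (start past the end and walk down), which
	 * is safe when dst overlaps the end of src; copy forward only when dst
	 * is below src.
	 */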
	pos = len;
	dir = -1;

	if (dst < src) {
		pos = -1;
		dir = 1;
	}

	while (len) {
		pos += dir;
		((char *)dst)[pos] = ((const char *)src)[pos];
		len--;
	}
	return dst;
}
#endif /* #ifndef NOLIBC_ARCH_HAS_MEMMOVE */
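
/*
 * Generic byte-by-byte memcpy(); the source and destination areas must not
 * overlap (memmove() above handles overlapping areas).
 */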
#ifndef NOLIBC_ARCH_HAS_MEMCPY
/* must be exported, as it's used by libgcc on ARM */
__attribute__((weak,unused,section(".text.nolibc_memcpy")))
void *memcpy(void *dst, const void *src, size_t len)
{
	size_t pos = 0;

	while (pos < len) {
		((char *)dst)[pos] = ((const char *)src)[pos];
		pos++;
	}
	return dst;
}
#endif /* #ifndef NOLIBC_ARCH_HAS_MEMCPY */
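
/*
 * Generic memset(): fills the first len bytes of dst with the byte value b
 * (only the low 8 bits of b are used).
 */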
#ifndef NOLIBC_ARCH_HAS_MEMSET
/* might be ignored by the compiler without -ffreestanding, then found as
 * missing.
 */
__attribute__((weak,unused,section(".text.nolibc_memset")))
void *memset(void *dst, int b, size_t len)
{
	char *p = dst;

	while (len--) {
		/* prevent gcc from recognizing memset() here */
		__asm__ volatile("");
		*(p++) = b;
	}
	return dst;
}
#endif /* #ifndef NOLIBC_ARCH_HAS_MEMSET */
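
/*
 * Returns a pointer to the first occurrence of the byte c in s, or NULL if
 * it is not found before the terminating NUL.
 */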
static __attribute__((unused))
char *strchr(const char *s, int c)
{
	while (*s) {
		if (*s == (char)c)
			return (char *)s;
		s++;
	}
	return NULL;
}
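
/*
 * Compares both strings as unsigned chars and returns the difference between
 * the first pair of mismatching bytes, or zero when the strings are equal.
 */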
static __attribute__((unused))
int strcmp(const char *a, const char *b)
{
	unsigned int c;
	int diff;

	while (!(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
		;
	return diff;
}
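
/*
 * Copies the NUL-terminated string src, including its terminator, to dst,
 * which must be large enough; returns dst.
 */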
static __attribute__((unused))
char *strcpy(char *dst, const char *src)
{
	char *ret = dst;

	while ((*dst++ = *src++));
	return ret;
}

/* this function is only used with arguments that are not constants or when
 * it's not known because optimizations are disabled. Note that gcc 12
 * recognizes an strlen() pattern and replaces it with a jump to strlen(),
 * thus itself, hence the asm() statement below that's meant to disable this
 * confusing practice.
 */
__attribute__((weak,unused,section(".text.nolibc_strlen")))
size_t strlen(const char *str)
{
	size_t len;

	for (len = 0; str[len]; len++)
		__asm__("");
	return len;
}

/* do not trust __builtin_constant_p() at -O0, as clang will emit a test and
 * the two branches, then will rely on an external definition of strlen().
 */
#if defined(__OPTIMIZE__)
#define nolibc_strlen(x) strlen(x)
#define strlen(str) ({				\
	__builtin_constant_p((str)) ?		\
	__builtin_strlen((str)) :		\
	nolibc_strlen((str));			\
})
#endif
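
/*
 * Returns the length of str without scanning more than maxlen bytes.
 */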
static __attribute__((unused))
size_t strnlen(const char *str, size_t maxlen)
{
	size_t len;

	for (len = 0; (len < maxlen) && str[len]; len++);
	return len;
}
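
/*
 * Returns a newly allocated copy of str, or NULL if the allocation fails.
 */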
static __attribute__((unused))
char *strdup(const char *str)
{
	size_t len;
	char *ret;

	len = strlen(str);
	ret = malloc(len + 1);
	if (__builtin_expect(ret != NULL, 1))
		memcpy(ret, str, len + 1);

	return ret;
}
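
/*
 * Returns a newly allocated, NUL-terminated copy of at most maxlen bytes of
 * str, or NULL if the allocation fails.
 */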
static __attribute__((unused))
char *strndup(const char *str, size_t maxlen)
{
	size_t len;
	char *ret;

	len = strnlen(str, maxlen);
	ret = malloc(len + 1);
	if (__builtin_expect(ret != NULL, 1)) {
		memcpy(ret, str, len);
		ret[len] = '\0';
	}

	return ret;
}
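
/*
 * Appends src to the string in dst within a buffer of size bytes and
 * NUL-terminates the result when size is non-zero; returns the length of the
 * string it tried to create, so a return value >= size indicates truncation.
 */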
static __attribute__((unused))
size_t strlcat(char *dst, const char *src, size_t size)
{
	size_t len = strnlen(dst, size);

	/*
	 * We want len < size-1. But as size is unsigned and can wrap
	 * around, we use len + 1 instead.
	 */
	while (len + 1 < size) {
		dst[len] = *src;
		if (*src == '\0')
			break;
		len++;
		src++;
	}

	if (len < size)
		dst[len] = '\0';

	while (*src++)
		len++;

	return len;
}
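
/*
 * Copies src to dst, writing at most size bytes and NUL-terminating the
 * result when size is non-zero; returns the length of src, so a return
 * value >= size indicates truncation.
 */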
static __attribute__((unused))
size_t strlcpy(char *dst, const char *src, size_t size)
{
	size_t len;

	for (len = 0; len < size; len++) {
		dst[len] = src[len];
		if (!dst[len])
			return len;
	}
	if (size)
		dst[size-1] = '\0';

	while (src[len])
		len++;

	return len;
}
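
/*
 * Appends at most size characters from src to the NUL-terminated string dst
 * and always NUL-terminates the result; returns dst.
 */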
static __attribute__((unused))
char *strncat(char *dst, const char *src, size_t size)
{
	char *orig = dst;

	while (*dst)
		dst++;

	while (size && (*dst = *src)) {
		src++;
		dst++;
		size--;
	}

	*dst = 0;
	return orig;
}
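
/*
 * Compares at most size bytes of both strings as unsigned chars and returns
 * the difference between the first pair of mismatching bytes, or zero.
 */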
static __attribute__((unused))
int strncmp(const char *a, const char *b, size_t size)
{
	unsigned int c;
	int diff = 0;

	while (size-- &&
	       !(diff = (unsigned char)*a++ - (c = (unsigned char)*b++)) && c)
		;

	return diff;
}
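
/*
 * Copies src to dst and pads the remainder of dst with NUL bytes up to size;
 * dst is not NUL-terminated when src is size bytes long or more.
 */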
static __attribute__((unused))
char *strncpy(char *dst, const char *src, size_t size)
{
	size_t len;

	for (len = 0; len < size; len++)
		if ((dst[len] = *src))
			src++;
	return dst;
}
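
/*
 * Returns a pointer to the last occurrence of the byte c in s, or NULL if it
 * is not found before the terminating NUL.
 */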
static __attribute__((unused))
char *strrchr(const char *s, int c)
{
	const char *ret = NULL;

	while (*s) {
		if (*s == (char)c)
			ret = s;
		s++;
	}
	return (char *)ret;
}

/* make sure to include all global symbols */
#include "nolibc.h"

#endif /* _NOLIBC_STRING_H */