mirror of
https://mirrors.bfsu.edu.cn/git/linux.git
synced 2025-01-09 23:34:42 +08:00
e683014c21
Historically a lot of these existed because we did not have a distinction between what was modular code and what was providing support to modules via EXPORT_SYMBOL and friends. That changed when we forked out support for the latter into the export.h file. This means we should be able to reduce the usage of module.h in code that is obj-y Makefile or bool Kconfig. The advantage in doing so is that module.h itself sources about 15 other headers; adding significantly to what we feed cpp, and it can obscure what headers we are effectively using. Since module.h was the source for init.h (for __init) and for export.h (for EXPORT_SYMBOL) we consider each obj-y/bool instance for the presence of either and replace as needed. Build testing revealed a couple implicit header usage issues that were fixed. Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/20160714001901.31603-5-paul.gortmaker@windriver.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
378 lines
8.0 KiB
C
378 lines
8.0 KiB
C
/*
|
|
* MMX 3DNow! library helper functions
|
|
*
|
|
* To do:
|
|
* We can use MMX just for prefetch in IRQ's. This may be a win.
|
|
* (reported so on K6-III)
|
|
* We should use a better code neutral filler for the short jump
|
|
* leal ebx. [ebx] is apparently best for K6-2, but Cyrix ??
|
|
* We also want to clobber the filler register so we don't get any
|
|
* register forwarding stalls on the filler.
|
|
*
|
|
* Add *user handling. Checksums are not a win with MMX on any CPU
|
|
* tested so far for any MMX solution figured.
|
|
*
|
|
* 22/09/2000 - Arjan van de Ven
|
|
* Improved for non-engineering-sample Athlons
|
|
*
|
|
*/
|
|
#include <linux/hardirq.h>
|
|
#include <linux/string.h>
|
|
#include <linux/export.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/types.h>
|
|
|
|
#include <asm/fpu/api.h>
|
|
#include <asm/asm.h>
|
|
|
|
void *_mmx_memcpy(void *to, const void *from, size_t len)
|
|
{
|
|
void *p;
|
|
int i;
|
|
|
|
if (unlikely(in_interrupt()))
|
|
return __memcpy(to, from, len);
|
|
|
|
p = to;
|
|
i = len >> 6; /* len/64 */
|
|
|
|
kernel_fpu_begin();
|
|
|
|
__asm__ __volatile__ (
|
|
"1: prefetch (%0)\n" /* This set is 28 bytes */
|
|
" prefetch 64(%0)\n"
|
|
" prefetch 128(%0)\n"
|
|
" prefetch 192(%0)\n"
|
|
" prefetch 256(%0)\n"
|
|
"2: \n"
|
|
".section .fixup, \"ax\"\n"
|
|
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
|
|
" jmp 2b\n"
|
|
".previous\n"
|
|
_ASM_EXTABLE(1b, 3b)
|
|
: : "r" (from));
|
|
|
|
for ( ; i > 5; i--) {
|
|
__asm__ __volatile__ (
|
|
"1: prefetch 320(%0)\n"
|
|
"2: movq (%0), %%mm0\n"
|
|
" movq 8(%0), %%mm1\n"
|
|
" movq 16(%0), %%mm2\n"
|
|
" movq 24(%0), %%mm3\n"
|
|
" movq %%mm0, (%1)\n"
|
|
" movq %%mm1, 8(%1)\n"
|
|
" movq %%mm2, 16(%1)\n"
|
|
" movq %%mm3, 24(%1)\n"
|
|
" movq 32(%0), %%mm0\n"
|
|
" movq 40(%0), %%mm1\n"
|
|
" movq 48(%0), %%mm2\n"
|
|
" movq 56(%0), %%mm3\n"
|
|
" movq %%mm0, 32(%1)\n"
|
|
" movq %%mm1, 40(%1)\n"
|
|
" movq %%mm2, 48(%1)\n"
|
|
" movq %%mm3, 56(%1)\n"
|
|
".section .fixup, \"ax\"\n"
|
|
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
|
|
" jmp 2b\n"
|
|
".previous\n"
|
|
_ASM_EXTABLE(1b, 3b)
|
|
: : "r" (from), "r" (to) : "memory");
|
|
|
|
from += 64;
|
|
to += 64;
|
|
}
|
|
|
|
for ( ; i > 0; i--) {
|
|
__asm__ __volatile__ (
|
|
" movq (%0), %%mm0\n"
|
|
" movq 8(%0), %%mm1\n"
|
|
" movq 16(%0), %%mm2\n"
|
|
" movq 24(%0), %%mm3\n"
|
|
" movq %%mm0, (%1)\n"
|
|
" movq %%mm1, 8(%1)\n"
|
|
" movq %%mm2, 16(%1)\n"
|
|
" movq %%mm3, 24(%1)\n"
|
|
" movq 32(%0), %%mm0\n"
|
|
" movq 40(%0), %%mm1\n"
|
|
" movq 48(%0), %%mm2\n"
|
|
" movq 56(%0), %%mm3\n"
|
|
" movq %%mm0, 32(%1)\n"
|
|
" movq %%mm1, 40(%1)\n"
|
|
" movq %%mm2, 48(%1)\n"
|
|
" movq %%mm3, 56(%1)\n"
|
|
: : "r" (from), "r" (to) : "memory");
|
|
|
|
from += 64;
|
|
to += 64;
|
|
}
|
|
/*
|
|
* Now do the tail of the block:
|
|
*/
|
|
__memcpy(to, from, len & 63);
|
|
kernel_fpu_end();
|
|
|
|
return p;
|
|
}
|
|
EXPORT_SYMBOL(_mmx_memcpy);
|
|
|
|
#ifdef CONFIG_MK7
|
|
|
|
/*
|
|
* The K7 has streaming cache bypass load/store. The Cyrix III, K6 and
|
|
* other MMX using processors do not.
|
|
*/
|
|
|
|
/*
 * fast_clear_page - zero 4096 bytes at @page with non-temporal MMX stores.
 *
 * %mm0 is cleared once with pxor; each of the 4096/64 loop iterations then
 * issues eight 8-byte movntq stores (64 bytes) and advances @page by 64.
 * The MMX usage is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin();

	__asm__ __volatile__ (
		" pxor %%mm0, %%mm0\n" : :
	);

	for (i = 0; i < 4096/64; i++) {
		__asm__ __volatile__ (
		" movntq %%mm0, (%0)\n"
		" movntq %%mm0, 8(%0)\n"
		" movntq %%mm0, 16(%0)\n"
		" movntq %%mm0, 24(%0)\n"
		" movntq %%mm0, 32(%0)\n"
		" movntq %%mm0, 40(%0)\n"
		" movntq %%mm0, 48(%0)\n"
		" movntq %%mm0, 56(%0)\n"
		: : "r" (page) : "memory");
		page += 64;
	}

	/*
	 * Since movntq is weakly-ordered, a "sfence" is needed to become
	 * ordered again:
	 */
	__asm__ __volatile__("sfence\n"::);

	kernel_fpu_end();
}
|
|
|
|
static void fast_copy_page(void *to, void *from)
|
|
{
|
|
int i;
|
|
|
|
kernel_fpu_begin();
|
|
|
|
/*
|
|
* maybe the prefetch stuff can go before the expensive fnsave...
|
|
* but that is for later. -AV
|
|
*/
|
|
__asm__ __volatile__(
|
|
"1: prefetch (%0)\n"
|
|
" prefetch 64(%0)\n"
|
|
" prefetch 128(%0)\n"
|
|
" prefetch 192(%0)\n"
|
|
" prefetch 256(%0)\n"
|
|
"2: \n"
|
|
".section .fixup, \"ax\"\n"
|
|
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
|
|
" jmp 2b\n"
|
|
".previous\n"
|
|
_ASM_EXTABLE(1b, 3b) : : "r" (from));
|
|
|
|
for (i = 0; i < (4096-320)/64; i++) {
|
|
__asm__ __volatile__ (
|
|
"1: prefetch 320(%0)\n"
|
|
"2: movq (%0), %%mm0\n"
|
|
" movntq %%mm0, (%1)\n"
|
|
" movq 8(%0), %%mm1\n"
|
|
" movntq %%mm1, 8(%1)\n"
|
|
" movq 16(%0), %%mm2\n"
|
|
" movntq %%mm2, 16(%1)\n"
|
|
" movq 24(%0), %%mm3\n"
|
|
" movntq %%mm3, 24(%1)\n"
|
|
" movq 32(%0), %%mm4\n"
|
|
" movntq %%mm4, 32(%1)\n"
|
|
" movq 40(%0), %%mm5\n"
|
|
" movntq %%mm5, 40(%1)\n"
|
|
" movq 48(%0), %%mm6\n"
|
|
" movntq %%mm6, 48(%1)\n"
|
|
" movq 56(%0), %%mm7\n"
|
|
" movntq %%mm7, 56(%1)\n"
|
|
".section .fixup, \"ax\"\n"
|
|
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
|
|
" jmp 2b\n"
|
|
".previous\n"
|
|
_ASM_EXTABLE(1b, 3b) : : "r" (from), "r" (to) : "memory");
|
|
|
|
from += 64;
|
|
to += 64;
|
|
}
|
|
|
|
for (i = (4096-320)/64; i < 4096/64; i++) {
|
|
__asm__ __volatile__ (
|
|
"2: movq (%0), %%mm0\n"
|
|
" movntq %%mm0, (%1)\n"
|
|
" movq 8(%0), %%mm1\n"
|
|
" movntq %%mm1, 8(%1)\n"
|
|
" movq 16(%0), %%mm2\n"
|
|
" movntq %%mm2, 16(%1)\n"
|
|
" movq 24(%0), %%mm3\n"
|
|
" movntq %%mm3, 24(%1)\n"
|
|
" movq 32(%0), %%mm4\n"
|
|
" movntq %%mm4, 32(%1)\n"
|
|
" movq 40(%0), %%mm5\n"
|
|
" movntq %%mm5, 40(%1)\n"
|
|
" movq 48(%0), %%mm6\n"
|
|
" movntq %%mm6, 48(%1)\n"
|
|
" movq 56(%0), %%mm7\n"
|
|
" movntq %%mm7, 56(%1)\n"
|
|
: : "r" (from), "r" (to) : "memory");
|
|
from += 64;
|
|
to += 64;
|
|
}
|
|
/*
|
|
* Since movntq is weakly-ordered, a "sfence" is needed to become
|
|
* ordered again:
|
|
*/
|
|
__asm__ __volatile__("sfence \n"::);
|
|
kernel_fpu_end();
|
|
}
|
|
|
|
#else /* CONFIG_MK7 */
|
|
|
|
/*
|
|
* Generic MMX implementation without K7 specific streaming
|
|
*/
|
|
/*
 * fast_clear_page - zero 4096 bytes at @page with ordinary MMX stores.
 *
 * %mm0 is cleared once with pxor; each of the 4096/128 loop iterations then
 * issues sixteen 8-byte movq stores (128 bytes) and advances @page by 128.
 * The MMX usage is bracketed by kernel_fpu_begin()/kernel_fpu_end().
 */
static void fast_clear_page(void *page)
{
	int i;

	kernel_fpu_begin();

	__asm__ __volatile__ (
		" pxor %%mm0, %%mm0\n" : :
	);

	for (i = 0; i < 4096/128; i++) {
		__asm__ __volatile__ (
		" movq %%mm0, (%0)\n"
		" movq %%mm0, 8(%0)\n"
		" movq %%mm0, 16(%0)\n"
		" movq %%mm0, 24(%0)\n"
		" movq %%mm0, 32(%0)\n"
		" movq %%mm0, 40(%0)\n"
		" movq %%mm0, 48(%0)\n"
		" movq %%mm0, 56(%0)\n"
		" movq %%mm0, 64(%0)\n"
		" movq %%mm0, 72(%0)\n"
		" movq %%mm0, 80(%0)\n"
		" movq %%mm0, 88(%0)\n"
		" movq %%mm0, 96(%0)\n"
		" movq %%mm0, 104(%0)\n"
		" movq %%mm0, 112(%0)\n"
		" movq %%mm0, 120(%0)\n"
			: : "r" (page) : "memory");
		page += 128;
	}

	kernel_fpu_end();
}
|
|
|
|
static void fast_copy_page(void *to, void *from)
|
|
{
|
|
int i;
|
|
|
|
kernel_fpu_begin();
|
|
|
|
__asm__ __volatile__ (
|
|
"1: prefetch (%0)\n"
|
|
" prefetch 64(%0)\n"
|
|
" prefetch 128(%0)\n"
|
|
" prefetch 192(%0)\n"
|
|
" prefetch 256(%0)\n"
|
|
"2: \n"
|
|
".section .fixup, \"ax\"\n"
|
|
"3: movw $0x1AEB, 1b\n" /* jmp on 26 bytes */
|
|
" jmp 2b\n"
|
|
".previous\n"
|
|
_ASM_EXTABLE(1b, 3b) : : "r" (from));
|
|
|
|
for (i = 0; i < 4096/64; i++) {
|
|
__asm__ __volatile__ (
|
|
"1: prefetch 320(%0)\n"
|
|
"2: movq (%0), %%mm0\n"
|
|
" movq 8(%0), %%mm1\n"
|
|
" movq 16(%0), %%mm2\n"
|
|
" movq 24(%0), %%mm3\n"
|
|
" movq %%mm0, (%1)\n"
|
|
" movq %%mm1, 8(%1)\n"
|
|
" movq %%mm2, 16(%1)\n"
|
|
" movq %%mm3, 24(%1)\n"
|
|
" movq 32(%0), %%mm0\n"
|
|
" movq 40(%0), %%mm1\n"
|
|
" movq 48(%0), %%mm2\n"
|
|
" movq 56(%0), %%mm3\n"
|
|
" movq %%mm0, 32(%1)\n"
|
|
" movq %%mm1, 40(%1)\n"
|
|
" movq %%mm2, 48(%1)\n"
|
|
" movq %%mm3, 56(%1)\n"
|
|
".section .fixup, \"ax\"\n"
|
|
"3: movw $0x05EB, 1b\n" /* jmp on 5 bytes */
|
|
" jmp 2b\n"
|
|
".previous\n"
|
|
_ASM_EXTABLE(1b, 3b)
|
|
: : "r" (from), "r" (to) : "memory");
|
|
|
|
from += 64;
|
|
to += 64;
|
|
}
|
|
kernel_fpu_end();
|
|
}
|
|
|
|
#endif /* !CONFIG_MK7 */
|
|
|
|
/*
|
|
* Favour MMX for page clear and copy:
|
|
*/
|
|
/*
 * slow_zero_page - zero 4096 bytes at @page with "rep stosl".
 *
 * The asm is entered with the count register holding 1024, the destination
 * index register holding @page and the accumulator holding 0, so 1024
 * four-byte zero stores are performed.  No MMX/FPU state is touched.
 */
static void slow_zero_page(void *page)
{
	/*
	 * Dummy outputs that tell the compiler the count and destination
	 * registers are modified.  They are 'long' so that the operand tied
	 * to the pointer has pointer width (same size as 'int' on 32-bit).
	 */
	long d0, d1;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; stosl"

			: "=&c" (d0), "=&D" (d1)
			:"a" (0), "1" (page), "0" (1024L)
			:"memory");
}
|
|
|
|
/*
 * mmx_clear_page - zero the page at @page.
 *
 * Uses slow_zero_page() when in_interrupt() is true and fast_clear_page()
 * otherwise.
 */
void mmx_clear_page(void *page)
{
	if (unlikely(in_interrupt()))
		slow_zero_page(page);
	else
		fast_clear_page(page);
}
EXPORT_SYMBOL(mmx_clear_page);
|
|
|
|
/*
 * slow_copy_page - copy 4096 bytes from @from to @to with "rep movsl".
 *
 * The asm is entered with the count register holding 1024, the destination
 * index register holding @to and the source index register holding @from,
 * so 1024 four-byte moves are performed.  No MMX/FPU state is touched.
 */
static void slow_copy_page(void *to, void *from)
{
	/*
	 * Dummy outputs that tell the compiler the count, destination and
	 * source registers are modified.  They are 'long' to match the
	 * '(long)' casts on the tied inputs (same size as 'int' on 32-bit).
	 */
	long d0, d1, d2;

	__asm__ __volatile__(
		"cld\n\t"
		"rep ; movsl"
		: "=&c" (d0), "=&D" (d1), "=&S" (d2)
		: "0" (1024L), "1" ((long) to), "2" ((long) from)
		: "memory");
}
|
|
|
|
/*
 * mmx_copy_page - copy one page from @from to @to.
 *
 * Uses slow_copy_page() when in_interrupt() is true and fast_copy_page()
 * otherwise.
 */
void mmx_copy_page(void *to, void *from)
{
	if (unlikely(in_interrupt()))
		slow_copy_page(to, from);
	else
		fast_copy_page(to, from);
}
EXPORT_SYMBOL(mmx_copy_page);
|