Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm

Pull ARM updates from Russell King:
 "Included in this update:

  - perf updates from Will Deacon:

    The main changes are callchain stability fixes from Jean Pihet and
    event mapping and PMU name rework from Mark Rutland.  The latter is
    preparatory work for enabling some code re-use with arm64 in the
    future.

  - updates for nommu from Uwe Kleine-König:

    Two different fixes for the same problem making some ARM nommu
    configurations not boot since 3.6-rc1.  The problem is that
    user_addr_max returned the biggest available RAM address which makes
    some copy_from_user variants fail to read from XIP memory.

  - deprecate legacy OMAP DMA API, in preparation for its removal.  The
    popular drivers have been converted over, leaving a very small
    number of rarely used drivers, which hopefully can be converted
    during the next cycle with a bit more visibility (and hopefully
    people popping out of the woodwork to help test).

  - more tweaks for BE systems, particularly with the kernel image
    format.  In connection with this, I've cleaned up the way we
    generate the linker script for the decompressor.

  - removal of hard-coded assumptions of the kernel stack size, making
    everywhere depend on the value of THREAD_SIZE_ORDER.

  - MCPM updates from Nicolas Pitre.

  - Make it easier for proper CPU part number checks (which should
    always include the vendor field).

  - Assembly code optimisation - use the "bx" instruction when
    returning from a function on ARMv6+ rather than "mov pc, reg".

  - Save the last kernel misaligned fault location and report it via
    the procfs alignment file.

  - Clean up the way we create the initial stack frame, which is a
    repeated pattern in several different locations.

  - Support for 8-byte get_user(), needed for some DRM implementations
    (see the usage sketch after the commit details below).

  - mcs locking from Will Deacon.

  - Save and restore a few more Cortex-A9 registers (for errata
    workarounds).

  - Fix various aspects of the SWP emulation, and the ELF hwcap for the
    SWP instruction.

  - Update LPAE logic for pte_write and pmd_write to make it more
    correct.

  - Support for Broadcom Brahma15 CPU cores.

  - ARM assembly crypto updates from Ard Biesheuvel"

* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (53 commits)
  ARM: add comments to the early page table remap code
  ARM: 8122/1: smp_scu: enable SCU standby support
  ARM: 8121/1: smp_scu: use macro for SCU enable bit
  ARM: 8120/1: crypto: sha512: add ARM NEON implementation
  ARM: 8119/1: crypto: sha1: add ARM NEON implementation
  ARM: 8118/1: crypto: sha1: make use of common SHA-1 structures
  ARM: 8113/1: remove remaining definitions of PLAT_PHYS_OFFSET from <mach/memory.h>
  ARM: 8111/1: Enable erratum 798181 for Broadcom Brahma-B15
  ARM: 8110/1: do CPU-specific init for Broadcom Brahma15 cores
  ARM: 8109/1: mm: Modify pte_write and pmd_write logic for LPAE
  ARM: 8108/1: mm: Introduce {pte,pmd}_isset and {pte,pmd}_isclear
  ARM: hwcap: disable HWCAP_SWP if the CPU advertises it has exclusives
  ARM: SWP emulation: only initialise on ARMv7 CPUs
  ARM: SWP emulation: always enable when SMP is enabled
  ARM: 8103/1: save/restore Cortex-A9 CP15 registers on suspend/resume
  ARM: 8098/1: mcs lock: implement wfe-based polling for MCS locking
  ARM: 8091/2: add get_user() support for 8 byte types
  ARM: 8097/1: unistd.h: relocate comments back to place
  ARM: 8096/1: Describe required sort order for textofs-y (TEXT_OFFSET)
  ARM: 8090/1: add revision info for PL310 errata 588369 and 727915
  ...
This commit is contained in: commit c489d98c8c
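The changelog entry on 8-byte get_user() is only a one-line summary, so here is a minimal, hedged C sketch of what it enables once the series is applied. It is not code from this merge: the helper name and the user-space pointer are hypothetical, and only the get_user() call itself is the kernel API the entry refers to.

```c
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <linux/types.h>

/*
 * Hypothetical helper: with 8-byte get_user() support, a 64-bit value
 * (for example a GPU virtual address passed in through an ioctl) can be
 * fetched from user space with a single access-checked read.
 */
static int fetch_user_u64(const u64 __user *uptr, u64 *out)
{
	u64 val;

	if (get_user(val, uptr))	/* non-zero means the access faulted */
		return -EFAULT;

	*out = val;
	return 0;
}
```

Before this series, a 32-bit ARM driver wanting a 64-bit value from user space would typically have used two 32-bit get_user() calls or a copy_from_user() instead.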
@ -263,8 +263,22 @@ config NEED_MACH_MEMORY_H

config PHYS_OFFSET
	hex "Physical address of main memory" if MMU
	depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
	depends on !ARM_PATCH_PHYS_VIRT
	default DRAM_BASE if !MMU
	default 0x00000000 if ARCH_EBSA110 || \
			EP93XX_SDCE3_SYNC_PHYS_OFFSET || \
			ARCH_FOOTBRIDGE || \
			ARCH_INTEGRATOR || \
			ARCH_IOP13XX || \
			ARCH_KS8695 || \
			(ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET)
	default 0x10000000 if ARCH_OMAP1 || ARCH_RPC
	default 0x20000000 if ARCH_S5PV210
	default 0x70000000 if REALVIEW_HIGH_PHYS_OFFSET
	default 0xc0000000 if EP93XX_SDCE0_PHYS_OFFSET || ARCH_SA1100
	default 0xd0000000 if EP93XX_SDCE1_PHYS_OFFSET
	default 0xe0000000 if EP93XX_SDCE2_PHYS_OFFSET
	default 0xf0000000 if EP93XX_SDCE3_ASYNC_PHYS_OFFSET
	help
	  Please provide the physical address corresponding to the
	  location of main memory in your system.

@ -436,7 +450,6 @@ config ARCH_EP93XX
	select ARM_VIC
	select CLKDEV_LOOKUP
	select CPU_ARM920T
	select NEED_MACH_MEMORY_H
	help
	  This enables support for the Cirrus EP93xx series of CPUs.
@ -127,6 +127,9 @@ CHECKFLAGS += -D__arm__

#Default value
head-y := arch/arm/kernel/head$(MMUEXT).o

# Text offset. This list is sorted numerically by address in order to
# provide a means to avoid/resolve conflicts in multi-arch kernels.
textofs-y := 0x00008000
textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000
# We don't want the htc bootloader to corrupt kernel during resume
@ -81,7 +81,7 @@ ZTEXTADDR := 0
ZBSSADDR := ALIGN(8)
endif

SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"

suffix_$(CONFIG_KERNEL_GZIP) = gzip
suffix_$(CONFIG_KERNEL_LZO) = lzo
@ -199,8 +199,5 @@ CFLAGS_font.o := -Dstatic=
$(obj)/font.c: $(FONTC)
	$(call cmd,shipped)

$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
	@sed "$(SEDFLAGS)" < $< > $@

$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
	$(call cmd,shipped)
@ -125,9 +125,11 @@ start:
 THUMB(	adr	r12, BSYM(1f)	)
 THUMB(	bx	r12		)

		.word	0x016f2818	@ Magic numbers to help the loader
		.word	start		@ absolute load/run zImage address
		.word	_edata		@ zImage end address
		.word	_magic_sig	@ Magic numbers to help the loader
		.word	_magic_start	@ absolute load/run zImage address
		.word	_magic_end	@ zImage end address
		.word	0x04030201	@ endianness flag

 THUMB(	.thumb			)
1:
 ARM_BE8(	setend	be )	@ go BE8 if compiled for BE8
@ -1,12 +1,20 @@
/*
 * linux/arch/arm/boot/compressed/vmlinux.lds.in
 *
 * Copyright (C) 2000 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifdef CONFIG_CPU_ENDIAN_BE8
#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \
			  (((x) >> 8) & 0x0000ff00) | \
			  (((x) << 8) & 0x00ff0000) | \
			  (((x) << 24) & 0xff000000) )
#else
#define ZIMAGE_MAGIC(x) (x)
#endif

OUTPUT_ARCH(arm)
ENTRY(_start)
SECTIONS
@ -57,6 +65,10 @@ SECTIONS
  .pad : { BYTE(0); . = ALIGN(8); }
  _edata = .;

  _magic_sig = ZIMAGE_MAGIC(0x016f2818);
  _magic_start = ZIMAGE_MAGIC(_start);
  _magic_end = ZIMAGE_MAGIC(_edata);

  . = BSS_START;
  __bss_start = .;
  .bss : { *(.bss) }
@ -73,4 +85,3 @@ SECTIONS
  .stab.indexstr 0 : { *(.stab.indexstr) }
  .comment 0 : { *(.comment) }
}
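As a quick sanity check on the BE8 handling above, here is a minimal C sketch, not taken from the kernel, of the same 32-bit byte swap that the ZIMAGE_MAGIC() macro performs at link time when CONFIG_CPU_ENDIAN_BE8 is set. The function name, the main() harness and the expected result are illustrative assumptions; only the 0x016f2818 magic value comes from the linker script itself.

```c
#include <stdint.h>
#include <stdio.h>
#include <assert.h>

/* Same byte swap as ZIMAGE_MAGIC() above: on a BE8 kernel the 32-bit
 * header words are stored byte-swapped so they appear in the byte order
 * boot loaders already expect. */
static uint32_t zimage_magic(uint32_t x)
{
	return ((x >> 24) & 0x000000ff) |
	       ((x >>  8) & 0x0000ff00) |
	       ((x <<  8) & 0x00ff0000) |
	       ((x << 24) & 0xff000000);
}

int main(void)
{
	/* 0x016f2818 is the zImage magic used in the linker script above;
	 * byte-reversed it reads 0x18286f01. */
	assert(zimage_magic(0x016f2818) == 0x18286f01);
	printf("0x%08x\n", zimage_magic(0x016f2818));
	return 0;
}
```

Doing the swap in the linker script rather than at run time means the decompressor needs no extra fix-up code: the header words simply come out of the link in the expected order even when the kernel itself is built big-endian.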
@ -12,11 +12,13 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/irqflags.h>
|
||||
#include <linux/cpu_pm.h>
|
||||
|
||||
#include <asm/mcpm.h>
|
||||
#include <asm/cacheflush.h>
|
||||
#include <asm/idmap.h>
|
||||
#include <asm/cputype.h>
|
||||
#include <asm/suspend.h>
|
||||
|
||||
extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
|
||||
|
||||
@ -146,6 +148,56 @@ int mcpm_cpu_powered_up(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_ARM_CPU_SUSPEND
|
||||
|
||||
static int __init nocache_trampoline(unsigned long _arg)
|
||||
{
|
||||
void (*cache_disable)(void) = (void *)_arg;
|
||||
unsigned int mpidr = read_cpuid_mpidr();
|
||||
unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
|
||||
unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
|
||||
phys_reset_t phys_reset;
|
||||
|
||||
mcpm_set_entry_vector(cpu, cluster, cpu_resume);
|
||||
setup_mm_for_reboot();
|
||||
|
||||
__mcpm_cpu_going_down(cpu, cluster);
|
||||
BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
|
||||
cache_disable();
|
||||
__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
|
||||
__mcpm_cpu_down(cpu, cluster);
|
||||
|
||||
phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
|
||||
phys_reset(virt_to_phys(mcpm_entry_point));
|
||||
BUG();
|
||||
}
|
||||
|
||||
int __init mcpm_loopback(void (*cache_disable)(void))
|
||||
{
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* We're going to soft-restart the current CPU through the
|
||||
* low-level MCPM code by leveraging the suspend/resume
|
||||
* infrastructure. Let's play it safe by using cpu_pm_enter()
|
||||
* in case the CPU init code path resets the VFP or similar.
|
||||
*/
|
||||
local_irq_disable();
|
||||
local_fiq_disable();
|
||||
ret = cpu_pm_enter();
|
||||
if (!ret) {
|
||||
ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline);
|
||||
cpu_pm_exit();
|
||||
}
|
||||
local_fiq_enable();
|
||||
local_irq_enable();
|
||||
if (ret)
|
||||
pr_err("%s returned %d\n", __func__, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
struct sync_struct mcpm_sync;
|
||||
|
||||
/*
|
||||
|
@ -5,10 +5,14 @@
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o

aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o

quiet_cmd_perl = PERL $@
      cmd_perl = $(PERL) $(<) > $(@)
@ -35,6 +35,7 @@
@ that is being targetted.

#include <linux/linkage.h>
#include <asm/assembler.h>

.text

@ -648,7 +649,7 @@ _armv4_AES_set_encrypt_key:

.Ldone:	mov	r0,#0
	ldmia	sp!,{r4-r12,lr}
.Labrt:	mov	pc,lr
.Labrt:	ret	lr
ENDPROC(private_AES_set_encrypt_key)

.align	5
arch/arm/crypto/sha1-armv7-neon.S (new file, 634 lines)
@ -0,0 +1,634 @@
|
||||
/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
|
||||
*
|
||||
* Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
|
||||
.syntax unified
|
||||
.code 32
|
||||
.fpu neon
|
||||
|
||||
.text
|
||||
|
||||
|
||||
/* Context structure */
|
||||
|
||||
#define state_h0 0
|
||||
#define state_h1 4
|
||||
#define state_h2 8
|
||||
#define state_h3 12
|
||||
#define state_h4 16
|
||||
|
||||
|
||||
/* Constants */
|
||||
|
||||
#define K1 0x5A827999
|
||||
#define K2 0x6ED9EBA1
|
||||
#define K3 0x8F1BBCDC
|
||||
#define K4 0xCA62C1D6
|
||||
.align 4
|
||||
.LK_VEC:
|
||||
.LK1: .long K1, K1, K1, K1
|
||||
.LK2: .long K2, K2, K2, K2
|
||||
.LK3: .long K3, K3, K3, K3
|
||||
.LK4: .long K4, K4, K4, K4
|
||||
|
||||
|
||||
/* Register macros */
|
||||
|
||||
#define RSTATE r0
|
||||
#define RDATA r1
|
||||
#define RNBLKS r2
|
||||
#define ROLDSTACK r3
|
||||
#define RWK lr
|
||||
|
||||
#define _a r4
|
||||
#define _b r5
|
||||
#define _c r6
|
||||
#define _d r7
|
||||
#define _e r8
|
||||
|
||||
#define RT0 r9
|
||||
#define RT1 r10
|
||||
#define RT2 r11
|
||||
#define RT3 r12
|
||||
|
||||
#define W0 q0
|
||||
#define W1 q1
|
||||
#define W2 q2
|
||||
#define W3 q3
|
||||
#define W4 q4
|
||||
#define W5 q5
|
||||
#define W6 q6
|
||||
#define W7 q7
|
||||
|
||||
#define tmp0 q8
|
||||
#define tmp1 q9
|
||||
#define tmp2 q10
|
||||
#define tmp3 q11
|
||||
|
||||
#define qK1 q12
|
||||
#define qK2 q13
|
||||
#define qK3 q14
|
||||
#define qK4 q15
|
||||
|
||||
|
||||
/* Round function macros. */
|
||||
|
||||
#define WK_offs(i) (((i) & 15) * 4)
|
||||
|
||||
#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
ldr RT3, [sp, WK_offs(i)]; \
|
||||
pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
bic RT0, d, b; \
|
||||
add e, e, a, ror #(32 - 5); \
|
||||
and RT1, c, b; \
|
||||
pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
add RT0, RT0, RT3; \
|
||||
add e, e, RT1; \
|
||||
ror b, #(32 - 30); \
|
||||
pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
add e, e, RT0;
|
||||
|
||||
#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
ldr RT3, [sp, WK_offs(i)]; \
|
||||
pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
eor RT0, d, b; \
|
||||
add e, e, a, ror #(32 - 5); \
|
||||
eor RT0, RT0, c; \
|
||||
pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
add e, e, RT3; \
|
||||
ror b, #(32 - 30); \
|
||||
pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
add e, e, RT0; \
|
||||
|
||||
#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
ldr RT3, [sp, WK_offs(i)]; \
|
||||
pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
eor RT0, b, c; \
|
||||
and RT1, b, c; \
|
||||
add e, e, a, ror #(32 - 5); \
|
||||
pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
and RT0, RT0, d; \
|
||||
add RT1, RT1, RT3; \
|
||||
add e, e, RT0; \
|
||||
ror b, #(32 - 30); \
|
||||
pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
|
||||
add e, e, RT1;
|
||||
|
||||
#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
_R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
|
||||
|
||||
#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
_R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
|
||||
|
||||
#define R(a,b,c,d,e,f,i) \
|
||||
_R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
|
||||
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
|
||||
|
||||
#define dummy(...)
|
||||
|
||||
|
||||
/* Input expansion macros. */
|
||||
|
||||
/********* Precalc macros for rounds 0-15 *************************************/
|
||||
|
||||
#define W_PRECALC_00_15() \
|
||||
add RWK, sp, #(WK_offs(0)); \
|
||||
\
|
||||
vld1.32 {tmp0, tmp1}, [RDATA]!; \
|
||||
vrev32.8 W0, tmp0; /* big => little */ \
|
||||
vld1.32 {tmp2, tmp3}, [RDATA]!; \
|
||||
vadd.u32 tmp0, W0, curK; \
|
||||
vrev32.8 W7, tmp1; /* big => little */ \
|
||||
vrev32.8 W6, tmp2; /* big => little */ \
|
||||
vadd.u32 tmp1, W7, curK; \
|
||||
vrev32.8 W5, tmp3; /* big => little */ \
|
||||
vadd.u32 tmp2, W6, curK; \
|
||||
vst1.32 {tmp0, tmp1}, [RWK]!; \
|
||||
vadd.u32 tmp3, W5, curK; \
|
||||
vst1.32 {tmp2, tmp3}, [RWK]; \
|
||||
|
||||
#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vld1.32 {tmp0, tmp1}, [RDATA]!; \
|
||||
|
||||
#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
add RWK, sp, #(WK_offs(0)); \
|
||||
|
||||
#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vrev32.8 W0, tmp0; /* big => little */ \
|
||||
|
||||
#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vld1.32 {tmp2, tmp3}, [RDATA]!; \
|
||||
|
||||
#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vadd.u32 tmp0, W0, curK; \
|
||||
|
||||
#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vrev32.8 W7, tmp1; /* big => little */ \
|
||||
|
||||
#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vrev32.8 W6, tmp2; /* big => little */ \
|
||||
|
||||
#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vadd.u32 tmp1, W7, curK; \
|
||||
|
||||
#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vrev32.8 W5, tmp3; /* big => little */ \
|
||||
|
||||
#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vadd.u32 tmp2, W6, curK; \
|
||||
|
||||
#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vst1.32 {tmp0, tmp1}, [RWK]!; \
|
||||
|
||||
#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vadd.u32 tmp3, W5, curK; \
|
||||
|
||||
#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vst1.32 {tmp2, tmp3}, [RWK]; \
|
||||
|
||||
|
||||
/********* Precalc macros for rounds 16-31 ************************************/
|
||||
|
||||
#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor tmp0, tmp0; \
|
||||
vext.8 W, W_m16, W_m12, #8; \
|
||||
|
||||
#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
add RWK, sp, #(WK_offs(i)); \
|
||||
vext.8 tmp0, W_m04, tmp0, #4; \
|
||||
|
||||
#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor tmp0, tmp0, W_m16; \
|
||||
veor.32 W, W, W_m08; \
|
||||
|
||||
#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor tmp1, tmp1; \
|
||||
veor W, W, tmp0; \
|
||||
|
||||
#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vshl.u32 tmp0, W, #1; \
|
||||
|
||||
#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vext.8 tmp1, tmp1, W, #(16-12); \
|
||||
vshr.u32 W, W, #31; \
|
||||
|
||||
#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vorr tmp0, tmp0, W; \
|
||||
vshr.u32 W, tmp1, #30; \
|
||||
|
||||
#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vshl.u32 tmp1, tmp1, #2; \
|
||||
|
||||
#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor tmp0, tmp0, W; \
|
||||
|
||||
#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor W, tmp0, tmp1; \
|
||||
|
||||
#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vadd.u32 tmp0, W, curK; \
|
||||
|
||||
#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vst1.32 {tmp0}, [RWK];
|
||||
|
||||
|
||||
/********* Precalc macros for rounds 32-79 ************************************/
|
||||
|
||||
#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor W, W_m28; \
|
||||
|
||||
#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vext.8 tmp0, W_m08, W_m04, #8; \
|
||||
|
||||
#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor W, W_m16; \
|
||||
|
||||
#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
veor W, tmp0; \
|
||||
|
||||
#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
add RWK, sp, #(WK_offs(i&~3)); \
|
||||
|
||||
#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vshl.u32 tmp1, W, #2; \
|
||||
|
||||
#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vshr.u32 tmp0, W, #30; \
|
||||
|
||||
#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vorr W, tmp0, tmp1; \
|
||||
|
||||
#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vadd.u32 tmp0, W, curK; \
|
||||
|
||||
#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
|
||||
vst1.32 {tmp0}, [RWK];
|
||||
|
||||
|
||||
/*
|
||||
* Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
|
||||
*
|
||||
* unsigned int
|
||||
* sha1_transform_neon (void *ctx, const unsigned char *data,
|
||||
* unsigned int nblks)
|
||||
*/
|
||||
.align 3
|
||||
ENTRY(sha1_transform_neon)
|
||||
/* input:
|
||||
* r0: ctx, CTX
|
||||
* r1: data (64*nblks bytes)
|
||||
* r2: nblks
|
||||
*/
|
||||
|
||||
cmp RNBLKS, #0;
|
||||
beq .Ldo_nothing;
|
||||
|
||||
push {r4-r12, lr};
|
||||
/*vpush {q4-q7};*/
|
||||
|
||||
adr RT3, .LK_VEC;
|
||||
|
||||
mov ROLDSTACK, sp;
|
||||
|
||||
/* Align stack. */
|
||||
sub RT0, sp, #(16*4);
|
||||
and RT0, #(~(16-1));
|
||||
mov sp, RT0;
|
||||
|
||||
vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
|
||||
|
||||
/* Get the values of the chaining variables. */
|
||||
ldm RSTATE, {_a-_e};
|
||||
|
||||
vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
|
||||
|
||||
#undef curK
|
||||
#define curK qK1
|
||||
/* Precalc 0-15. */
|
||||
W_PRECALC_00_15();
|
||||
|
||||
.Loop:
|
||||
/* Transform 0-15 + Precalc 16-31. */
|
||||
_R( _a, _b, _c, _d, _e, F1, 0,
|
||||
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
|
||||
W4, W5, W6, W7, W0, _, _, _ );
|
||||
_R( _e, _a, _b, _c, _d, F1, 1,
|
||||
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
|
||||
W4, W5, W6, W7, W0, _, _, _ );
|
||||
_R( _d, _e, _a, _b, _c, F1, 2,
|
||||
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
|
||||
W4, W5, W6, W7, W0, _, _, _ );
|
||||
_R( _c, _d, _e, _a, _b, F1, 3,
|
||||
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
|
||||
W4, W5, W6, W7, W0, _, _, _ );
|
||||
|
||||
#undef curK
|
||||
#define curK qK2
|
||||
_R( _b, _c, _d, _e, _a, F1, 4,
|
||||
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
|
||||
W3, W4, W5, W6, W7, _, _, _ );
|
||||
_R( _a, _b, _c, _d, _e, F1, 5,
|
||||
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
|
||||
W3, W4, W5, W6, W7, _, _, _ );
|
||||
_R( _e, _a, _b, _c, _d, F1, 6,
|
||||
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
|
||||
W3, W4, W5, W6, W7, _, _, _ );
|
||||
_R( _d, _e, _a, _b, _c, F1, 7,
|
||||
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
|
||||
W3, W4, W5, W6, W7, _, _, _ );
|
||||
|
||||
_R( _c, _d, _e, _a, _b, F1, 8,
|
||||
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
|
||||
W2, W3, W4, W5, W6, _, _, _ );
|
||||
_R( _b, _c, _d, _e, _a, F1, 9,
|
||||
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
|
||||
W2, W3, W4, W5, W6, _, _, _ );
|
||||
_R( _a, _b, _c, _d, _e, F1, 10,
|
||||
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
|
||||
W2, W3, W4, W5, W6, _, _, _ );
|
||||
_R( _e, _a, _b, _c, _d, F1, 11,
|
||||
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
|
||||
W2, W3, W4, W5, W6, _, _, _ );
|
||||
|
||||
_R( _d, _e, _a, _b, _c, F1, 12,
|
||||
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
|
||||
W1, W2, W3, W4, W5, _, _, _ );
|
||||
_R( _c, _d, _e, _a, _b, F1, 13,
|
||||
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
|
||||
W1, W2, W3, W4, W5, _, _, _ );
|
||||
_R( _b, _c, _d, _e, _a, F1, 14,
|
||||
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
|
||||
W1, W2, W3, W4, W5, _, _, _ );
|
||||
_R( _a, _b, _c, _d, _e, F1, 15,
|
||||
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
|
||||
W1, W2, W3, W4, W5, _, _, _ );
|
||||
|
||||
/* Transform 16-63 + Precalc 32-79. */
|
||||
_R( _e, _a, _b, _c, _d, F1, 16,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
_R( _d, _e, _a, _b, _c, F1, 17,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
_R( _c, _d, _e, _a, _b, F1, 18,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
_R( _b, _c, _d, _e, _a, F1, 19,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
|
||||
_R( _a, _b, _c, _d, _e, F2, 20,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
_R( _e, _a, _b, _c, _d, F2, 21,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
_R( _d, _e, _a, _b, _c, F2, 22,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
_R( _c, _d, _e, _a, _b, F2, 23,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
|
||||
#undef curK
|
||||
#define curK qK3
|
||||
_R( _b, _c, _d, _e, _a, F2, 24,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
_R( _a, _b, _c, _d, _e, F2, 25,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
_R( _e, _a, _b, _c, _d, F2, 26,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
_R( _d, _e, _a, _b, _c, F2, 27,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
|
||||
_R( _c, _d, _e, _a, _b, F2, 28,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
_R( _b, _c, _d, _e, _a, F2, 29,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
_R( _a, _b, _c, _d, _e, F2, 30,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
_R( _e, _a, _b, _c, _d, F2, 31,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
|
||||
_R( _d, _e, _a, _b, _c, F2, 32,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
|
||||
W4, W5, W6, W7, W0, W1, W2, W3);
|
||||
_R( _c, _d, _e, _a, _b, F2, 33,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
|
||||
W4, W5, W6, W7, W0, W1, W2, W3);
|
||||
_R( _b, _c, _d, _e, _a, F2, 34,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48,
|
||||
W4, W5, W6, W7, W0, W1, W2, W3);
|
||||
_R( _a, _b, _c, _d, _e, F2, 35,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48,
|
||||
W4, W5, W6, W7, W0, W1, W2, W3);
|
||||
|
||||
_R( _e, _a, _b, _c, _d, F2, 36,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
|
||||
W3, W4, W5, W6, W7, W0, W1, W2);
|
||||
_R( _d, _e, _a, _b, _c, F2, 37,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
|
||||
W3, W4, W5, W6, W7, W0, W1, W2);
|
||||
_R( _c, _d, _e, _a, _b, F2, 38,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52,
|
||||
W3, W4, W5, W6, W7, W0, W1, W2);
|
||||
_R( _b, _c, _d, _e, _a, F2, 39,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52,
|
||||
W3, W4, W5, W6, W7, W0, W1, W2);
|
||||
|
||||
_R( _a, _b, _c, _d, _e, F3, 40,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
|
||||
W2, W3, W4, W5, W6, W7, W0, W1);
|
||||
_R( _e, _a, _b, _c, _d, F3, 41,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
|
||||
W2, W3, W4, W5, W6, W7, W0, W1);
|
||||
_R( _d, _e, _a, _b, _c, F3, 42,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56,
|
||||
W2, W3, W4, W5, W6, W7, W0, W1);
|
||||
_R( _c, _d, _e, _a, _b, F3, 43,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56,
|
||||
W2, W3, W4, W5, W6, W7, W0, W1);
|
||||
|
||||
#undef curK
|
||||
#define curK qK4
|
||||
_R( _b, _c, _d, _e, _a, F3, 44,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
|
||||
W1, W2, W3, W4, W5, W6, W7, W0);
|
||||
_R( _a, _b, _c, _d, _e, F3, 45,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
|
||||
W1, W2, W3, W4, W5, W6, W7, W0);
|
||||
_R( _e, _a, _b, _c, _d, F3, 46,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60,
|
||||
W1, W2, W3, W4, W5, W6, W7, W0);
|
||||
_R( _d, _e, _a, _b, _c, F3, 47,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60,
|
||||
W1, W2, W3, W4, W5, W6, W7, W0);
|
||||
|
||||
_R( _c, _d, _e, _a, _b, F3, 48,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
_R( _b, _c, _d, _e, _a, F3, 49,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
_R( _a, _b, _c, _d, _e, F3, 50,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
_R( _e, _a, _b, _c, _d, F3, 51,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64,
|
||||
W0, W1, W2, W3, W4, W5, W6, W7);
|
||||
|
||||
_R( _d, _e, _a, _b, _c, F3, 52,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
_R( _c, _d, _e, _a, _b, F3, 53,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
_R( _b, _c, _d, _e, _a, F3, 54,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
_R( _a, _b, _c, _d, _e, F3, 55,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68,
|
||||
W7, W0, W1, W2, W3, W4, W5, W6);
|
||||
|
||||
_R( _e, _a, _b, _c, _d, F3, 56,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
_R( _d, _e, _a, _b, _c, F3, 57,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
_R( _c, _d, _e, _a, _b, F3, 58,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
_R( _b, _c, _d, _e, _a, F3, 59,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72,
|
||||
W6, W7, W0, W1, W2, W3, W4, W5);
|
||||
|
||||
subs RNBLKS, #1;
|
||||
|
||||
_R( _a, _b, _c, _d, _e, F4, 60,
|
||||
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
_R( _e, _a, _b, _c, _d, F4, 61,
|
||||
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
_R( _d, _e, _a, _b, _c, F4, 62,
|
||||
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
_R( _c, _d, _e, _a, _b, F4, 63,
|
||||
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76,
|
||||
W5, W6, W7, W0, W1, W2, W3, W4);
|
||||
|
||||
beq .Lend;
|
||||
|
||||
/* Transform 64-79 + Precalc 0-15 of next block. */
|
||||
#undef curK
|
||||
#define curK qK1
|
||||
_R( _b, _c, _d, _e, _a, F4, 64,
|
||||
WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _a, _b, _c, _d, _e, F4, 65,
|
||||
WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _e, _a, _b, _c, _d, F4, 66,
|
||||
WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _d, _e, _a, _b, _c, F4, 67,
|
||||
WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
|
||||
_R( _c, _d, _e, _a, _b, F4, 68,
|
||||
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _b, _c, _d, _e, _a, F4, 69,
|
||||
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _a, _b, _c, _d, _e, F4, 70,
|
||||
WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _e, _a, _b, _c, _d, F4, 71,
|
||||
WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
|
||||
_R( _d, _e, _a, _b, _c, F4, 72,
|
||||
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _c, _d, _e, _a, _b, F4, 73,
|
||||
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _b, _c, _d, _e, _a, F4, 74,
|
||||
WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _a, _b, _c, _d, _e, F4, 75,
|
||||
WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
|
||||
_R( _e, _a, _b, _c, _d, F4, 76,
|
||||
WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _d, _e, _a, _b, _c, F4, 77,
|
||||
WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _c, _d, _e, _a, _b, F4, 78,
|
||||
WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
|
||||
_R( _b, _c, _d, _e, _a, F4, 79,
|
||||
WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
|
||||
|
||||
/* Update the chaining variables. */
|
||||
ldm RSTATE, {RT0-RT3};
|
||||
add _a, RT0;
|
||||
ldr RT0, [RSTATE, #state_h4];
|
||||
add _b, RT1;
|
||||
add _c, RT2;
|
||||
add _d, RT3;
|
||||
add _e, RT0;
|
||||
stm RSTATE, {_a-_e};
|
||||
|
||||
b .Loop;
|
||||
|
||||
.Lend:
|
||||
/* Transform 64-79 */
|
||||
R( _b, _c, _d, _e, _a, F4, 64 );
|
||||
R( _a, _b, _c, _d, _e, F4, 65 );
|
||||
R( _e, _a, _b, _c, _d, F4, 66 );
|
||||
R( _d, _e, _a, _b, _c, F4, 67 );
|
||||
R( _c, _d, _e, _a, _b, F4, 68 );
|
||||
R( _b, _c, _d, _e, _a, F4, 69 );
|
||||
R( _a, _b, _c, _d, _e, F4, 70 );
|
||||
R( _e, _a, _b, _c, _d, F4, 71 );
|
||||
R( _d, _e, _a, _b, _c, F4, 72 );
|
||||
R( _c, _d, _e, _a, _b, F4, 73 );
|
||||
R( _b, _c, _d, _e, _a, F4, 74 );
|
||||
R( _a, _b, _c, _d, _e, F4, 75 );
|
||||
R( _e, _a, _b, _c, _d, F4, 76 );
|
||||
R( _d, _e, _a, _b, _c, F4, 77 );
|
||||
R( _c, _d, _e, _a, _b, F4, 78 );
|
||||
R( _b, _c, _d, _e, _a, F4, 79 );
|
||||
|
||||
mov sp, ROLDSTACK;
|
||||
|
||||
/* Update the chaining variables. */
|
||||
ldm RSTATE, {RT0-RT3};
|
||||
add _a, RT0;
|
||||
ldr RT0, [RSTATE, #state_h4];
|
||||
add _b, RT1;
|
||||
add _c, RT2;
|
||||
add _d, RT3;
|
||||
/*vpop {q4-q7};*/
|
||||
add _e, RT0;
|
||||
stm RSTATE, {_a-_e};
|
||||
|
||||
pop {r4-r12, pc};
|
||||
|
||||
.Ldo_nothing:
|
||||
bx lr
|
||||
ENDPROC(sha1_transform_neon)
|
@ -23,32 +23,27 @@
|
||||
#include <linux/types.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/crypto/sha1.h>
|
||||
|
||||
struct SHA1_CTX {
|
||||
uint32_t h0,h1,h2,h3,h4;
|
||||
u64 count;
|
||||
u8 data[SHA1_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
|
||||
asmlinkage void sha1_block_data_order(u32 *digest,
|
||||
const unsigned char *data, unsigned int rounds);
|
||||
|
||||
|
||||
static int sha1_init(struct shash_desc *desc)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
memset(sctx, 0, sizeof(*sctx));
|
||||
sctx->h0 = SHA1_H0;
|
||||
sctx->h1 = SHA1_H1;
|
||||
sctx->h2 = SHA1_H2;
|
||||
sctx->h3 = SHA1_H3;
|
||||
sctx->h4 = SHA1_H4;
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
*sctx = (struct sha1_state){
|
||||
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
|
||||
unsigned int len, unsigned int partial)
|
||||
static int __sha1_update(struct sha1_state *sctx, const u8 *data,
|
||||
unsigned int len, unsigned int partial)
|
||||
{
|
||||
unsigned int done = 0;
|
||||
|
||||
@ -56,43 +51,44 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
|
||||
|
||||
if (partial) {
|
||||
done = SHA1_BLOCK_SIZE - partial;
|
||||
memcpy(sctx->data + partial, data, done);
|
||||
sha1_block_data_order(sctx, sctx->data, 1);
|
||||
memcpy(sctx->buffer + partial, data, done);
|
||||
sha1_block_data_order(sctx->state, sctx->buffer, 1);
|
||||
}
|
||||
|
||||
if (len - done >= SHA1_BLOCK_SIZE) {
|
||||
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
|
||||
sha1_block_data_order(sctx, data + done, rounds);
|
||||
sha1_block_data_order(sctx->state, data + done, rounds);
|
||||
done += rounds * SHA1_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
memcpy(sctx->data, data + done, len - done);
|
||||
memcpy(sctx->buffer, data + done, len - done);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int sha1_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
int sha1_update_arm(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
|
||||
int res;
|
||||
|
||||
/* Handle the fast case right here */
|
||||
if (partial + len < SHA1_BLOCK_SIZE) {
|
||||
sctx->count += len;
|
||||
memcpy(sctx->data + partial, data, len);
|
||||
memcpy(sctx->buffer + partial, data, len);
|
||||
return 0;
|
||||
}
|
||||
res = __sha1_update(sctx, data, len, partial);
|
||||
return res;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(sha1_update_arm);
|
||||
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
static int sha1_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int i, index, padlen;
|
||||
__be32 *dst = (__be32 *)out;
|
||||
__be64 bits;
|
||||
@ -106,7 +102,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
|
||||
/* We need to fill a whole block for __sha1_update() */
|
||||
if (padlen <= 56) {
|
||||
sctx->count += padlen;
|
||||
memcpy(sctx->data + index, padding, padlen);
|
||||
memcpy(sctx->buffer + index, padding, padlen);
|
||||
} else {
|
||||
__sha1_update(sctx, padding, padlen, index);
|
||||
}
|
||||
@ -114,7 +110,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
|
||||
|
||||
/* Store state in digest */
|
||||
for (i = 0; i < 5; i++)
|
||||
dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
|
||||
dst[i] = cpu_to_be32(sctx->state[i]);
|
||||
|
||||
/* Wipe context */
|
||||
memset(sctx, 0, sizeof(*sctx));
|
||||
@ -124,7 +120,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
|
||||
|
||||
static int sha1_export(struct shash_desc *desc, void *out)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
memcpy(out, sctx, sizeof(*sctx));
|
||||
return 0;
|
||||
}
|
||||
@ -132,7 +128,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
|
||||
|
||||
static int sha1_import(struct shash_desc *desc, const void *in)
|
||||
{
|
||||
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
memcpy(sctx, in, sizeof(*sctx));
|
||||
return 0;
|
||||
}
|
||||
@ -141,12 +137,12 @@ static int sha1_import(struct shash_desc *desc, const void *in)
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = SHA1_DIGEST_SIZE,
|
||||
.init = sha1_init,
|
||||
.update = sha1_update,
|
||||
.update = sha1_update_arm,
|
||||
.final = sha1_final,
|
||||
.export = sha1_export,
|
||||
.import = sha1_import,
|
||||
.descsize = sizeof(struct SHA1_CTX),
|
||||
.statesize = sizeof(struct SHA1_CTX),
|
||||
.descsize = sizeof(struct sha1_state),
|
||||
.statesize = sizeof(struct sha1_state),
|
||||
.base = {
|
||||
.cra_name = "sha1",
|
||||
.cra_driver_name= "sha1-asm",
|
||||
|
arch/arm/crypto/sha1_neon_glue.c (new file, 197 lines)
@ -0,0 +1,197 @@
|
||||
/*
|
||||
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
|
||||
* ARM NEON instructions.
|
||||
*
|
||||
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This file is based on sha1_generic.c and sha1_ssse3_glue.c:
|
||||
* Copyright (c) Alan Smithee.
|
||||
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
|
||||
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
|
||||
* Copyright (c) Mathias Krause <minipli@googlemail.com>
|
||||
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cryptohash.h>
|
||||
#include <linux/types.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/crypto/sha1.h>
|
||||
|
||||
|
||||
asmlinkage void sha1_transform_neon(void *state_h, const char *data,
|
||||
unsigned int rounds);
|
||||
|
||||
|
||||
static int sha1_neon_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
*sctx = (struct sha1_state){
|
||||
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
|
||||
};
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, unsigned int partial)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int done = 0;
|
||||
|
||||
sctx->count += len;
|
||||
|
||||
if (partial) {
|
||||
done = SHA1_BLOCK_SIZE - partial;
|
||||
memcpy(sctx->buffer + partial, data, done);
|
||||
sha1_transform_neon(sctx->state, sctx->buffer, 1);
|
||||
}
|
||||
|
||||
if (len - done >= SHA1_BLOCK_SIZE) {
|
||||
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
|
||||
|
||||
sha1_transform_neon(sctx->state, data + done, rounds);
|
||||
done += rounds * SHA1_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
memcpy(sctx->buffer, data + done, len - done);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
|
||||
int res;
|
||||
|
||||
/* Handle the fast case right here */
|
||||
if (partial + len < SHA1_BLOCK_SIZE) {
|
||||
sctx->count += len;
|
||||
memcpy(sctx->buffer + partial, data, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!may_use_simd()) {
|
||||
res = sha1_update_arm(desc, data, len);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
res = __sha1_neon_update(desc, data, len, partial);
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
static int sha1_neon_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int i, index, padlen;
|
||||
__be32 *dst = (__be32 *)out;
|
||||
__be64 bits;
|
||||
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
|
||||
|
||||
bits = cpu_to_be64(sctx->count << 3);
|
||||
|
||||
/* Pad out to 56 mod 64 and append length */
|
||||
index = sctx->count % SHA1_BLOCK_SIZE;
|
||||
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
|
||||
if (!may_use_simd()) {
|
||||
sha1_update_arm(desc, padding, padlen);
|
||||
sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
/* We need to fill a whole block for __sha1_neon_update() */
|
||||
if (padlen <= 56) {
|
||||
sctx->count += padlen;
|
||||
memcpy(sctx->buffer + index, padding, padlen);
|
||||
} else {
|
||||
__sha1_neon_update(desc, padding, padlen, index);
|
||||
}
|
||||
__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
/* Store state in digest */
|
||||
for (i = 0; i < 5; i++)
|
||||
dst[i] = cpu_to_be32(sctx->state[i]);
|
||||
|
||||
/* Wipe context */
|
||||
memset(sctx, 0, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_neon_export(struct shash_desc *desc, void *out)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(out, sctx, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha1_neon_import(struct shash_desc *desc, const void *in)
|
||||
{
|
||||
struct sha1_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(sctx, in, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg alg = {
|
||||
.digestsize = SHA1_DIGEST_SIZE,
|
||||
.init = sha1_neon_init,
|
||||
.update = sha1_neon_update,
|
||||
.final = sha1_neon_final,
|
||||
.export = sha1_neon_export,
|
||||
.import = sha1_neon_import,
|
||||
.descsize = sizeof(struct sha1_state),
|
||||
.statesize = sizeof(struct sha1_state),
|
||||
.base = {
|
||||
.cra_name = "sha1",
|
||||
.cra_driver_name = "sha1-neon",
|
||||
.cra_priority = 250,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA1_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
};
|
||||
|
||||
static int __init sha1_neon_mod_init(void)
|
||||
{
|
||||
if (!cpu_has_neon())
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_shash(&alg);
|
||||
}
|
||||
|
||||
static void __exit sha1_neon_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shash(&alg);
|
||||
}
|
||||
|
||||
module_init(sha1_neon_mod_init);
|
||||
module_exit(sha1_neon_mod_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
|
||||
MODULE_ALIAS("sha1");
|
arch/arm/crypto/sha512-armv7-neon.S (new file, 455 lines)
@ -0,0 +1,455 @@
|
||||
/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
|
||||
*
|
||||
* Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
|
||||
|
||||
.syntax unified
|
||||
.code 32
|
||||
.fpu neon
|
||||
|
||||
.text
|
||||
|
||||
/* structure of SHA512_CONTEXT */
|
||||
#define hd_a 0
|
||||
#define hd_b ((hd_a) + 8)
|
||||
#define hd_c ((hd_b) + 8)
|
||||
#define hd_d ((hd_c) + 8)
|
||||
#define hd_e ((hd_d) + 8)
|
||||
#define hd_f ((hd_e) + 8)
|
||||
#define hd_g ((hd_f) + 8)
|
||||
|
||||
/* register macros */
|
||||
#define RK %r2
|
||||
|
||||
#define RA d0
|
||||
#define RB d1
|
||||
#define RC d2
|
||||
#define RD d3
|
||||
#define RE d4
|
||||
#define RF d5
|
||||
#define RG d6
|
||||
#define RH d7
|
||||
|
||||
#define RT0 d8
|
||||
#define RT1 d9
|
||||
#define RT2 d10
|
||||
#define RT3 d11
|
||||
#define RT4 d12
|
||||
#define RT5 d13
|
||||
#define RT6 d14
|
||||
#define RT7 d15
|
||||
|
||||
#define RT01q q4
|
||||
#define RT23q q5
|
||||
#define RT45q q6
|
||||
#define RT67q q7
|
||||
|
||||
#define RW0 d16
|
||||
#define RW1 d17
|
||||
#define RW2 d18
|
||||
#define RW3 d19
|
||||
#define RW4 d20
|
||||
#define RW5 d21
|
||||
#define RW6 d22
|
||||
#define RW7 d23
|
||||
#define RW8 d24
|
||||
#define RW9 d25
|
||||
#define RW10 d26
|
||||
#define RW11 d27
|
||||
#define RW12 d28
|
||||
#define RW13 d29
|
||||
#define RW14 d30
|
||||
#define RW15 d31
|
||||
|
||||
#define RW01q q8
|
||||
#define RW23q q9
|
||||
#define RW45q q10
|
||||
#define RW67q q11
|
||||
#define RW89q q12
|
||||
#define RW1011q q13
|
||||
#define RW1213q q14
|
||||
#define RW1415q q15
|
||||
|
||||
/***********************************************************************
|
||||
* ARM assembly implementation of sha512 transform
|
||||
***********************************************************************/
|
||||
#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
|
||||
rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
|
||||
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
|
||||
vshr.u64 RT2, re, #14; \
|
||||
vshl.u64 RT3, re, #64 - 14; \
|
||||
interleave_op(arg1); \
|
||||
vshr.u64 RT4, re, #18; \
|
||||
vshl.u64 RT5, re, #64 - 18; \
|
||||
vld1.64 {RT0}, [RK]!; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, re, #41; \
|
||||
vshl.u64 RT5, re, #64 - 41; \
|
||||
vadd.u64 RT0, RT0, rw0; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vmov.64 RT7, re; \
|
||||
veor.64 RT1, RT2, RT3; \
|
||||
vbsl.64 RT7, rf, rg; \
|
||||
\
|
||||
vadd.u64 RT1, RT1, rh; \
|
||||
vshr.u64 RT2, ra, #28; \
|
||||
vshl.u64 RT3, ra, #64 - 28; \
|
||||
vadd.u64 RT1, RT1, RT0; \
|
||||
vshr.u64 RT4, ra, #34; \
|
||||
vshl.u64 RT5, ra, #64 - 34; \
|
||||
vadd.u64 RT1, RT1, RT7; \
|
||||
\
|
||||
/* h = Sum0 (a) + Maj (a, b, c); */ \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, ra, #39; \
|
||||
vshl.u64 RT5, ra, #64 - 39; \
|
||||
veor.64 RT0, ra, rb; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vbsl.64 RT0, rc, rb; \
|
||||
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
|
||||
veor.64 rh, RT2, RT3; \
|
||||
\
|
||||
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
|
||||
vshr.u64 RT2, rd, #14; \
|
||||
vshl.u64 RT3, rd, #64 - 14; \
|
||||
vadd.u64 rh, rh, RT0; \
|
||||
vshr.u64 RT4, rd, #18; \
|
||||
vshl.u64 RT5, rd, #64 - 18; \
|
||||
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
|
||||
vld1.64 {RT0}, [RK]!; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, rd, #41; \
|
||||
vshl.u64 RT5, rd, #64 - 41; \
|
||||
vadd.u64 RT0, RT0, rw1; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vmov.64 RT7, rd; \
|
||||
veor.64 RT1, RT2, RT3; \
|
||||
vbsl.64 RT7, re, rf; \
|
||||
\
|
||||
vadd.u64 RT1, RT1, rg; \
|
||||
vshr.u64 RT2, rh, #28; \
|
||||
vshl.u64 RT3, rh, #64 - 28; \
|
||||
vadd.u64 RT1, RT1, RT0; \
|
||||
vshr.u64 RT4, rh, #34; \
|
||||
vshl.u64 RT5, rh, #64 - 34; \
|
||||
vadd.u64 RT1, RT1, RT7; \
|
||||
\
|
||||
/* g = Sum0 (h) + Maj (h, a, b); */ \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, rh, #39; \
|
||||
vshl.u64 RT5, rh, #64 - 39; \
|
||||
veor.64 RT0, rh, ra; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vbsl.64 RT0, rb, ra; \
|
||||
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
|
||||
veor.64 rg, RT2, RT3; \
|
||||
\
|
||||
/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
|
||||
/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
|
||||
\
|
||||
/**** S0(w[1:2]) */ \
|
||||
\
|
||||
/* w[0:1] += w[9:10] */ \
|
||||
/* RT23q = rw1:rw2 */ \
|
||||
vext.u64 RT23q, rw01q, rw23q, #1; \
|
||||
vadd.u64 rw0, rw9; \
|
||||
vadd.u64 rg, rg, RT0; \
|
||||
vadd.u64 rw1, rw10;\
|
||||
vadd.u64 rg, rg, RT1; /* g+=t1; */ \
|
||||
\
|
||||
vshr.u64 RT45q, RT23q, #1; \
|
||||
vshl.u64 RT67q, RT23q, #64 - 1; \
|
||||
vshr.u64 RT01q, RT23q, #8; \
|
||||
veor.u64 RT45q, RT45q, RT67q; \
|
||||
vshl.u64 RT67q, RT23q, #64 - 8; \
|
||||
veor.u64 RT45q, RT45q, RT01q; \
|
||||
vshr.u64 RT01q, RT23q, #7; \
|
||||
veor.u64 RT45q, RT45q, RT67q; \
|
||||
\
|
||||
/**** S1(w[14:15]) */ \
|
||||
vshr.u64 RT23q, rw1415q, #6; \
|
||||
veor.u64 RT01q, RT01q, RT45q; \
|
||||
vshr.u64 RT45q, rw1415q, #19; \
|
||||
vshl.u64 RT67q, rw1415q, #64 - 19; \
|
||||
veor.u64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT45q, rw1415q, #61; \
|
||||
veor.u64 RT23q, RT23q, RT67q; \
|
||||
vshl.u64 RT67q, rw1415q, #64 - 61; \
|
||||
veor.u64 RT23q, RT23q, RT45q; \
|
||||
vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
|
||||
veor.u64 RT01q, RT23q, RT67q;
|
||||
#define vadd_RT01q(rw01q) \
|
||||
/* w[0:1] += S(w[14:15]) */ \
|
||||
vadd.u64 rw01q, RT01q;
|
||||
|
||||
#define dummy(_) /*_*/
|
||||
|
||||
#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
|
||||
interleave_op1, arg1, interleave_op2, arg2) \
|
||||
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
|
||||
vshr.u64 RT2, re, #14; \
|
||||
vshl.u64 RT3, re, #64 - 14; \
|
||||
interleave_op1(arg1); \
|
||||
vshr.u64 RT4, re, #18; \
|
||||
vshl.u64 RT5, re, #64 - 18; \
|
||||
interleave_op2(arg2); \
|
||||
vld1.64 {RT0}, [RK]!; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, re, #41; \
|
||||
vshl.u64 RT5, re, #64 - 41; \
|
||||
vadd.u64 RT0, RT0, rw0; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vmov.64 RT7, re; \
|
||||
veor.64 RT1, RT2, RT3; \
|
||||
vbsl.64 RT7, rf, rg; \
|
||||
\
|
||||
vadd.u64 RT1, RT1, rh; \
|
||||
vshr.u64 RT2, ra, #28; \
|
||||
vshl.u64 RT3, ra, #64 - 28; \
|
||||
vadd.u64 RT1, RT1, RT0; \
|
||||
vshr.u64 RT4, ra, #34; \
|
||||
vshl.u64 RT5, ra, #64 - 34; \
|
||||
vadd.u64 RT1, RT1, RT7; \
|
||||
\
|
||||
/* h = Sum0 (a) + Maj (a, b, c); */ \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, ra, #39; \
|
||||
vshl.u64 RT5, ra, #64 - 39; \
|
||||
veor.64 RT0, ra, rb; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vbsl.64 RT0, rc, rb; \
|
||||
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
|
||||
veor.64 rh, RT2, RT3; \
|
||||
\
|
||||
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
|
||||
vshr.u64 RT2, rd, #14; \
|
||||
vshl.u64 RT3, rd, #64 - 14; \
|
||||
vadd.u64 rh, rh, RT0; \
|
||||
vshr.u64 RT4, rd, #18; \
|
||||
vshl.u64 RT5, rd, #64 - 18; \
|
||||
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
|
||||
vld1.64 {RT0}, [RK]!; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, rd, #41; \
|
||||
vshl.u64 RT5, rd, #64 - 41; \
|
||||
vadd.u64 RT0, RT0, rw1; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vmov.64 RT7, rd; \
|
||||
veor.64 RT1, RT2, RT3; \
|
||||
vbsl.64 RT7, re, rf; \
|
||||
\
|
||||
vadd.u64 RT1, RT1, rg; \
|
||||
vshr.u64 RT2, rh, #28; \
|
||||
vshl.u64 RT3, rh, #64 - 28; \
|
||||
vadd.u64 RT1, RT1, RT0; \
|
||||
vshr.u64 RT4, rh, #34; \
|
||||
vshl.u64 RT5, rh, #64 - 34; \
|
||||
vadd.u64 RT1, RT1, RT7; \
|
||||
\
|
||||
/* g = Sum0 (h) + Maj (h, a, b); */ \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vshr.u64 RT4, rh, #39; \
|
||||
vshl.u64 RT5, rh, #64 - 39; \
|
||||
veor.64 RT0, rh, ra; \
|
||||
veor.64 RT23q, RT23q, RT45q; \
|
||||
vbsl.64 RT0, rb, ra; \
|
||||
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
|
||||
veor.64 rg, RT2, RT3;
|
||||
#define vadd_rg_RT0(rg) \
|
||||
vadd.u64 rg, rg, RT0;
|
||||
#define vadd_rg_RT1(rg) \
|
||||
vadd.u64 rg, rg, RT1; /* g+=t1; */
|
||||
|
||||
.align 3
|
||||
ENTRY(sha512_transform_neon)
|
||||
/* Input:
|
||||
* %r0: SHA512_CONTEXT
|
||||
* %r1: data
|
||||
* %r2: u64 k[] constants
|
||||
* %r3: nblks
|
||||
*/
|
||||
push {%lr};
|
||||
|
||||
mov %lr, #0;
|
||||
|
||||
/* Load context to d0-d7 */
|
||||
vld1.64 {RA-RD}, [%r0]!;
|
||||
vld1.64 {RE-RH}, [%r0];
|
||||
sub %r0, #(4*8);
|
||||
|
||||
/* Load input to w[16], d16-d31 */
|
||||
/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
|
||||
vld1.64 {RW0-RW3}, [%r1]!;
|
||||
vld1.64 {RW4-RW7}, [%r1]!;
|
||||
vld1.64 {RW8-RW11}, [%r1]!;
|
||||
vld1.64 {RW12-RW15}, [%r1]!;
|
||||
#ifdef __ARMEL__
|
||||
/* byteswap */
|
||||
vrev64.8 RW01q, RW01q;
|
||||
vrev64.8 RW23q, RW23q;
|
||||
vrev64.8 RW45q, RW45q;
|
||||
vrev64.8 RW67q, RW67q;
|
||||
vrev64.8 RW89q, RW89q;
|
||||
vrev64.8 RW1011q, RW1011q;
|
||||
vrev64.8 RW1213q, RW1213q;
|
||||
vrev64.8 RW1415q, RW1415q;
|
||||
#endif
|
||||
|
||||
/* EABI says that d8-d15 must be preserved by callee. */
|
||||
/*vpush {RT0-RT7};*/
|
||||
|
||||
.Loop:
|
||||
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
|
||||
RW23q, RW1415q, RW9, RW10, dummy, _);
|
||||
b .Lenter_rounds;
|
||||
|
||||
.Loop_rounds:
|
||||
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
|
||||
RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
|
||||
.Lenter_rounds:
|
||||
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
|
||||
RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
|
||||
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
|
||||
RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
|
||||
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
|
||||
RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
|
||||
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
|
||||
RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
|
||||
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
|
||||
RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
|
||||
add %lr, #16;
|
||||
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
|
||||
RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
|
||||
cmp %lr, #64;
|
||||
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
|
||||
RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
|
||||
bne .Loop_rounds;
|
||||
|
||||
subs %r3, #1;
|
||||
|
||||
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
|
||||
vadd_RT01q, RW1415q, dummy, _);
|
||||
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
|
||||
vadd_rg_RT0, RG, vadd_rg_RT1, RG);
|
||||
beq .Lhandle_tail;
|
||||
vld1.64 {RW0-RW3}, [%r1]!;
|
||||
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
|
||||
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
|
||||
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
|
||||
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 RW01q, RW01q;
|
||||
vrev64.8 RW23q, RW23q;
|
||||
#endif
|
||||
vld1.64 {RW4-RW7}, [%r1]!;
|
||||
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
|
||||
vadd_rg_RT0, RA, vadd_rg_RT1, RA);
|
||||
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
|
||||
vadd_rg_RT0, RG, vadd_rg_RT1, RG);
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 RW45q, RW45q;
|
||||
vrev64.8 RW67q, RW67q;
|
||||
#endif
|
||||
vld1.64 {RW8-RW11}, [%r1]!;
|
||||
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
|
||||
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
|
||||
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
|
||||
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 RW89q, RW89q;
|
||||
vrev64.8 RW1011q, RW1011q;
|
||||
#endif
|
||||
vld1.64 {RW12-RW15}, [%r1]!;
|
||||
vadd_rg_RT0(RA);
|
||||
vadd_rg_RT1(RA);
|
||||
|
||||
/* Load context */
|
||||
vld1.64 {RT0-RT3}, [%r0]!;
|
||||
vld1.64 {RT4-RT7}, [%r0];
|
||||
sub %r0, #(4*8);
|
||||
|
||||
#ifdef __ARMEL__
|
||||
vrev64.8 RW1213q, RW1213q;
|
||||
vrev64.8 RW1415q, RW1415q;
|
||||
#endif
|
||||
|
||||
vadd.u64 RA, RT0;
|
||||
vadd.u64 RB, RT1;
|
||||
vadd.u64 RC, RT2;
|
||||
vadd.u64 RD, RT3;
|
||||
vadd.u64 RE, RT4;
|
||||
vadd.u64 RF, RT5;
|
||||
vadd.u64 RG, RT6;
|
||||
vadd.u64 RH, RT7;
|
||||
|
||||
/* Store the first half of context */
|
||||
vst1.64 {RA-RD}, [%r0]!;
|
||||
sub RK, $(8*80);
|
||||
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
|
||||
mov %lr, #0;
|
||||
sub %r0, #(4*8);
|
||||
|
||||
b .Loop;
|
||||
|
||||
.Lhandle_tail:
|
||||
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
|
||||
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
|
||||
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
|
||||
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
|
||||
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
|
||||
vadd_rg_RT0, RA, vadd_rg_RT1, RA);
|
||||
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
|
||||
vadd_rg_RT0, RG, vadd_rg_RT1, RG);
|
||||
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
|
||||
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
|
||||
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
|
||||
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
|
||||
|
||||
/* Load context to d16-d23 */
|
||||
vld1.64 {RW0-RW3}, [%r0]!;
|
||||
vadd_rg_RT0(RA);
|
||||
vld1.64 {RW4-RW7}, [%r0];
|
||||
vadd_rg_RT1(RA);
|
||||
sub %r0, #(4*8);
|
||||
|
||||
vadd.u64 RA, RW0;
|
||||
vadd.u64 RB, RW1;
|
||||
vadd.u64 RC, RW2;
|
||||
vadd.u64 RD, RW3;
|
||||
vadd.u64 RE, RW4;
|
||||
vadd.u64 RF, RW5;
|
||||
vadd.u64 RG, RW6;
|
||||
vadd.u64 RH, RW7;
|
||||
|
||||
/* Store the first half of context */
|
||||
vst1.64 {RA-RD}, [%r0]!;
|
||||
|
||||
/* Clear used registers */
|
||||
/* d16-d31 */
|
||||
veor.u64 RW01q, RW01q;
|
||||
veor.u64 RW23q, RW23q;
|
||||
veor.u64 RW45q, RW45q;
|
||||
veor.u64 RW67q, RW67q;
|
||||
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
|
||||
veor.u64 RW89q, RW89q;
|
||||
veor.u64 RW1011q, RW1011q;
|
||||
veor.u64 RW1213q, RW1213q;
|
||||
veor.u64 RW1415q, RW1415q;
|
||||
/* d8-d15 */
|
||||
/*vpop {RT0-RT7};*/
|
||||
/* d0-d7 (q0-q3) */
|
||||
veor.u64 %q0, %q0;
|
||||
veor.u64 %q1, %q1;
|
||||
veor.u64 %q2, %q2;
|
||||
veor.u64 %q3, %q3;
|
||||
|
||||
pop {%pc};
|
||||
ENDPROC(sha512_transform_neon)
|
305
arch/arm/crypto/sha512_neon_glue.c
Normal file
@ -0,0 +1,305 @@
|
||||
/*
|
||||
* Glue code for the SHA512 Secure Hash Algorithm assembly implementation
|
||||
* using NEON instructions.
|
||||
*
|
||||
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
||||
*
|
||||
* This file is based on sha512_ssse3_glue.c:
|
||||
* Copyright (C) 2013 Intel Corporation
|
||||
* Author: Tim Chen <tim.c.chen@linux.intel.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License as published by the Free
|
||||
* Software Foundation; either version 2 of the License, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/cryptohash.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/string.h>
|
||||
#include <crypto/sha.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <asm/simd.h>
|
||||
#include <asm/neon.h>
|
||||
|
||||
|
||||
static const u64 sha512_k[] = {
|
||||
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
|
||||
0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
|
||||
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
|
||||
0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
|
||||
0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
|
||||
0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
|
||||
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
|
||||
0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
|
||||
0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
|
||||
0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
|
||||
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
|
||||
0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
|
||||
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
|
||||
0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
|
||||
0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
|
||||
0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
|
||||
0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
|
||||
0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
|
||||
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
|
||||
0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
|
||||
0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
|
||||
0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
|
||||
0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
|
||||
0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
|
||||
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
|
||||
0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
|
||||
0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
|
||||
0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
|
||||
0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
|
||||
0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
|
||||
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
|
||||
0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
|
||||
0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
|
||||
0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
|
||||
0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
|
||||
0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
|
||||
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
|
||||
0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
|
||||
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
|
||||
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
|
||||
};
|
||||
|
||||
|
||||
asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
|
||||
const u64 k[], unsigned int num_blks);
|
||||
|
||||
|
||||
static int sha512_neon_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
sctx->state[0] = SHA512_H0;
|
||||
sctx->state[1] = SHA512_H1;
|
||||
sctx->state[2] = SHA512_H2;
|
||||
sctx->state[3] = SHA512_H3;
|
||||
sctx->state[4] = SHA512_H4;
|
||||
sctx->state[5] = SHA512_H5;
|
||||
sctx->state[6] = SHA512_H6;
|
||||
sctx->state[7] = SHA512_H7;
|
||||
sctx->count[0] = sctx->count[1] = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len, unsigned int partial)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int done = 0;
|
||||
|
||||
sctx->count[0] += len;
|
||||
if (sctx->count[0] < len)
|
||||
sctx->count[1]++;
|
||||
|
||||
if (partial) {
|
||||
done = SHA512_BLOCK_SIZE - partial;
|
||||
memcpy(sctx->buf + partial, data, done);
|
||||
sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
|
||||
}
|
||||
|
||||
if (len - done >= SHA512_BLOCK_SIZE) {
|
||||
const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
|
||||
|
||||
sha512_transform_neon(sctx->state, data + done, sha512_k,
|
||||
rounds);
|
||||
|
||||
done += rounds * SHA512_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
memcpy(sctx->buf, data + done, len - done);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
|
||||
int res;
|
||||
|
||||
/* Handle the fast case right here */
|
||||
if (partial + len < SHA512_BLOCK_SIZE) {
|
||||
sctx->count[0] += len;
|
||||
if (sctx->count[0] < len)
|
||||
sctx->count[1]++;
|
||||
memcpy(sctx->buf + partial, data, len);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!may_use_simd()) {
|
||||
res = crypto_sha512_update(desc, data, len);
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
res = __sha512_neon_update(desc, data, len, partial);
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
static int sha512_neon_final(struct shash_desc *desc, u8 *out)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
unsigned int i, index, padlen;
|
||||
__be64 *dst = (__be64 *)out;
|
||||
__be64 bits[2];
|
||||
static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
|
||||
|
||||
/* save number of bits */
|
||||
bits[1] = cpu_to_be64(sctx->count[0] << 3);
|
||||
bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
|
||||
|
||||
/* Pad out to 112 mod 128 and append length */
|
||||
index = sctx->count[0] & 0x7f;
|
||||
padlen = (index < 112) ? (112 - index) : ((128+112) - index);
|
||||
|
||||
if (!may_use_simd()) {
|
||||
crypto_sha512_update(desc, padding, padlen);
|
||||
crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
|
||||
} else {
|
||||
kernel_neon_begin();
|
||||
/* We need to fill a whole block for __sha512_neon_update() */
|
||||
if (padlen <= 112) {
|
||||
sctx->count[0] += padlen;
|
||||
if (sctx->count[0] < padlen)
|
||||
sctx->count[1]++;
|
||||
memcpy(sctx->buf + index, padding, padlen);
|
||||
} else {
|
||||
__sha512_neon_update(desc, padding, padlen, index);
|
||||
}
|
||||
__sha512_neon_update(desc, (const u8 *)&bits,
|
||||
sizeof(bits), 112);
|
||||
kernel_neon_end();
|
||||
}
|
||||
|
||||
/* Store state in digest */
|
||||
for (i = 0; i < 8; i++)
|
||||
dst[i] = cpu_to_be64(sctx->state[i]);
|
||||
|
||||
/* Wipe context */
|
||||
memset(sctx, 0, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha512_neon_export(struct shash_desc *desc, void *out)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(out, sctx, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha512_neon_import(struct shash_desc *desc, const void *in)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
memcpy(sctx, in, sizeof(*sctx));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha384_neon_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sha512_state *sctx = shash_desc_ctx(desc);
|
||||
|
||||
sctx->state[0] = SHA384_H0;
|
||||
sctx->state[1] = SHA384_H1;
|
||||
sctx->state[2] = SHA384_H2;
|
||||
sctx->state[3] = SHA384_H3;
|
||||
sctx->state[4] = SHA384_H4;
|
||||
sctx->state[5] = SHA384_H5;
|
||||
sctx->state[6] = SHA384_H6;
|
||||
sctx->state[7] = SHA384_H7;
|
||||
|
||||
sctx->count[0] = sctx->count[1] = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
|
||||
{
|
||||
u8 D[SHA512_DIGEST_SIZE];
|
||||
|
||||
sha512_neon_final(desc, D);
|
||||
|
||||
memcpy(hash, D, SHA384_DIGEST_SIZE);
|
||||
memset(D, 0, SHA512_DIGEST_SIZE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg algs[] = { {
|
||||
.digestsize = SHA512_DIGEST_SIZE,
|
||||
.init = sha512_neon_init,
|
||||
.update = sha512_neon_update,
|
||||
.final = sha512_neon_final,
|
||||
.export = sha512_neon_export,
|
||||
.import = sha512_neon_import,
|
||||
.descsize = sizeof(struct sha512_state),
|
||||
.statesize = sizeof(struct sha512_state),
|
||||
.base = {
|
||||
.cra_name = "sha512",
|
||||
.cra_driver_name = "sha512-neon",
|
||||
.cra_priority = 250,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA512_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
}, {
|
||||
.digestsize = SHA384_DIGEST_SIZE,
|
||||
.init = sha384_neon_init,
|
||||
.update = sha512_neon_update,
|
||||
.final = sha384_neon_final,
|
||||
.export = sha512_neon_export,
|
||||
.import = sha512_neon_import,
|
||||
.descsize = sizeof(struct sha512_state),
|
||||
.statesize = sizeof(struct sha512_state),
|
||||
.base = {
|
||||
.cra_name = "sha384",
|
||||
.cra_driver_name = "sha384-neon",
|
||||
.cra_priority = 250,
|
||||
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
|
||||
.cra_blocksize = SHA384_BLOCK_SIZE,
|
||||
.cra_module = THIS_MODULE,
|
||||
}
|
||||
} };
|
||||
|
||||
static int __init sha512_neon_mod_init(void)
|
||||
{
|
||||
if (!cpu_has_neon())
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
static void __exit sha512_neon_mod_fini(void)
|
||||
{
|
||||
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
|
||||
}
|
||||
|
||||
module_init(sha512_neon_mod_init);
|
||||
module_exit(sha512_neon_mod_fini);
|
||||
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
|
||||
|
||||
MODULE_ALIAS("sha512");
|
||||
MODULE_ALIAS("sha384");
|
@ -24,6 +24,8 @@
|
||||
#include <asm/domain.h>
|
||||
#include <asm/opcodes-virt.h>
|
||||
#include <asm/asm-offsets.h>
|
||||
#include <asm/page.h>
|
||||
#include <asm/thread_info.h>
|
||||
|
||||
#define IOMEM(x) (x)
|
||||
|
||||
@ -179,10 +181,10 @@
|
||||
* Get current thread_info.
|
||||
*/
|
||||
.macro get_thread_info, rd
|
||||
ARM( mov \rd, sp, lsr #13 )
|
||||
ARM( mov \rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT )
|
||||
THUMB( mov \rd, sp )
|
||||
THUMB( lsr \rd, \rd, #13 )
|
||||
mov \rd, \rd, lsl #13
|
||||
THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT )
|
||||
mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
|
||||
.endm
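The rewritten get_thread_info macro above derives the thread_info pointer by clearing the low THREAD_SIZE_ORDER + PAGE_SHIFT bits of the stack pointer (shift right, then left). A minimal userspace sketch of the same arithmetic, assuming a 4 KiB PAGE_SIZE and THREAD_SIZE_ORDER of 1 (the value used by the thread_info.h change later in this series):

#include <stdio.h>

#define PAGE_SHIFT		12	/* assumption: 4 KiB pages */
#define THREAD_SIZE_ORDER	1
#define THREAD_SIZE		(1UL << (THREAD_SIZE_ORDER + PAGE_SHIFT))

int main(void)
{
	/* Hypothetical kernel stack pointer somewhere inside an 8 KiB stack. */
	unsigned long sp = 0xc0a01f3cUL;
	/* lsr then lsl by (THREAD_SIZE_ORDER + PAGE_SHIFT), as in the macro. */
	unsigned long ti = (sp >> (THREAD_SIZE_ORDER + PAGE_SHIFT))
				<< (THREAD_SIZE_ORDER + PAGE_SHIFT);

	printf("sp=%#lx -> thread_info=%#lx (THREAD_SIZE=%lu)\n",
	       sp, ti, THREAD_SIZE);
	return 0;
}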
|
||||
|
||||
/*
|
||||
@ -425,4 +427,25 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
|
||||
.macro ret\c, reg
|
||||
#if __LINUX_ARM_ARCH__ < 6
|
||||
mov\c pc, \reg
|
||||
#else
|
||||
.ifeqs "\reg", "lr"
|
||||
bx\c \reg
|
||||
.else
|
||||
mov\c pc, \reg
|
||||
.endif
|
||||
#endif
|
||||
.endm
|
||||
.endr
|
||||
|
||||
.macro ret.w, reg
|
||||
ret \reg
|
||||
#ifdef CONFIG_THUMB2_KERNEL
|
||||
nop
|
||||
#endif
|
||||
.endm
|
||||
|
||||
#endif /* __ASM_ASSEMBLER_H__ */
|
||||
|
@ -62,17 +62,18 @@
|
||||
#define ARM_CPU_IMP_ARM 0x41
|
||||
#define ARM_CPU_IMP_INTEL 0x69
|
||||
|
||||
#define ARM_CPU_PART_ARM1136 0xB360
|
||||
#define ARM_CPU_PART_ARM1156 0xB560
|
||||
#define ARM_CPU_PART_ARM1176 0xB760
|
||||
#define ARM_CPU_PART_ARM11MPCORE 0xB020
|
||||
#define ARM_CPU_PART_CORTEX_A8 0xC080
|
||||
#define ARM_CPU_PART_CORTEX_A9 0xC090
|
||||
#define ARM_CPU_PART_CORTEX_A5 0xC050
|
||||
#define ARM_CPU_PART_CORTEX_A15 0xC0F0
|
||||
#define ARM_CPU_PART_CORTEX_A7 0xC070
|
||||
#define ARM_CPU_PART_CORTEX_A12 0xC0D0
|
||||
#define ARM_CPU_PART_CORTEX_A17 0xC0E0
|
||||
/* ARM implemented processors */
|
||||
#define ARM_CPU_PART_ARM1136 0x4100b360
|
||||
#define ARM_CPU_PART_ARM1156 0x4100b560
|
||||
#define ARM_CPU_PART_ARM1176 0x4100b760
|
||||
#define ARM_CPU_PART_ARM11MPCORE 0x4100b020
|
||||
#define ARM_CPU_PART_CORTEX_A8 0x4100c080
|
||||
#define ARM_CPU_PART_CORTEX_A9 0x4100c090
|
||||
#define ARM_CPU_PART_CORTEX_A5 0x4100c050
|
||||
#define ARM_CPU_PART_CORTEX_A7 0x4100c070
|
||||
#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0
|
||||
#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0
|
||||
#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0
|
||||
|
||||
#define ARM_CPU_XSCALE_ARCH_MASK 0xe000
|
||||
#define ARM_CPU_XSCALE_ARCH_V1 0x2000
|
||||
@ -171,14 +172,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
|
||||
return (read_cpuid_id() & 0xFF000000) >> 24;
|
||||
}
|
||||
|
||||
static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
|
||||
/*
|
||||
* The CPU part number is meaningless without referring to the CPU
|
||||
* implementer: implementers are free to define their own part numbers
|
||||
* which are permitted to clash with other implementer part numbers.
|
||||
*/
|
||||
static inline unsigned int __attribute_const__ read_cpuid_part(void)
|
||||
{
|
||||
return read_cpuid_id() & 0xff00fff0;
|
||||
}
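The new read_cpuid_part() keeps only the implementer (bits 31:24) and primary part number (bits 15:4) fields of the MIDR, which is what the 0xff00fff0 mask selects and why the part defines above now carry the implementer in their top byte. A quick standalone check, using a plausible Cortex-A9 MIDR value as the example input:

#include <assert.h>
#include <stdio.h>

#define ARM_CPU_PART_CORTEX_A9	0x4100c090

int main(void)
{
	/* Example MIDR: implementer 0x41, variant 2, arch 0xf, part 0xc09, rev 1. */
	unsigned int midr = 0x412fc091;
	unsigned int part = midr & 0xff00fff0;	/* as in read_cpuid_part() */

	assert(part == ARM_CPU_PART_CORTEX_A9);
	printf("MIDR %#x -> part %#x\n", midr, part);
	return 0;
}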
|
||||
|
||||
static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
|
||||
{
|
||||
return read_cpuid_id() & 0xFFF0;
|
||||
}
|
||||
|
||||
static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
|
||||
{
|
||||
return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
|
||||
return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
|
||||
}
|
||||
|
||||
static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)
|
||||
|
10
arch/arm/include/asm/crypto/sha1.h
Normal file
@ -0,0 +1,10 @@
|
||||
#ifndef ASM_ARM_CRYPTO_SHA1_H
|
||||
#define ASM_ARM_CRYPTO_SHA1_H
|
||||
|
||||
#include <linux/crypto.h>
|
||||
#include <crypto/sha.h>
|
||||
|
||||
extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
|
||||
unsigned int len);
|
||||
|
||||
#endif
|
@ -35,5 +35,5 @@
|
||||
\symbol_name:
|
||||
mov r8, lr
|
||||
arch_irq_handler_default
|
||||
mov pc, r8
|
||||
ret r8
|
||||
.endm
|
||||
|
@ -221,15 +221,6 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_V7
|
||||
# ifdef CPU_NAME
|
||||
# undef MULTI_CPU
|
||||
# define MULTI_CPU
|
||||
# else
|
||||
# define CPU_NAME cpu_v7
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_V7M
|
||||
# ifdef CPU_NAME
|
||||
# undef MULTI_CPU
|
||||
@ -248,6 +239,15 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_CPU_V7
|
||||
/*
|
||||
* Cortex-A9 needs a different suspend/resume function, so we need
|
||||
* multiple CPU support for ARMv7 anyway.
|
||||
*/
|
||||
# undef MULTI_CPU
|
||||
# define MULTI_CPU
|
||||
#endif
|
||||
|
||||
#ifndef MULTI_CPU
|
||||
#define cpu_proc_init __glue(CPU_NAME,_proc_init)
|
||||
#define cpu_proc_fin __glue(CPU_NAME,_proc_fin)
|
||||
|
@ -217,6 +217,22 @@ int __mcpm_cluster_state(unsigned int cluster);
|
||||
int __init mcpm_sync_init(
|
||||
void (*power_up_setup)(unsigned int affinity_level));
|
||||
|
||||
/**
|
||||
* mcpm_loopback - make a run through the MCPM low-level code
|
||||
*
|
||||
* @cache_disable: pointer to function performing cache disabling
|
||||
*
|
||||
* This exercises the MCPM machinery by soft resetting the CPU and branching
|
||||
* to the MCPM low-level entry code before returning to the caller.
|
||||
* The @cache_disable function must do the necessary cache disabling to
|
||||
* let the regular kernel init code turn it back on as if the CPU was
|
||||
* hotplugged in. The MCPM state machine is set as if the cluster was
|
||||
* initialized meaning the power_up_setup callback passed to mcpm_sync_init()
|
||||
* will be invoked for all affinity levels. This may be useful to initialize
|
||||
* some resources such as enabling the CCI that requires the cache to be
* off, or simply for testing purposes.
|
||||
*/
|
||||
int __init mcpm_loopback(void (*cache_disable)(void));
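Based only on the declaration above, a platform's MCPM init code would call mcpm_loopback() once at boot with its cache-disabling helper; the names in this sketch (brd_cache_off, brd_mcpm_init) are hypothetical placeholders, not part of the patch:

/* Hypothetical platform glue, sketched against the mcpm_loopback() declaration. */
static void brd_cache_off(void)
{
	/* Platform-specific: disable and flush this CPU's caches here. */
}

static int __init brd_mcpm_init(void)
{
	int ret;

	/* Soft-reset through the MCPM entry path so the state machine is primed. */
	ret = mcpm_loopback(brd_cache_off);
	if (ret)
		return ret;

	mcpm_smp_set_ops();
	return 0;
}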
|
||||
|
||||
void __init mcpm_smp_set_ops(void);
|
||||
|
||||
#else
|
||||
|
23
arch/arm/include/asm/mcs_spinlock.h
Normal file
@ -0,0 +1,23 @@
|
||||
#ifndef __ASM_MCS_LOCK_H
|
||||
#define __ASM_MCS_LOCK_H
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
#include <asm/spinlock.h>
|
||||
|
||||
/* MCS spin-locking. */
|
||||
#define arch_mcs_spin_lock_contended(lock) \
|
||||
do { \
|
||||
/* Ensure prior stores are observed before we enter wfe. */ \
|
||||
smp_mb(); \
|
||||
while (!(smp_load_acquire(lock))) \
|
||||
wfe(); \
|
||||
} while (0) \
|
||||
|
||||
#define arch_mcs_spin_unlock_contended(lock) \
|
||||
do { \
|
||||
smp_store_release(lock, 1); \
|
||||
dsb_sev(); \
|
||||
} while (0)
|
||||
|
||||
#endif /* CONFIG_SMP */
|
||||
#endif /* __ASM_MCS_LOCK_H */
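These ARM overrides let a blocked waiter sleep in wfe() instead of busy-polling; the unlock side publishes the handoff with a store-release and then dsb_sev() to wake it. A hedged sketch of how an MCS queue node might consume the pair (the struct and function names here are illustrative, not lifted from the generic locking code):

struct mcs_node {
	struct mcs_node *next;
	int locked;		/* 0 = still waiting, 1 = lock handed to us */
};

/* Waiter side: we already appended ourselves behind a predecessor. */
static inline void mcs_wait_for_handoff(struct mcs_node *node)
{
	arch_mcs_spin_lock_contended(&node->locked);	/* wfe() until locked != 0 */
}

/* Owner side: pass the lock to the next queued waiter. */
static inline void mcs_hand_off(struct mcs_node *next)
{
	arch_mcs_spin_unlock_contended(&next->locked);	/* store-release + dsb_sev() */
}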
|
@ -91,9 +91,7 @@
|
||||
* of this define that was meant to.
|
||||
* Fortunately, there is no reference for this in noMMU mode, for now.
|
||||
*/
|
||||
#ifndef TASK_SIZE
|
||||
#define TASK_SIZE (CONFIG_DRAM_SIZE)
|
||||
#endif
|
||||
#define TASK_SIZE UL(0xffffffff)
|
||||
|
||||
#ifndef TASK_UNMAPPED_BASE
|
||||
#define TASK_UNMAPPED_BASE UL(0x00000000)
|
||||
@ -150,13 +148,11 @@
|
||||
|
||||
/*
|
||||
* PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
|
||||
* memory. This is used for XIP and NoMMU kernels, or by kernels which
|
||||
* have their own mach/memory.h. Assembly code must always use
|
||||
* memory. This is used for XIP and NoMMU kernels, and on platforms that don't
|
||||
* have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
|
||||
* PLAT_PHYS_OFFSET and not PHYS_OFFSET.
|
||||
*/
|
||||
#ifndef PLAT_PHYS_OFFSET
|
||||
#define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET)
|
||||
#endif
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
|
@ -12,15 +12,6 @@
|
||||
#ifndef __ARM_PERF_EVENT_H__
|
||||
#define __ARM_PERF_EVENT_H__
|
||||
|
||||
/*
|
||||
* The ARMv7 CPU PMU supports up to 32 event counters.
|
||||
*/
|
||||
#define ARMPMU_MAX_HWEVENTS 32
|
||||
|
||||
#define HW_OP_UNSUPPORTED 0xFFFF
|
||||
#define C(_x) PERF_COUNT_HW_CACHE_##_x
|
||||
#define CACHE_OP_UNSUPPORTED 0xFFFF
|
||||
|
||||
#ifdef CONFIG_HW_PERF_EVENTS
|
||||
struct pt_regs;
|
||||
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
|
||||
|
@ -43,7 +43,7 @@
|
||||
#define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2)
|
||||
#define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3)
|
||||
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
|
||||
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
|
||||
#define PMD_SECT_AP2 (_AT(pmdval_t, 1) << 7) /* read only */
|
||||
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
|
||||
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
|
||||
#define PMD_SECT_nG (_AT(pmdval_t, 1) << 11)
|
||||
@ -72,6 +72,7 @@
|
||||
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
|
||||
#define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */
|
||||
#define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */
|
||||
#define PTE_AP2 (_AT(pteval_t, 1) << 7) /* AP[2] */
|
||||
#define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
|
||||
#define PTE_EXT_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
|
||||
#define PTE_EXT_NG (_AT(pteval_t, 1) << 11) /* nG */
|
||||
|
@ -79,18 +79,19 @@
|
||||
#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */
|
||||
#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */
|
||||
#define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
|
||||
#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
|
||||
#define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
|
||||
#define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */
|
||||
#define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */
|
||||
#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */
|
||||
#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */
|
||||
#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55)
|
||||
#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56)
|
||||
#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */
|
||||
#define L_PTE_RDONLY (_AT(pteval_t, 1) << 58) /* READ ONLY */
|
||||
|
||||
#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
|
||||
#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
|
||||
#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
|
||||
#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
|
||||
#define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
|
||||
#define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
|
||||
#define L_PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
|
||||
#define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
|
||||
#define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58)
|
||||
|
||||
/*
|
||||
* To be used in assembly code with the upper page attributes.
|
||||
@ -207,27 +208,32 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
|
||||
#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
|
||||
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
|
||||
|
||||
#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
|
||||
#define pmd_isset(pmd, val) ((u32)(val) == (val) ? pmd_val(pmd) & (val) \
|
||||
: !!(pmd_val(pmd) & (val)))
|
||||
#define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val)))
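The (u32)(val) == (val) test in pmd_isset()/pte_isset() exists because LPAE page-table flags can live above bit 31: returning the raw bitwise AND is cheap for 32-bit flags, but a high flag would be lost if the result were later truncated to 32 bits, so those cases go through the double negation instead. A standalone illustration of the failure mode (the bit position is just an example):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t pmd  = 1ULL << 55;	/* e.g. an LPAE "dirty"-style bit */
	uint64_t flag = 1ULL << 55;

	uint32_t truncated = (uint32_t)(pmd & flag);	/* low 32 bits only: 0 */
	int boolean = !!(pmd & flag);			/* 1, as the macro returns */

	printf("truncated=%u boolean=%d\n", truncated, boolean);
	return 0;
}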
|
||||
|
||||
#define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF))
|
||||
|
||||
#define __HAVE_ARCH_PMD_WRITE
|
||||
#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
|
||||
#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
|
||||
#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
|
||||
|
||||
#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd))
|
||||
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
|
||||
|
||||
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
|
||||
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
|
||||
#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
|
||||
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd))
|
||||
#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING))
|
||||
#endif
|
||||
|
||||
#define PMD_BIT_FUNC(fn,op) \
|
||||
static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
|
||||
|
||||
PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
|
||||
PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY);
|
||||
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
|
||||
PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
|
||||
PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
|
||||
PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
|
||||
PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING);
|
||||
PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY);
|
||||
PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY);
|
||||
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
|
||||
|
||||
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
|
||||
@ -241,8 +247,8 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
|
||||
|
||||
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
|
||||
{
|
||||
const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
|
||||
PMD_SECT_VALID | PMD_SECT_NONE;
|
||||
const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | L_PMD_SECT_RDONLY |
|
||||
L_PMD_SECT_VALID | L_PMD_SECT_NONE;
|
||||
pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
|
||||
return pmd;
|
||||
}
|
||||
@ -253,8 +259,13 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
|
||||
BUG_ON(addr >= TASK_SIZE);
|
||||
|
||||
/* create a faulting entry if PROT_NONE protected */
|
||||
if (pmd_val(pmd) & PMD_SECT_NONE)
|
||||
pmd_val(pmd) &= ~PMD_SECT_VALID;
|
||||
if (pmd_val(pmd) & L_PMD_SECT_NONE)
|
||||
pmd_val(pmd) &= ~L_PMD_SECT_VALID;
|
||||
|
||||
if (pmd_write(pmd) && pmd_dirty(pmd))
|
||||
pmd_val(pmd) &= ~PMD_SECT_AP2;
|
||||
else
|
||||
pmd_val(pmd) |= PMD_SECT_AP2;
|
||||
|
||||
*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
|
||||
flush_pmd_entry(pmdp);
|
||||
|
@ -214,18 +214,22 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
|
||||
|
||||
#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0)
|
||||
|
||||
#define pte_isset(pte, val) ((u32)(val) == (val) ? pte_val(pte) & (val) \
|
||||
: !!(pte_val(pte) & (val)))
|
||||
#define pte_isclear(pte, val) (!(pte_val(pte) & (val)))
|
||||
|
||||
#define pte_none(pte) (!pte_val(pte))
|
||||
#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT)
|
||||
#define pte_valid(pte) (pte_val(pte) & L_PTE_VALID)
|
||||
#define pte_present(pte) (pte_isset((pte), L_PTE_PRESENT))
|
||||
#define pte_valid(pte) (pte_isset((pte), L_PTE_VALID))
|
||||
#define pte_accessible(mm, pte) (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
|
||||
#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY))
|
||||
#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY)
|
||||
#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG)
|
||||
#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN))
|
||||
#define pte_write(pte) (pte_isclear((pte), L_PTE_RDONLY))
|
||||
#define pte_dirty(pte) (pte_isset((pte), L_PTE_DIRTY))
|
||||
#define pte_young(pte) (pte_isset((pte), L_PTE_YOUNG))
|
||||
#define pte_exec(pte) (pte_isclear((pte), L_PTE_XN))
|
||||
#define pte_special(pte) (0)
|
||||
|
||||
#define pte_valid_user(pte) \
|
||||
(pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
|
||||
(pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
|
||||
|
||||
#if __LINUX_ARM_ARCH__ < 6
|
||||
static inline void __sync_icache_dcache(pte_t pteval)
|
||||
|
@ -42,6 +42,25 @@ struct arm_pmu_platdata {
|
||||
|
||||
#ifdef CONFIG_HW_PERF_EVENTS
|
||||
|
||||
/*
|
||||
* The ARMv7 CPU PMU supports up to 32 event counters.
|
||||
*/
|
||||
#define ARMPMU_MAX_HWEVENTS 32
|
||||
|
||||
#define HW_OP_UNSUPPORTED 0xFFFF
|
||||
#define C(_x) PERF_COUNT_HW_CACHE_##_x
|
||||
#define CACHE_OP_UNSUPPORTED 0xFFFF
|
||||
|
||||
#define PERF_MAP_ALL_UNSUPPORTED \
|
||||
[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
|
||||
|
||||
#define PERF_CACHE_MAP_ALL_UNSUPPORTED \
|
||||
[0 ... C(MAX) - 1] = { \
|
||||
[0 ... C(OP_MAX) - 1] = { \
|
||||
[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
|
||||
}, \
|
||||
}
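PERF_MAP_ALL_UNSUPPORTED and PERF_CACHE_MAP_ALL_UNSUPPORTED rely on GCC's designated range initializer extension ([first ... last] = value), which lets the per-CPU event tables list only the supported entries and have everything else default to the *_UNSUPPORTED sentinel. A minimal sketch of the construct outside the kernel:

#include <stdio.h>

#define NENTRIES	8
#define UNSUPPORTED	0xFFFF

/* GNU C range designator: every slot defaults to UNSUPPORTED... */
static const unsigned map[NENTRIES] = {
	[0 ... NENTRIES - 1] = UNSUPPORTED,
	/* ...and individual events simply override their own slot. */
	[2] = 0x07,
	[5] = 0x0d,
};

int main(void)
{
	for (int i = 0; i < NENTRIES; i++)
		printf("map[%d] = %#x\n", i, map[i]);
	return 0;
}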
|
||||
|
||||
/* The events for a given PMU register set. */
|
||||
struct pmu_hw_events {
|
||||
/*
|
||||
|
@ -84,6 +84,12 @@ static inline long regs_return_value(struct pt_regs *regs)
|
||||
|
||||
#define instruction_pointer(regs) (regs)->ARM_pc
|
||||
|
||||
#ifdef CONFIG_THUMB2_KERNEL
|
||||
#define frame_pointer(regs) (regs)->ARM_r7
|
||||
#else
|
||||
#define frame_pointer(regs) (regs)->ARM_fp
|
||||
#endif
|
||||
|
||||
static inline void instruction_pointer_set(struct pt_regs *regs,
|
||||
unsigned long val)
|
||||
{
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
static inline bool scu_a9_has_base(void)
|
||||
{
|
||||
return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
|
||||
return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
|
||||
}
|
||||
|
||||
static inline unsigned long scu_a9_get_base(void)
|
||||
|
@ -1,13 +1,28 @@
|
||||
#ifndef __ASM_STACKTRACE_H
|
||||
#define __ASM_STACKTRACE_H
|
||||
|
||||
#include <asm/ptrace.h>
|
||||
|
||||
struct stackframe {
|
||||
/*
|
||||
* FP member should hold R7 when CONFIG_THUMB2_KERNEL is enabled
|
||||
* and R11 otherwise.
|
||||
*/
|
||||
unsigned long fp;
|
||||
unsigned long sp;
|
||||
unsigned long lr;
|
||||
unsigned long pc;
|
||||
};
|
||||
|
||||
static __always_inline
|
||||
void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame)
|
||||
{
|
||||
frame->fp = frame_pointer(regs);
|
||||
frame->sp = regs->ARM_sp;
|
||||
frame->lr = regs->ARM_lr;
|
||||
frame->pc = regs->ARM_pc;
|
||||
}
|
||||
|
||||
extern int unwind_frame(struct stackframe *frame);
|
||||
extern void walk_stackframe(struct stackframe *frame,
|
||||
int (*fn)(struct stackframe *, void *), void *data);
|
||||
|
@ -14,9 +14,10 @@
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <asm/fpstate.h>
|
||||
#include <asm/page.h>
|
||||
|
||||
#define THREAD_SIZE_ORDER 1
|
||||
#define THREAD_SIZE 8192
|
||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||
#define THREAD_START_SP (THREAD_SIZE - 8)
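With THREAD_SIZE now derived from THREAD_SIZE_ORDER instead of being hard-coded, the value is unchanged on a standard configuration; a tiny check of the arithmetic, assuming PAGE_SIZE is 4096:

#include <assert.h>

#define PAGE_SIZE		4096UL	/* assumption: standard 4 KiB pages */
#define THREAD_SIZE_ORDER	1
#define THREAD_SIZE		(PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_START_SP		(THREAD_SIZE - 8)

int main(void)
{
	assert(THREAD_SIZE == 8192);		/* same as the old hard-coded value */
	assert(THREAD_START_SP == 8184);
	return 0;
}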
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
|
||||
extern int __get_user_1(void *);
|
||||
extern int __get_user_2(void *);
|
||||
extern int __get_user_4(void *);
|
||||
extern int __get_user_lo8(void *);
|
||||
extern int __get_user_8(void *);
|
||||
|
||||
#define __GUP_CLOBBER_1 "lr", "cc"
|
||||
#ifdef CONFIG_CPU_USE_DOMAINS
|
||||
@ -115,6 +117,8 @@ extern int __get_user_4(void *);
|
||||
#define __GUP_CLOBBER_2 "lr", "cc"
|
||||
#endif
|
||||
#define __GUP_CLOBBER_4 "lr", "cc"
|
||||
#define __GUP_CLOBBER_lo8 "lr", "cc"
|
||||
#define __GUP_CLOBBER_8 "lr", "cc"
|
||||
|
||||
#define __get_user_x(__r2,__p,__e,__l,__s) \
|
||||
__asm__ __volatile__ ( \
|
||||
@ -125,11 +129,19 @@ extern int __get_user_4(void *);
|
||||
: "0" (__p), "r" (__l) \
|
||||
: __GUP_CLOBBER_##__s)
|
||||
|
||||
/* narrowing a double-word get into a single 32bit word register: */
|
||||
#ifdef __ARMEB__
|
||||
#define __get_user_xb(__r2, __p, __e, __l, __s) \
|
||||
__get_user_x(__r2, __p, __e, __l, lo8)
|
||||
#else
|
||||
#define __get_user_xb __get_user_x
|
||||
#endif
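__get_user_xb exists because an 8-byte fetch returns its result in a register pair and, when the destination is narrower than 8 bytes, only the low word should be kept; which half of a 64-bit value that is depends on byte order, hence the __ARMEB__ special case. A standalone sketch of that dependency (a host-endianness demo, not ARM-specific code):

#include <stdio.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint64_t v = 0x1122334455667788ULL;
	uint32_t words[2];

	memcpy(words, &v, sizeof(v));
	/* Little-endian: words[0] holds the low half; big-endian: words[1] does. */
	printf("words[0]=%#x words[1]=%#x low32=%#x\n",
	       words[0], words[1], (uint32_t)v);
	return 0;
}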
|
||||
|
||||
#define __get_user_check(x,p) \
|
||||
({ \
|
||||
unsigned long __limit = current_thread_info()->addr_limit - 1; \
|
||||
register const typeof(*(p)) __user *__p asm("r0") = (p);\
|
||||
register unsigned long __r2 asm("r2"); \
|
||||
register typeof(x) __r2 asm("r2"); \
|
||||
register unsigned long __l asm("r1") = __limit; \
|
||||
register int __e asm("r0"); \
|
||||
switch (sizeof(*(__p))) { \
|
||||
@ -142,6 +154,12 @@ extern int __get_user_4(void *);
|
||||
case 4: \
|
||||
__get_user_x(__r2, __p, __e, __l, 4); \
|
||||
break; \
|
||||
case 8: \
|
||||
if (sizeof((x)) < 8) \
|
||||
__get_user_xb(__r2, __p, __e, __l, 4); \
|
||||
else \
|
||||
__get_user_x(__r2, __p, __e, __l, 8); \
|
||||
break; \
|
||||
default: __e = __get_user_bad(); break; \
|
||||
} \
|
||||
x = (typeof(*(p))) __r2; \
|
||||
@ -224,7 +242,7 @@ static inline void set_fs(mm_segment_t fs)
|
||||
#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
|
||||
|
||||
#define user_addr_max() \
|
||||
(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
|
||||
(segment_eq(get_fs(), KERNEL_DS) ? ~0UL : get_fs())
|
||||
|
||||
/*
|
||||
* The "__xxx" versions of the user access functions do not verify the
|
||||
|
@ -15,7 +15,17 @@
|
||||
|
||||
#include <uapi/asm/unistd.h>
|
||||
|
||||
/*
|
||||
* This may need to be greater than __NR_last_syscall+1 in order to
|
||||
* account for the padding in the syscall table
|
||||
*/
|
||||
#define __NR_syscalls (384)
|
||||
|
||||
/*
|
||||
* *NOTE*: This is a ghost syscall private to the kernel. Only the
|
||||
* __kuser_cmpxchg code in entry-armv.S should be aware of its
|
||||
* existence. Don't ever use this from user code.
|
||||
*/
|
||||
#define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0)
|
||||
|
||||
#define __ARCH_WANT_STAT64
|
||||
|
@ -410,11 +410,6 @@
|
||||
#define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
|
||||
#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
|
||||
|
||||
/*
|
||||
* This may need to be greater than __NR_last_syscall+1 in order to
|
||||
* account for the padding in the syscall table
|
||||
*/
|
||||
|
||||
/*
|
||||
* The following SWIs are ARM private.
|
||||
*/
|
||||
@ -425,12 +420,6 @@
|
||||
#define __ARM_NR_usr32 (__ARM_NR_BASE+4)
|
||||
#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
|
||||
|
||||
/*
|
||||
* *NOTE*: This is a ghost syscall private to the kernel. Only the
|
||||
* __kuser_cmpxchg code in entry-armv.S should be aware of its
|
||||
* existence. Don't ever use this from user code.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The following syscalls are obsolete and no longer available for EABI.
|
||||
*/
|
||||
|
@ -90,7 +90,7 @@ ENTRY(printascii)
|
||||
ldrneb r1, [r0], #1
|
||||
teqne r1, #0
|
||||
bne 1b
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(printascii)
|
||||
|
||||
ENTRY(printch)
|
||||
@ -105,7 +105,7 @@ ENTRY(debug_ll_addr)
|
||||
addruart r2, r3, ip
|
||||
str r2, [r0]
|
||||
str r3, [r1]
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(debug_ll_addr)
|
||||
#endif
|
||||
|
||||
@ -116,7 +116,7 @@ ENTRY(printascii)
|
||||
mov r0, #0x04 @ SYS_WRITE0
|
||||
ARM( svc #0x123456 )
|
||||
THUMB( svc #0xab )
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(printascii)
|
||||
|
||||
ENTRY(printch)
|
||||
@ -125,14 +125,14 @@ ENTRY(printch)
|
||||
mov r0, #0x03 @ SYS_WRITEC
|
||||
ARM( svc #0x123456 )
|
||||
THUMB( svc #0xab )
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(printch)
|
||||
|
||||
ENTRY(debug_ll_addr)
|
||||
mov r2, #0
|
||||
str r2, [r0]
|
||||
str r2, [r1]
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(debug_ll_addr)
|
||||
|
||||
#endif
|
||||
|
@ -224,7 +224,7 @@ svc_preempt:
|
||||
1: bl preempt_schedule_irq @ irq en/disable is done inside
|
||||
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
|
||||
tst r0, #_TIF_NEED_RESCHED
|
||||
moveq pc, r8 @ go again
|
||||
reteq r8 @ go again
|
||||
b 1b
|
||||
#endif
|
||||
|
||||
@ -490,7 +490,7 @@ ENDPROC(__und_usr)
|
||||
.pushsection .fixup, "ax"
|
||||
.align 2
|
||||
4: str r4, [sp, #S_PC] @ retry current instruction
|
||||
mov pc, r9
|
||||
ret r9
|
||||
.popsection
|
||||
.pushsection __ex_table,"a"
|
||||
.long 1b, 4b
|
||||
@ -552,7 +552,7 @@ call_fpe:
|
||||
#endif
|
||||
tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
|
||||
tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2
|
||||
moveq pc, lr
|
||||
reteq lr
|
||||
and r8, r0, #0x00000f00 @ mask out CP number
|
||||
THUMB( lsr r8, r8, #8 )
|
||||
mov r7, #1
|
||||
@ -571,33 +571,33 @@ call_fpe:
|
||||
THUMB( add pc, r8 )
|
||||
nop
|
||||
|
||||
movw_pc lr @ CP#0
|
||||
ret.w lr @ CP#0
|
||||
W(b) do_fpe @ CP#1 (FPE)
|
||||
W(b) do_fpe @ CP#2 (FPE)
|
||||
movw_pc lr @ CP#3
|
||||
ret.w lr @ CP#3
|
||||
#ifdef CONFIG_CRUNCH
|
||||
b crunch_task_enable @ CP#4 (MaverickCrunch)
|
||||
b crunch_task_enable @ CP#5 (MaverickCrunch)
|
||||
b crunch_task_enable @ CP#6 (MaverickCrunch)
|
||||
#else
|
||||
movw_pc lr @ CP#4
|
||||
movw_pc lr @ CP#5
|
||||
movw_pc lr @ CP#6
|
||||
ret.w lr @ CP#4
|
||||
ret.w lr @ CP#5
|
||||
ret.w lr @ CP#6
|
||||
#endif
|
||||
movw_pc lr @ CP#7
|
||||
movw_pc lr @ CP#8
|
||||
movw_pc lr @ CP#9
|
||||
ret.w lr @ CP#7
|
||||
ret.w lr @ CP#8
|
||||
ret.w lr @ CP#9
|
||||
#ifdef CONFIG_VFP
|
||||
W(b) do_vfp @ CP#10 (VFP)
|
||||
W(b) do_vfp @ CP#11 (VFP)
|
||||
#else
|
||||
movw_pc lr @ CP#10 (VFP)
|
||||
movw_pc lr @ CP#11 (VFP)
|
||||
ret.w lr @ CP#10 (VFP)
|
||||
ret.w lr @ CP#11 (VFP)
|
||||
#endif
|
||||
movw_pc lr @ CP#12
|
||||
movw_pc lr @ CP#13
|
||||
movw_pc lr @ CP#14 (Debug)
|
||||
movw_pc lr @ CP#15 (Control)
|
||||
ret.w lr @ CP#12
|
||||
ret.w lr @ CP#13
|
||||
ret.w lr @ CP#14 (Debug)
|
||||
ret.w lr @ CP#15 (Control)
|
||||
|
||||
#ifdef NEED_CPU_ARCHITECTURE
|
||||
.align 2
|
||||
@ -649,7 +649,7 @@ ENTRY(fp_enter)
|
||||
.popsection
|
||||
|
||||
ENTRY(no_fp)
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(no_fp)
|
||||
|
||||
__und_usr_fault_32:
|
||||
@ -745,7 +745,7 @@ ENDPROC(__switch_to)
|
||||
#ifdef CONFIG_ARM_THUMB
|
||||
bx \reg
|
||||
#else
|
||||
mov pc, \reg
|
||||
ret \reg
|
||||
#endif
|
||||
.endm
|
||||
|
||||
@ -837,7 +837,7 @@ kuser_cmpxchg64_fixup:
|
||||
#if __LINUX_ARM_ARCH__ < 6
|
||||
bcc kuser_cmpxchg32_fixup
|
||||
#endif
|
||||
mov pc, lr
|
||||
ret lr
|
||||
.previous
|
||||
|
||||
#else
|
||||
@ -905,7 +905,7 @@ kuser_cmpxchg32_fixup:
|
||||
subs r8, r4, r7
|
||||
rsbcss r8, r8, #(2b - 1b)
|
||||
strcs r7, [sp, #S_PC]
|
||||
mov pc, lr
|
||||
ret lr
|
||||
.previous
|
||||
|
||||
#else
|
||||
|
@ -8,6 +8,7 @@
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <asm/assembler.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <asm/ftrace.h>
|
||||
#include <asm/unwind.h>
|
||||
@ -88,7 +89,7 @@ ENTRY(ret_from_fork)
|
||||
cmp r5, #0
|
||||
movne r0, r4
|
||||
adrne lr, BSYM(1f)
|
||||
movne pc, r5
|
||||
retne r5
|
||||
1: get_thread_info tsk
|
||||
b ret_slow_syscall
|
||||
ENDPROC(ret_from_fork)
|
||||
@ -290,7 +291,7 @@ ENDPROC(ftrace_graph_caller_old)
|
||||
|
||||
.macro mcount_exit
|
||||
ldmia sp!, {r0-r3, ip, lr}
|
||||
mov pc, ip
|
||||
ret ip
|
||||
.endm
|
||||
|
||||
ENTRY(__gnu_mcount_nc)
|
||||
@ -298,7 +299,7 @@ UNWIND(.fnstart)
|
||||
#ifdef CONFIG_DYNAMIC_FTRACE
|
||||
mov ip, lr
|
||||
ldmia sp!, {lr}
|
||||
mov pc, ip
|
||||
ret ip
|
||||
#else
|
||||
__mcount
|
||||
#endif
|
||||
@ -333,12 +334,12 @@ return_to_handler:
|
||||
bl ftrace_return_to_handler
|
||||
mov lr, r0 @ r0 has real ret addr
|
||||
ldmia sp!, {r0-r3}
|
||||
mov pc, lr
|
||||
ret lr
|
||||
#endif
|
||||
|
||||
ENTRY(ftrace_stub)
|
||||
.Lftrace_stub:
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(ftrace_stub)
|
||||
|
||||
#endif /* CONFIG_FUNCTION_TRACER */
|
||||
@ -561,7 +562,7 @@ sys_mmap2:
|
||||
streq r5, [sp, #4]
|
||||
beq sys_mmap_pgoff
|
||||
mov r0, #-EINVAL
|
||||
mov pc, lr
|
||||
ret lr
|
||||
#else
|
||||
str r5, [sp, #4]
|
||||
b sys_mmap_pgoff
|
||||
|
@ -240,12 +240,6 @@
|
||||
movs pc, lr @ return & move spsr_svc into cpsr
|
||||
.endm
|
||||
|
||||
@
|
||||
@ 32-bit wide "mov pc, reg"
|
||||
@
|
||||
.macro movw_pc, reg
|
||||
mov pc, \reg
|
||||
.endm
|
||||
#else /* CONFIG_THUMB2_KERNEL */
|
||||
.macro svc_exit, rpsr, irq = 0
|
||||
.if \irq != 0
|
||||
@ -304,14 +298,6 @@
|
||||
movs pc, lr @ return & move spsr_svc into cpsr
|
||||
.endm
|
||||
#endif /* ifdef CONFIG_CPU_V7M / else */
|
||||
|
||||
@
|
||||
@ 32-bit wide "mov pc, reg"
|
||||
@
|
||||
.macro movw_pc, reg
|
||||
mov pc, \reg
|
||||
nop
|
||||
.endm
|
||||
#endif /* !CONFIG_THUMB2_KERNEL */
|
||||
|
||||
/*
|
||||
|
@ -32,7 +32,7 @@ ENTRY(__set_fiq_regs)
|
||||
ldr lr, [r0]
|
||||
msr cpsr_c, r1 @ return to SVC mode
|
||||
mov r0, r0 @ avoid hazard prior to ARMv4
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(__set_fiq_regs)
|
||||
|
||||
ENTRY(__get_fiq_regs)
|
||||
@ -45,5 +45,5 @@ ENTRY(__get_fiq_regs)
|
||||
str lr, [r0]
|
||||
msr cpsr_c, r1 @ return to SVC mode
|
||||
mov r0, r0 @ avoid hazard prior to ARMv4
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(__get_fiq_regs)
|
||||
|
@ -10,6 +10,7 @@
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
*/
|
||||
#include <asm/assembler.h>
|
||||
|
||||
#define ATAG_CORE 0x54410001
|
||||
#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
|
||||
@ -61,10 +62,10 @@ __vet_atags:
|
||||
cmp r5, r6
|
||||
bne 1f
|
||||
|
||||
2: mov pc, lr @ atag/dtb pointer is ok
|
||||
2: ret lr @ atag/dtb pointer is ok
|
||||
|
||||
1: mov r2, #0
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(__vet_atags)
|
||||
|
||||
/*
|
||||
@ -162,7 +163,7 @@ __lookup_processor_type:
|
||||
cmp r5, r6
|
||||
blo 1b
|
||||
mov r5, #0 @ unknown processor
|
||||
2: mov pc, lr
|
||||
2: ret lr
|
||||
ENDPROC(__lookup_processor_type)
|
||||
|
||||
/*
|
||||
|
@ -82,7 +82,7 @@ ENTRY(stext)
|
||||
adr lr, BSYM(1f) @ return (PIC) address
|
||||
ARM( add pc, r10, #PROCINFO_INITFUNC )
|
||||
THUMB( add r12, r10, #PROCINFO_INITFUNC )
|
||||
THUMB( mov pc, r12 )
|
||||
THUMB( ret r12 )
|
||||
1: b __after_proc_init
|
||||
ENDPROC(stext)
|
||||
|
||||
@ -119,7 +119,7 @@ ENTRY(secondary_startup)
|
||||
mov r13, r12 @ __secondary_switched address
|
||||
ARM( add pc, r10, #PROCINFO_INITFUNC )
|
||||
THUMB( add r12, r10, #PROCINFO_INITFUNC )
|
||||
THUMB( mov pc, r12 )
|
||||
THUMB( ret r12 )
|
||||
ENDPROC(secondary_startup)
|
||||
|
||||
ENTRY(__secondary_switched)
|
||||
@ -164,7 +164,7 @@ __after_proc_init:
|
||||
#endif
|
||||
mcr p15, 0, r0, c1, c0, 0 @ write control reg
|
||||
#endif /* CONFIG_CPU_CP15 */
|
||||
mov pc, r13
|
||||
ret r13
|
||||
ENDPROC(__after_proc_init)
|
||||
.ltorg
|
||||
|
||||
@ -254,7 +254,7 @@ ENTRY(__setup_mpu)
|
||||
orr r0, r0, #CR_M @ Set SCTRL.M (MPU on)
|
||||
mcr p15, 0, r0, c1, c0, 0 @ Enable MPU
|
||||
isb
|
||||
mov pc,lr
|
||||
ret lr
|
||||
ENDPROC(__setup_mpu)
|
||||
#endif
|
||||
#include "head-common.S"
|
||||
|
@ -140,7 +140,7 @@ ENTRY(stext)
|
||||
mov r8, r4 @ set TTBR1 to swapper_pg_dir
|
||||
ARM( add pc, r10, #PROCINFO_INITFUNC )
|
||||
THUMB( add r12, r10, #PROCINFO_INITFUNC )
|
||||
THUMB( mov pc, r12 )
|
||||
THUMB( ret r12 )
|
||||
1: b __enable_mmu
|
||||
ENDPROC(stext)
|
||||
.ltorg
|
||||
@ -335,7 +335,7 @@ __create_page_tables:
|
||||
sub r4, r4, #0x1000 @ point to the PGD table
|
||||
mov r4, r4, lsr #ARCH_PGD_SHIFT
|
||||
#endif
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(__create_page_tables)
|
||||
.ltorg
|
||||
.align
|
||||
@ -383,7 +383,7 @@ ENTRY(secondary_startup)
|
||||
ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor
|
||||
@ (return control reg)
|
||||
THUMB( add r12, r10, #PROCINFO_INITFUNC )
|
||||
THUMB( mov pc, r12 )
|
||||
THUMB( ret r12 )
|
||||
ENDPROC(secondary_startup)
|
||||
|
||||
/*
|
||||
@ -468,7 +468,7 @@ ENTRY(__turn_mmu_on)
|
||||
instr_sync
|
||||
mov r3, r3
|
||||
mov r3, r13
|
||||
mov pc, r3
|
||||
ret r3
|
||||
__turn_mmu_on_end:
|
||||
ENDPROC(__turn_mmu_on)
|
||||
.popsection
|
||||
@ -487,7 +487,7 @@ __fixup_smp:
|
||||
orr r4, r4, #0x0000b000
|
||||
orr r4, r4, #0x00000020 @ val 0x4100b020
|
||||
teq r3, r4 @ ARM 11MPCore?
|
||||
moveq pc, lr @ yes, assume SMP
|
||||
reteq lr @ yes, assume SMP
|
||||
|
||||
mrc p15, 0, r0, c0, c0, 5 @ read MPIDR
|
||||
and r0, r0, #0xc0000000 @ multiprocessing extensions and
|
||||
@ -500,7 +500,7 @@ __fixup_smp:
|
||||
orr r4, r4, #0x0000c000
|
||||
orr r4, r4, #0x00000090
|
||||
teq r3, r4 @ Check for ARM Cortex-A9
|
||||
movne pc, lr @ Not ARM Cortex-A9,
|
||||
retne lr @ Not ARM Cortex-A9,
|
||||
|
||||
@ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
|
||||
@ below address check will need to be #ifdef'd or equivalent
|
||||
@ -512,7 +512,7 @@ __fixup_smp:
|
||||
ARM_BE8(rev r0, r0) @ byteswap if big endian
|
||||
and r0, r0, #0x3 @ number of CPUs
|
||||
teq r0, #0x0 @ is 1?
|
||||
movne pc, lr
|
||||
retne lr
|
||||
|
||||
__fixup_smp_on_up:
|
||||
adr r0, 1f
|
||||
@ -539,7 +539,7 @@ smp_on_up:
|
||||
.text
|
||||
__do_fixup_smp_on_up:
|
||||
cmp r4, r5
|
||||
movhs pc, lr
|
||||
reths lr
|
||||
ldmia r4!, {r0, r6}
|
||||
ARM( str r6, [r0, r3] )
|
||||
THUMB( add r0, r0, r3 )
|
||||
@ -672,7 +672,7 @@ ARM_BE8(rev16 ip, ip)
|
||||
2: cmp r4, r5
|
||||
ldrcc r7, [r4], #4 @ use branch for delay slot
|
||||
bcc 1b
|
||||
mov pc, lr
|
||||
ret lr
|
||||
#endif
|
||||
ENDPROC(__fixup_a_pv_table)
|
||||
|
||||
|
@ -99,7 +99,7 @@ ENTRY(__hyp_stub_install_secondary)
|
||||
* immediately.
|
||||
*/
|
||||
compare_cpu_mode_with_primary r4, r5, r6, r7
|
||||
movne pc, lr
|
||||
retne lr
|
||||
|
||||
/*
|
||||
* Once we have given up on one CPU, we do not try to install the
|
||||
@ -111,7 +111,7 @@ ENTRY(__hyp_stub_install_secondary)
|
||||
*/
|
||||
|
||||
cmp r4, #HYP_MODE
|
||||
movne pc, lr @ give up if the CPU is not in HYP mode
|
||||
retne lr @ give up if the CPU is not in HYP mode
|
||||
|
||||
/*
|
||||
* Configure HSCTLR to set correct exception endianness/instruction set
|
||||
@ -201,7 +201,7 @@ ENDPROC(__hyp_get_vectors)
|
||||
@ fall through
|
||||
ENTRY(__hyp_set_vectors)
|
||||
__HVC(0)
|
||||
mov pc, lr
|
||||
ret lr
|
||||
ENDPROC(__hyp_set_vectors)
|
||||
|
||||
#ifndef ZIMAGE
|
||||
|
@ -100,7 +100,7 @@ ENTRY(iwmmxt_task_enable)
|
||||
get_thread_info r10
|
||||
#endif
|
||||
4: dec_preempt_count r10, r3
|
||||
mov pc, r9 @ normal exit from exception
|
||||
ret r9 @ normal exit from exception
|
||||
|
||||
concan_save:
|
||||
|
||||
@ -144,7 +144,7 @@ concan_dump:
|
||||
wstrd wR15, [r1, #MMX_WR15]
|
||||
|
||||
2: teq r0, #0 @ anything to load?
|
||||
moveq pc, lr @ if not, return
|
||||
reteq lr @ if not, return
|
||||
|
||||
concan_load:
|
||||
|
||||
@ -177,10 +177,10 @@ concan_load:
|
||||
@ clear CUP/MUP (only if r1 != 0)
|
||||
teq r1, #0
|
||||
mov r2, #0
|
||||
moveq pc, lr
|
||||
reteq lr
|
||||
|
||||
tmcr wCon, r2
|
||||
mov pc, lr
|
||||
ret lr
|
||||
|
||||
/*
|
||||
* Back up Concan regs to save area and disable access to them
|
||||
@ -266,7 +266,7 @@ ENTRY(iwmmxt_task_copy)
|
||||
mov r3, lr @ preserve return address
|
||||
bl concan_dump
|
||||
msr cpsr_c, ip @ restore interrupt mode
|
||||
mov pc, r3
|
||||
ret r3
|
||||
|
||||
/*
|
||||
* Restore Concan state from given memory address
|
||||
@ -302,7 +302,7 @@ ENTRY(iwmmxt_task_restore)
|
||||
mov r3, lr @ preserve return address
|
||||
bl concan_load
|
||||
msr cpsr_c, ip @ restore interrupt mode
|
||||
mov pc, r3
|
||||
ret r3
|
||||
|
||||
/*
|
||||
* Concan handling on task switch
|
||||
@ -324,7 +324,7 @@ ENTRY(iwmmxt_task_switch)
|
||||
add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area
|
||||
ldr r2, [r2] @ get current Concan owner
|
||||
teq r2, r3 @ next task owns it?
|
||||
movne pc, lr @ no: leave Concan disabled
|
||||
retne lr @ no: leave Concan disabled
|
||||
|
||||
1: @ flip Concan access
|
||||
XSC(eor r1, r1, #0x3)
|
||||
@ -351,7 +351,7 @@ ENTRY(iwmmxt_task_release)
|
||||
eors r0, r0, r1 @ if equal...
|
||||
streq r0, [r3] @ then clear ownership
|
||||
msr cpsr_c, r2 @ restore interrupts
|
||||
mov pc, lr
|
||||
ret lr
|
||||
|
||||
.data
|
||||
concan_owner:
|
||||
|
@ -560,11 +560,16 @@ user_backtrace(struct frame_tail __user *tail,
|
||||
struct perf_callchain_entry *entry)
|
||||
{
|
||||
struct frame_tail buftail;
|
||||
unsigned long err;
|
||||
|
||||
/* Also check accessibility of one struct frame_tail beyond */
|
||||
if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
|
||||
return NULL;
|
||||
if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
|
||||
|
||||
pagefault_disable();
|
||||
err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
|
||||
pagefault_enable();
|
||||
|
||||
if (err)
|
||||
return NULL;
|
||||
|
||||
perf_callchain_store(entry, buftail.lr);
|
||||
@ -590,6 +595,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
||||
}
|
||||
|
||||
perf_callchain_store(entry, regs->ARM_pc);
|
||||
|
||||
if (!current->mm)
|
||||
return;
|
||||
|
||||
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
|
||||
|
||||
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
|
||||
@ -621,10 +630,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
|
||||
return;
|
||||
}
|
||||
|
||||
fr.fp = regs->ARM_fp;
|
||||
fr.sp = regs->ARM_sp;
|
||||
fr.lr = regs->ARM_lr;
|
||||
fr.pc = regs->ARM_pc;
|
||||
arm_get_current_stackframe(regs, &fr);
|
||||
walk_stackframe(&fr, callchain_trace, entry);
|
||||
}
|
||||
|
||||
|
@ -233,14 +233,17 @@ static struct of_device_id cpu_pmu_of_device_ids[] = {
|
||||
{.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
|
||||
{.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
|
||||
{.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
|
||||
{.compatible = "arm,arm1176-pmu", .data = armv6pmu_init},
|
||||
{.compatible = "arm,arm1136-pmu", .data = armv6pmu_init},
|
||||
{.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
|
||||
{.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
|
||||
{.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
|
||||
{},
|
||||
};
|
||||
|
||||
static struct platform_device_id cpu_pmu_plat_device_ids[] = {
|
||||
{.name = "arm-pmu"},
|
||||
{.name = "armv6-pmu"},
|
||||
{.name = "armv7-pmu"},
|
||||
{.name = "xscale-pmu"},
|
||||
{},
|
||||
};
|
||||
|
||||
@ -250,40 +253,43 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
|
||||
static int probe_current_pmu(struct arm_pmu *pmu)
|
||||
{
|
||||
int cpu = get_cpu();
|
||||
unsigned long implementor = read_cpuid_implementor();
|
||||
unsigned long part_number = read_cpuid_part_number();
|
||||
int ret = -ENODEV;
|
||||
|
||||
pr_info("probing PMU on CPU %d\n", cpu);
|
||||
|
||||
switch (read_cpuid_part()) {
|
||||
/* ARM Ltd CPUs. */
|
||||
if (implementor == ARM_CPU_IMP_ARM) {
|
||||
switch (part_number) {
|
||||
case ARM_CPU_PART_ARM1136:
|
||||
case ARM_CPU_PART_ARM1156:
|
||||
case ARM_CPU_PART_ARM1176:
|
||||
ret = armv6pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM11MPCORE:
|
||||
ret = armv6mpcore_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_CORTEX_A8:
|
||||
ret = armv7_a8_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_CORTEX_A9:
|
||||
ret = armv7_a9_pmu_init(pmu);
|
||||
break;
|
||||
}
|
||||
/* Intel CPUs [xscale]. */
|
||||
} else if (implementor == ARM_CPU_IMP_INTEL) {
|
||||
switch (xscale_cpu_arch_version()) {
|
||||
case ARM_CPU_XSCALE_ARCH_V1:
|
||||
ret = xscale1pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_XSCALE_ARCH_V2:
|
||||
ret = xscale2pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM1136:
|
||||
ret = armv6_1136_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM1156:
|
||||
ret = armv6_1156_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM1176:
|
||||
ret = armv6_1176_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_ARM11MPCORE:
|
||||
ret = armv6mpcore_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_CORTEX_A8:
|
||||
ret = armv7_a8_pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_PART_CORTEX_A9:
|
||||
ret = armv7_a9_pmu_init(pmu);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
|
||||
switch (xscale_cpu_arch_version()) {
|
||||
case ARM_CPU_XSCALE_ARCH_V1:
|
||||
ret = xscale1pmu_init(pmu);
|
||||
break;
|
||||
case ARM_CPU_XSCALE_ARCH_V2:
|
||||
ret = xscale2pmu_init(pmu);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
put_cpu();
|
||||
|
@ -65,13 +65,11 @@ enum armv6_counters {
|
||||
* accesses/misses in hardware.
|
||||
*/
|
||||
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
|
||||
PERF_MAP_ALL_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
|
||||
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
|
||||
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
|
||||
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
|
||||
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
|
||||
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
|
||||
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
|
||||
};
|
||||
@@ -79,116 +77,31 @@ static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
PERF_CACHE_MAP_ALL_UNSUPPORTED,

/*
* The performance counters don't differentiate between read and write
* accesses/misses so this isn't strictly correct, but it's the best we
* can do. Writes and reads get combined.
*/
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,

[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,

/*
* The ARM performance counters can count micro DTLB misses, micro ITLB
* misses and main TLB misses. There isn't an event for TLB misses, so
* use the micro misses here and if users want the main TLB misses they
* can use a raw counter.
*/
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,

[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
};

enum armv6mpcore_perf_types {
@@ -220,13 +133,11 @@ enum armv6mpcore_perf_types {
* accesses/misses in hardware.
*/
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
};
@@ -234,114 +145,26 @@ static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
PERF_CACHE_MAP_ALL_UNSUPPORTED,

[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,

[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,

/*
* The ARM performance counters can count micro DTLB misses, micro ITLB
* misses and main TLB misses. There isn't an event for TLB misses, so
* use the micro misses here and if users want the main TLB misses they
* can use a raw counter.
*/
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,

[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
};

static inline unsigned long
@@ -653,9 +476,8 @@ static int armv6_map_event(struct perf_event *event)
&armv6_perf_cache_map, 0xFF);
}

static int armv6pmu_init(struct arm_pmu *cpu_pmu)
static void armv6pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "v6";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6pmu_disable_event;
@@ -667,7 +489,26 @@ static int armv6pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->map_event = armv6_map_event;
cpu_pmu->num_events = 3;
cpu_pmu->max_period = (1LLU << 32) - 1;
}

static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
{
armv6pmu_init(cpu_pmu);
cpu_pmu->name = "armv6_1136";
return 0;
}

static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
{
armv6pmu_init(cpu_pmu);
cpu_pmu->name = "armv6_1156";
return 0;
}

static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
{
armv6pmu_init(cpu_pmu);
cpu_pmu->name = "armv6_1176";
return 0;
}

@@ -687,7 +528,7 @@ static int armv6mpcore_map_event(struct perf_event *event)

static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "v6mpcore";
cpu_pmu->name = "armv6_11mpcore";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6mpcore_pmu_disable_event;
@@ -703,7 +544,17 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
#else
static int armv6pmu_init(struct arm_pmu *cpu_pmu)
static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}

static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}

static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}

File diff suppressed because it is too large
@@ -48,118 +48,31 @@ enum xscale_counters {
};

static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};

static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
PERF_CACHE_MAP_ALL_UNSUPPORTED,

[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,

[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,

[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,

[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
};

#define XSCALE_PMU_ENABLE 0x001
@@ -442,7 +355,7 @@ static int xscale_map_event(struct perf_event *event)

static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "xscale1";
cpu_pmu->name = "armv5_xscale1";
cpu_pmu->handle_irq = xscale1pmu_handle_irq;
cpu_pmu->enable = xscale1pmu_enable_event;
cpu_pmu->disable = xscale1pmu_disable_event;
@@ -812,7 +725,7 @@ static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val)

static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "xscale2";
cpu_pmu->name = "armv5_xscale2";
cpu_pmu->handle_irq = xscale2pmu_handle_irq;
cpu_pmu->enable = xscale2pmu_enable_event;
cpu_pmu->disable = xscale2pmu_disable_event;

@@ -3,6 +3,7 @@
*/

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/kexec.h>

.align 3 /* not needed for this code, but keeps fncpy() happy */
@@ -59,7 +60,7 @@ ENTRY(relocate_new_kernel)
mov r0,#0
ldr r1,kexec_mach_type
ldr r2,kexec_boot_atags
ARM( mov pc, lr )
ARM( ret lr )
THUMB( bx lr )

.align

@@ -393,19 +393,34 @@ static void __init cpuid_init_hwcaps(void)
elf_hwcap |= HWCAP_LPAE;
}

static void __init feat_v6_fixup(void)
static void __init elf_hwcap_fixup(void)
{
int id = read_cpuid_id();

if ((id & 0xff0f0000) != 0x41070000)
return;
unsigned id = read_cpuid_id();
unsigned sync_prim;

/*
* HWCAP_TLS is available only on 1136 r1p0 and later,
* see also kuser_get_tls_init.
*/
if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
if (read_cpuid_part() == ARM_CPU_PART_ARM1136 &&
((id >> 20) & 3) == 0) {
elf_hwcap &= ~HWCAP_TLS;
return;
}

/* Verify if CPUID scheme is implemented */
if ((id & 0x000f0000) != 0x000f0000)
return;

/*
* If the CPU supports LDREX/STREX and LDREXB/STREXB,
* avoid advertising SWP; it may not be atomic with
* multiprocessing cores.
*/
sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) |
((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f);
if (sync_prim >= 0x13)
elf_hwcap &= ~HWCAP_SWP;
}

/*
@@ -609,7 +624,7 @@ static void __init setup_processor(void)
#endif
erratum_a15_798181_init();

feat_v6_fixup();
elf_hwcap_fixup();

cacheid_init();
cpu_init();

@@ -107,7 +107,7 @@ ENTRY(cpu_resume_mmu)
instr_sync
mov r0, r0
mov r0, r0
mov pc, r3 @ jump to virtual address
ret r3 @ jump to virtual address
ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:

@@ -17,6 +17,8 @@
#include <asm/cputype.h>

#define SCU_CTRL 0x00
#define SCU_ENABLE (1 << 0)
#define SCU_STANDBY_ENABLE (1 << 5)
#define SCU_CONFIG 0x04
#define SCU_CPU_STATUS 0x08
#define SCU_INVALIDATE 0x0c
@@ -50,10 +52,16 @@ void scu_enable(void __iomem *scu_base)

scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
/* already enabled? */
if (scu_ctrl & 1)
if (scu_ctrl & SCU_ENABLE)
return;

scu_ctrl |= 1;
scu_ctrl |= SCU_ENABLE;

/* Cortex-A9 earlier than r2p0 has no standby bit in SCU */
if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 &&
(read_cpuid_id() & 0x00f0000f) >= 0x00200000)
scu_ctrl |= SCU_STANDBY_ENABLE;

writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);

/*

@@ -92,15 +92,19 @@ void erratum_a15_798181_init(void)
unsigned int midr = read_cpuid_id();
unsigned int revidr = read_cpuid(CPUID_REVIDR);

/* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 ||
(revidr & 0x210) == 0x210) {
return;
}
if (revidr & 0x10)
erratum_a15_798181_handler = erratum_a15_798181_partial;
else
/* Brahma-B15 r0p0..r0p2 affected
* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2)
erratum_a15_798181_handler = erratum_a15_798181_broadcast;
else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 &&
(revidr & 0x210) != 0x210) {
if (revidr & 0x10)
erratum_a15_798181_handler =
erratum_a15_798181_partial;
else
erratum_a15_798181_handler =
erratum_a15_798181_broadcast;
}
}
#endif

@@ -27,6 +27,7 @@
#include <linux/perf_event.h>

#include <asm/opcodes.h>
#include <asm/system_info.h>
#include <asm/traps.h>
#include <asm/uaccess.h>

@@ -266,6 +267,9 @@ static struct undef_hook swp_hook = {
*/
static int __init swp_emulation_init(void)
{
if (cpu_architecture() < CPU_ARCH_ARMv7)
return 0;

#ifdef CONFIG_PROC_FS
if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops))
return -ENOMEM;

@@ -50,10 +50,7 @@ unsigned long profile_pc(struct pt_regs *regs)
if (!in_lock_functions(regs->ARM_pc))
return regs->ARM_pc;

frame.fp = regs->ARM_fp;
frame.sp = regs->ARM_sp;
frame.lr = regs->ARM_lr;
frame.pc = regs->ARM_pc;
arm_get_current_stackframe(regs, &frame);
do {
int ret = unwind_frame(&frame);
if (ret < 0)

@@ -31,11 +31,13 @@
#include <asm/exception.h>
#include <asm/unistd.h>
#include <asm/traps.h>
#include <asm/ptrace.h>
#include <asm/unwind.h>
#include <asm/tls.h>
#include <asm/system_misc.h>
#include <asm/opcodes.h>

static const char *handler[]= {
"prefetch abort",
"data abort",
@@ -184,7 +186,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
tsk = current;

if (regs) {
fp = regs->ARM_fp;
fp = frame_pointer(regs);
mode = processor_mode(regs);
} else if (tsk != current) {
fp = thread_saved_fp(tsk);
@@ -719,7 +721,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
dump_instr("", regs);
if (user_mode(regs)) {
__show_regs(regs);
c_backtrace(regs->ARM_fp, processor_mode(regs));
c_backtrace(frame_pointer(regs), processor_mode(regs));
}
}
#endif

@@ -479,12 +479,10 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
tsk = current;

if (regs) {
frame.fp = regs->ARM_fp;
frame.sp = regs->ARM_sp;
frame.lr = regs->ARM_lr;
arm_get_current_stackframe(regs, &frame);
/* PC might be corrupted, use LR in that case. */
frame.pc = kernel_text_address(regs->ARM_pc)
? regs->ARM_pc : regs->ARM_lr;
if (!kernel_text_address(regs->ARM_pc))
frame.pc = regs->ARM_lr;
} else if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_sp;

@@ -318,7 +318,6 @@ SECTIONS
_end = .;

STABS_DEBUG
.comment 0 : { *(.comment) }
}

/*

@@ -274,13 +274,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,

int __attribute_const__ kvm_target_cpu(void)
{
unsigned long implementor = read_cpuid_implementor();
unsigned long part_number = read_cpuid_part_number();

if (implementor != ARM_CPU_IMP_ARM)
return -EINVAL;

switch (part_number) {
switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:

@@ -17,6 +17,7 @@
*/

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
@@ -134,7 +135,7 @@ phase2:
ldr r0, =TRAMPOLINE_VA
adr r1, target
bfi r0, r1, #0, #PAGE_SHIFT
mov pc, r0
ret r0

target: @ We're now in the trampoline code, switch page tables
mcrr p15, 4, r2, r3, c2

@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */

#include <linux/linkage.h>
#include <asm/assembler.h>

#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
mov pc, lr
ret lr

ENDPROC(__ashldi3)
ENDPROC(__aeabi_llsl)

@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */

#include <linux/linkage.h>
#include <asm/assembler.h>

#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
mov pc, lr
ret lr

ENDPROC(__ashrdi3)
ENDPROC(__aeabi_lasr)

@@ -25,7 +25,7 @@
ENTRY(c_backtrace)

#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
mov pc, lr
ret lr
ENDPROC(c_backtrace)
#else
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...

@@ -1,3 +1,4 @@
#include <asm/assembler.h>
#include <asm/unwind.h>

#if __LINUX_ARM_ARCH__ >= 6
@@ -70,7 +71,7 @@ UNWIND( .fnstart )
\instr r2, r2, r3
str r2, [r1, r0, lsl #2]
restore_irqs ip
mov pc, lr
ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
@@ -98,7 +99,7 @@ UNWIND( .fnstart )
\store r2, [r1]
moveq r0, #0
restore_irqs ip
mov pc, lr
ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm

@@ -1,4 +1,5 @@
#include <linux/linkage.h>
#include <asm/assembler.h>

#if __LINUX_ARM_ARCH__ >= 6
ENTRY(__bswapsi2)
@@ -18,7 +19,7 @@ ENTRY(__bswapsi2)
mov r3, r3, lsr #8
bic r3, r3, #0xff00
eor r0, r3, r0, ror #8
mov pc, lr
ret lr
ENDPROC(__bswapsi2)

ENTRY(__bswapdi2)
@@ -31,6 +32,6 @@ ENTRY(__bswapdi2)
bic r1, r1, #0xff00
eor r1, r1, r0, ror #8
eor r0, r3, ip, ror #8
mov pc, lr
ret lr
ENDPROC(__bswapdi2)
#endif

@@ -36,9 +36,9 @@ ENTRY(call_with_stack)
mov r0, r1

adr lr, BSYM(1f)
mov pc, r2
ret r2

1: ldr lr, [sp]
ldr sp, [sp, #4]
mov pc, lr
ret lr
ENDPROC(call_with_stack)

@@ -97,7 +97,7 @@ td3 .req lr
#endif
#endif
adcnes sum, sum, td0 @ update checksum
mov pc, lr
ret lr

ENTRY(csum_partial)
stmfd sp!, {buf, lr}

@@ -7,6 +7,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/assembler.h>

/*
* unsigned int
@@ -40,7 +41,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
moveq pc, lr @ dst is now 32bit aligned
reteq lr @ dst is now 32bit aligned

.Ldst_16bit: load2b r8, ip
sub len, len, #2
@@ -48,7 +49,7 @@ sum .req r3
strb r8, [dst], #1
adcs sum, sum, ip, put_byte_1
strb ip, [dst], #1
mov pc, lr @ dst is now 32bit aligned
ret lr @ dst is now 32bit aligned

/*
* Handle 0 to 7 bytes, with any alignment of source and

@@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
mul r0, r2, r0 @ max = 2^32-1
add r0, r0, r1, lsr #32-6
movs r0, r0, lsr #6
moveq pc, lr
reteq lr

/*
* loops = r0 * HZ * loops_per_jiffy / 1000000
@@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
ENTRY(__loop_delay)
subs r0, r0, #1
#if 0
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
#endif
bhi __loop_delay
mov pc, lr
ret lr
ENDPROC(__loop_udelay)
ENDPROC(__loop_const_udelay)
ENDPROC(__loop_delay)

@@ -13,6 +13,7 @@
*/

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>

#ifdef __ARMEB__
@@ -97,7 +98,7 @@ UNWIND(.fnstart)
mov yl, #0
cmpeq xl, r4
movlo xh, xl
movlo pc, lr
retlo lr

@ The division loop for lower bit positions.
@ Here we shift remainer bits leftwards rather than moving the
@@ -111,14 +112,14 @@ UNWIND(.fnstart)
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
mov pc, lr
ret lr

@ The top part of remainder became zero. If carry is set
@ (the 33th bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
moveq pc, lr
reteq lr

@ We still have remainer bits in the low part. Bring them up.

@@ -144,7 +145,7 @@ UNWIND(.fnstart)
movs ip, ip, lsr #1
mov xh, #1
bne 4b
mov pc, lr
ret lr

8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
@@ -184,13 +185,13 @@ UNWIND(.fnstart)
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
ret lr

@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
reteq lr
UNWIND(.fnend)

UNWIND(.fnstart)

@@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_zero_bit_le)

/*
@@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_bit_le)

/*
@@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_zero_bit_be)

ENTRY(_find_next_zero_bit_be)
@@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_bit_be)

ENTRY(_find_next_bit_be)
@@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be)
#endif
cmp r1, r0 @ Clamp to maxbit
movlo r0, r1
mov pc, lr
ret lr

@@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
* r2 contains the zero-extended value
* r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@@ -36,7 +36,7 @@ ENTRY(__get_user_1)
check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_1)

ENTRY(__get_user_2)
@@ -56,25 +56,60 @@ rb .req r0
orr r2, rb, r2, lsl #8
#endif
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_2)

ENTRY(__get_user_4)
check_uaccess r0, 4, r1, r2, __get_user_bad
4: TUSER(ldr) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_4)

ENTRY(__get_user_8)
check_uaccess r0, 8, r1, r2, __get_user_bad
#ifdef CONFIG_THUMB2_KERNEL
5: TUSER(ldr) r2, [r0]
6: TUSER(ldr) r3, [r0, #4]
#else
5: TUSER(ldr) r2, [r0], #4
6: TUSER(ldr) r3, [r0]
#endif
mov r0, #0
ret lr
ENDPROC(__get_user_8)

#ifdef __ARMEB__
ENTRY(__get_user_lo8)
check_uaccess r0, 8, r1, r2, __get_user_bad
#ifdef CONFIG_CPU_USE_DOMAINS
add r0, r0, #4
7: ldrt r2, [r0]
#else
7: ldr r2, [r0, #4]
#endif
mov r0, #0
ret lr
ENDPROC(__get_user_lo8)
#endif

__get_user_bad8:
mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
ret lr
ENDPROC(__get_user_bad)
ENDPROC(__get_user_bad8)

.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
.long 5b, __get_user_bad8
.long 6b, __get_user_bad8
#ifdef __ARMEB__
.long 7b, __get_user_bad
#endif
.popsection

@@ -25,7 +25,7 @@

ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne .Linsb_align

@@ -12,7 +12,7 @@

ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne 3f

@@ -33,7 +33,7 @@ ENTRY(__raw_readsl)
stmcsia r1!, {r3, ip}
ldrne r3, [r0, #0]
strne r3, [r1, #0]
mov pc, lr
ret lr

3: ldr r3, [r0]
cmp ip, #2
@@ -75,5 +75,5 @@ ENTRY(__raw_readsl)
strb r3, [r1, #1]
8: mov r3, ip, get_byte_0
strb r3, [r1, #0]
mov pc, lr
ret lr
ENDPROC(__raw_readsl)

@@ -27,11 +27,11 @@
strb r3, [r1], #1

subs r2, r2, #1
moveq pc, lr
reteq lr

ENTRY(__raw_readsw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
tst r1, #3
bne .Linsw_align

@@ -26,7 +26,7 @@

ENTRY(__raw_readsw)
teq r2, #0
moveq pc, lr
reteq lr
tst r1, #3
bne .Linsw_align

@@ -45,7 +45,7 @@

ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne .Loutsb_align

@@ -12,7 +12,7 @@

ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne 3f

@@ -33,7 +33,7 @@ ENTRY(__raw_writesl)
ldrne r3, [r1, #0]
strcs ip, [r0, #0]
strne r3, [r0, #0]
mov pc, lr
ret lr

3: bic r1, r1, #3
ldr r3, [r1], #4
@@ -47,7 +47,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
mov pc, lr
ret lr

5: mov ip, r3, lspull #8
ldr r3, [r1], #4
@@ -55,7 +55,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
mov pc, lr
ret lr

6: mov ip, r3, lspull #24
ldr r3, [r1], #4
@@ -63,5 +63,5 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
mov pc, lr
ret lr
ENDPROC(__raw_writesl)

@@ -28,11 +28,11 @@
orr r3, r3, r3, lsl #16
str r3, [r0]
subs r2, r2, #1
moveq pc, lr
reteq lr

ENTRY(__raw_writesw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
tst r1, #3
bne .Loutsw_align

@@ -31,7 +31,7 @@

ENTRY(__raw_writesw)
teq r2, #0
moveq pc, lr
reteq lr
ands r3, r1, #3
bne .Loutsw_align

@@ -96,5 +96,5 @@ ENTRY(__raw_writesw)
tst r2, #1
3: movne ip, r3, lsr #8
strneh ip, [r0]
mov pc, lr
ret lr
ENDPROC(__raw_writesw)

@@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)

subs r2, r1, #1
moveq pc, lr
reteq lr
bcc Ldiv0
cmp r0, r1
bls 11f
@@ -220,16 +220,16 @@ UNWIND(.fnstart)
ARM_DIV_BODY r0, r1, r2, r3

mov r0, r2
mov pc, lr
ret lr

11: moveq r0, #1
movne r0, #0
mov pc, lr
ret lr

12: ARM_DIV2_ORDER r1, r2

mov r0, r0, lsr r2
mov pc, lr
ret lr

UNWIND(.fnend)
ENDPROC(__udivsi3)
@@ -244,11 +244,11 @@ UNWIND(.fnstart)
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
movls pc, lr
retls lr

ARM_MOD_BODY r0, r1, r2, r3

mov pc, lr
ret lr

UNWIND(.fnend)
ENDPROC(__umodsi3)
@@ -274,23 +274,23 @@ UNWIND(.fnstart)

cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
ret lr

10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
mov pc, lr
ret lr

11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
mov pc, lr
ret lr

12: ARM_DIV2_ORDER r1, r2

cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
mov pc, lr
ret lr

UNWIND(.fnend)
ENDPROC(__divsi3)
@@ -315,7 +315,7 @@ UNWIND(.fnstart)

10: cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
ret lr

UNWIND(.fnend)
ENDPROC(__modsi3)
@@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
ret lr

UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
@@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
ret lr

UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

@@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */

#include <linux/linkage.h>
#include <asm/assembler.h>

#ifdef __ARMEB__
#define al r1
@@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
mov pc, lr
ret lr

ENDPROC(__lshrdi3)
ENDPROC(__aeabi_llsr)

@@ -22,5 +22,5 @@ ENTRY(memchr)
bne 1b
sub r0, r0, #1
2: movne r0, #0
mov pc, lr
ret lr
ENDPROC(memchr)

@@ -110,7 +110,7 @@ ENTRY(memset)
strneb r1, [ip], #1
tst r2, #1
strneb r1, [ip], #1
mov pc, lr
ret lr

6: subs r2, r2, #4 @ 1 do we have enough
blt 5b @ 1 bytes to align with?

@@ -121,5 +121,5 @@ ENTRY(__memzero)
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
strneb r2, [r0], #1 @ 1
mov pc, lr @ 1
ret lr @ 1
ENDPROC(__memzero)

@@ -11,6 +11,7 @@
*/

#include <linux/linkage.h>
#include <asm/assembler.h>

#ifdef __ARMEB__
#define xh r0
@@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul)
adc xh, xh, yh, lsr #16
adds xl, xl, ip, lsl #16
adc xh, xh, ip, lsr #16
mov pc, lr
ret lr

ENDPROC(__muldi3)
ENDPROC(__aeabi_lmul)

@@ -36,7 +36,7 @@ ENTRY(__put_user_1)
check_uaccess r0, 1, r1, ip, __put_user_bad
1: TUSER(strb) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_1)

ENTRY(__put_user_2)
@@ -60,14 +60,14 @@ ENTRY(__put_user_2)
#endif
#endif /* CONFIG_THUMB2_KERNEL */
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_2)

ENTRY(__put_user_4)
check_uaccess r0, 4, r1, ip, __put_user_bad
4: TUSER(str) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_4)

ENTRY(__put_user_8)
@@ -80,12 +80,12 @@ ENTRY(__put_user_8)
6: TUSER(str) r3, [r0]
#endif
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_8)

__put_user_bad:
mov r0, #-EFAULT
mov pc, lr
ret lr
ENDPROC(__put_user_bad)

.pushsection __ex_table, "a"

@@ -23,5 +23,5 @@ ENTRY(strchr)
teq r2, r1
movne r0, #0
subeq r0, r0, #1
mov pc, lr
ret lr
ENDPROC(strchr)

@@ -22,5 +22,5 @@ ENTRY(strrchr)
teq r2, #0
bne 1b
mov r0, r3
mov pc, lr
ret lr
ENDPROC(strrchr)

@@ -11,6 +11,7 @@
*/

#include <linux/linkage.h>
#include <asm/assembler.h>

#ifdef __ARMEB__
#define xh r0
@@ -31,7 +32,7 @@ ENTRY(__ucmpdi2)
movlo r0, #0
moveq r0, #1
movhi r0, #2
mov pc, lr
ret lr

ENDPROC(__ucmpdi2)

@@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp)
movlo r0, #-1
moveq r0, #0
movhi r0, #1
mov pc, lr
ret lr

ENDPROC(__aeabi_ulcmp)

@@ -213,7 +213,7 @@ ddr2clk_stop_done:
cmp ip, r0
bne ddr2clk_stop_done

mov pc, lr
ret lr
ENDPROC(davinci_ddr_psc_config)

CACHE_FLUSH:

@@ -16,11 +16,6 @@
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H

/*
* Physical DRAM offset.
*/
#define PLAT_PHYS_OFFSET UL(0x00000000)

/*
* Cache flushing area - SRAM
*/

@@ -198,7 +198,7 @@ crunch_load:
get_thread_info r10
#endif
2: dec_preempt_count r10, r3
mov pc, lr
ret lr

/*
* Back up crunch regs to save area and disable access to them
@@ -277,7 +277,7 @@ ENTRY(crunch_task_copy)
mov r3, lr @ preserve return address
bl crunch_save
msr cpsr_c, ip @ restore interrupt mode
mov pc, r3
ret r3

/*
* Restore crunch state from given memory address
@@ -310,4 +310,4 @@ ENTRY(crunch_task_restore)
mov r3, lr @ preserve return address
bl crunch_load
msr cpsr_c, ip @ restore interrupt mode
mov pc, r3
ret r3

@@ -1,22 +0,0 @@
/*
* arch/arm/mach-ep93xx/include/mach/memory.h
*/

#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H

#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0x00000000)
#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xc0000000)
#elif defined(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xd0000000)
#elif defined(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xe0000000)
#elif defined(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xf0000000)
#else
#error "Kconfig bug: No EP93xx PHYS_OFFSET set"
#endif

#endif
@@ -119,6 +119,7 @@ config EXYNOS5420_MCPM
bool "Exynos5420 Multi-Cluster PM support"
depends on MCPM && SOC_EXYNOS5420
select ARM_CCI
select ARM_CPU_SUSPEND
help
This is needed to provide CPU and cluster power management
on Exynos5420 implementing big.LITTLE.

@@ -196,7 +196,7 @@ static void exynos_power_down(void)
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
arch_spin_unlock(&exynos_mcpm_lock);

if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/*
* On the Cortex-A15 we need to disable
* L2 prefetching before flushing the cache.
@@ -289,6 +289,19 @@ static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
"b cci_enable_port_for_self");
}

static void __init exynos_cache_off(void)
{
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/* disable L2 prefetching on the Cortex-A15 */
asm volatile(
"mcr p15, 1, %0, c15, c0, 3\n\t"
"isb\n\t"
"dsb"
: : "r" (0x400));
}
exynos_v7_exit_coherency_flush(all);
}

static const struct of_device_id exynos_dt_mcpm_match[] = {
{ .compatible = "samsung,exynos5420" },
{ .compatible = "samsung,exynos5800" },
@@ -332,6 +345,8 @@ static int __init exynos_mcpm_init(void)
ret = mcpm_platform_register(&exynos_power_ops);
if (!ret)
ret = mcpm_sync_init(exynos_pm_power_up_setup);
if (!ret)
ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */
if (ret) {
iounmap(ns_sram_base_addr);
return ret;

@@ -190,7 +190,7 @@ static void __init exynos_smp_init_cpus(void)
void __iomem *scu_base = scu_base_addr();
unsigned int i, ncores;

if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
ncores = scu_base ? scu_get_core_count(scu_base) : 1;
else
/*
@@ -216,7 +216,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)

exynos_sysram_init();

if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(scu_base_addr());

/*

@@ -300,7 +300,7 @@ static int exynos_pm_suspend(void)
tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0);
__raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION);

if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();

return 0;
@@ -334,7 +334,7 @@ static void exynos_pm_resume(void)
if (exynos_pm_central_resume())
goto early_wakeup;

if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_restore_register();

/* For release retention */
@@ -353,7 +353,7 @@ static void exynos_pm_resume(void)

s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save));

if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(S5P_VA_SCU);

early_wakeup:
@@ -440,15 +440,14 @@ static int exynos_cpu_pm_notifier(struct notifier_block *self,
case CPU_PM_ENTER:
if (cpu == 0) {
exynos_pm_central_suspend();
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();
}
break;

case CPU_PM_EXIT:
if (cpu == 0) {
if (read_cpuid_part_number() ==
ARM_CPU_PART_CORTEX_A9) {
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
scu_enable(S5P_VA_SCU);
exynos_cpu_restore_register();
}

@@ -59,11 +59,6 @@ extern unsigned long __bus_to_pfn(unsigned long);
*/
#define FLUSH_BASE 0xf9000000

/*
* Physical DRAM offset.
*/
#define PLAT_PHYS_OFFSET UL(0x00000000)

#define FLUSH_BASE_PHYS 0x50000000

#endif

@@ -10,6 +10,7 @@
*/

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hardware/cache-l2x0.h>
#include "hardware.h"
@@ -301,7 +302,7 @@ rbc_loop:
resume_mmdc

/* return to suspend finish */
mov pc, lr
ret lr

resume:
/* invalidate L1 I-cache first */
@@ -325,7 +326,7 @@ resume:
mov r5, #0x1
resume_mmdc

mov pc, lr
ret lr
ENDPROC(imx6_suspend)

/*

Some files were not shown because too many files have changed in this diff