2
0
mirror of https://github.com/edk2-porting/linux-next.git synced 2025-01-02 10:43:57 +08:00
linux-next/include/asm-arm/processor.h
Nicolas Pitre 02828845dd [ARM] 4016/1: prefetch macro is wrong wrt gcc's "delete-null-pointer-checks"
optimization

The gcc manual says:

|`-fdelete-null-pointer-checks'
|     Use global dataflow analysis to identify and eliminate useless
|     checks for null pointers.  The compiler assumes that dereferencing
|     a null pointer would have halted the program.  If a pointer is
|     checked after it has already been dereferenced, it cannot be null.
|     Enabled at levels `-O2', `-O3', `-Os'.

Now the problem can be seen with this test case:

#include <linux/prefetch.h>
extern void bar(char *x);
void foo(char *x)
{
	prefetch(x);
	if (x)
		bar(x);
}

Because the constraint to the inline asm used in the prefetch() macro is
a memory operand, gcc assumes that the asm code does dereference the
pointer and the delete-null-pointer-checks optimization kicks in.
Inspection of generated assembly for the above example shows that bar()
is indeed called unconditionally without any test on the value of x.

Of course in the prefetch case there is no real dereference and it
cannot be assumed that a null pointer would have been caught at that
point. This causes kernel oopses with constructs like
hlist_for_each_entry() where the list's 'next' content is prefetched
before the pointer is tested against NULL, and only when gcc feels like
applying this optimization which doesn't happen all the time with more
complex code.

It appears that the way to prevent the delete-null-pointer-checks
optimization from occurring in this case is to make prefetch() a static
inline function instead of a macro. At least this is what is done on
x86_64, where a similar inline asm memory operand is used (I presume they
would have seen the same problem if it didn't work), and the resulting code
for the above example confirms that.

An alternative would consist of replacing the memory operand by a
register operand containing the pointer, and use the addressing mode
explicitly in the asm template. But that would be less optimal than an
offsettable memory reference.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
2006-12-13 18:30:20 +00:00

126 lines
2.8 KiB
C

/*
* linux/include/asm-arm/processor.h
*
* Copyright (C) 1995-1999 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifndef __ASM_ARM_PROCESSOR_H
#define __ASM_ARM_PROCESSOR_H
/*
 * current_text_addr() - address of the code at the point of use.
 *
 * Default implementation: declares a block-local label, places it, and
 * yields that label's address as the value of the statement expression.
 * Relies on the GNU C extensions __label__, &&label and ({ ... }).
 * Because the label is block-local, the macro can be used more than once
 * in the same function.
 */
#define current_text_addr()		\
({					\
	__label__ _here;		\
_here:					\
	&&_here;			\
})
#ifdef __KERNEL__
#include <asm/ptrace.h>
#include <asm/types.h>
/*
 * Overlaid storage for one saved value, viewed either as a u32 ("arm")
 * or as a u16 ("thumb").
 * NOTE(review): by the names this presumably holds an ARM or a Thumb
 * instruction encoding - confirm against the code that fills it in.
 */
union debug_insn {
	u32	arm;
	u16	thumb;
};
/*
 * Pairs an address with a debug_insn.
 * NOTE(review): presumably the location of a breakpoint and the
 * instruction it replaced - confirm against the code that uses it.
 */
struct debug_entry {
	u32			address;
	union debug_insn	insn;
};
/*
 * Debug state embedded in struct thread_struct: a counter plus room for
 * two debug_entry records.
 * NOTE(review): nsaved presumably counts how many bp[] slots are in use.
 */
struct debug_info {
	int			nsaved;
	struct debug_entry	bp[2];
};
/*
 * Architecture-specific thread state kept by this header: fault
 * information and debug bookkeeping.
 */
struct thread_struct {
	/* fault info */
	unsigned long		address;
	unsigned long		trap_no;
	unsigned long		error_code;

	/* debugging */
	struct debug_info	debug;
};
/*
 * Expands to an empty brace initializer.
 * NOTE(review): presumably the initial value for a struct thread_struct.
 */
#define INIT_THREAD { }
/*
 * nommu_start_thread(regs) - extra register setup for kernels built
 * without CONFIG_MMU.
 *
 * With CONFIG_MMU defined this is a no-op.  Without it, the ARM_r10 slot
 * of @regs is loaded with current->mm->start_data.
 *
 * Both variants are a single do { } while (0) statement, so the macro has
 * the same syntactic shape in either configuration, and @regs is
 * parenthesised so that any pointer expression may be passed.
 */
#ifdef CONFIG_MMU
#define nommu_start_thread(regs)	do { } while (0)
#else
#define nommu_start_thread(regs)					\
	do {								\
		(regs)->ARM_r10 = current->mm->start_data;		\
	} while (0)
#endif
/*
 * start_thread(regs, pc, sp) - initialise @regs so execution starts at
 * @pc with stack pointer @sp.
 *
 * Steps, in order:
 *  - set_fs(USER_DS);
 *  - zero the whole regs->uregs array;
 *  - cpsr = USR_MODE if current->personality has ADDR_LIMIT_32BIT,
 *    otherwise USR26_MODE;
 *  - additionally set PSR_T_BIT when elf_hwcap has HWCAP_THUMB and
 *    bit 0 of @pc is set;
 *  - pc = @pc with bit 0 cleared, sp = @sp;
 *  - r0/r1/r2 = the first three words found at @sp (argc, argv, envp);
 *  - nommu_start_thread() for the !CONFIG_MMU extras.
 *
 * Every use of a macro parameter is parenthesised so that arguments such
 * as "base + off" or "a | b" are not re-associated by the operators and
 * casts inside the macro.  The local is named __stack so it cannot
 * capture a caller argument that happens to be called "stack".
 *
 * Note: @regs, @pc and @sp are each evaluated more than once; do not pass
 * expressions with side effects.
 */
#define start_thread(regs,pc,sp)					\
({									\
	unsigned long *__stack = (unsigned long *)(sp);			\
	set_fs(USER_DS);						\
	memzero((regs)->uregs, sizeof((regs)->uregs));			\
	if (current->personality & ADDR_LIMIT_32BIT)			\
		(regs)->ARM_cpsr = USR_MODE;				\
	else								\
		(regs)->ARM_cpsr = USR26_MODE;				\
	if ((elf_hwcap & HWCAP_THUMB) && ((pc) & 1))			\
		(regs)->ARM_cpsr |= PSR_T_BIT;				\
	(regs)->ARM_pc = (pc) & ~1;	/* pc */			\
	(regs)->ARM_sp = (sp);		/* sp */			\
	(regs)->ARM_r2 = __stack[2];	/* r2 (envp) */			\
	(regs)->ARM_r1 = __stack[1];	/* r1 (argv) */			\
	(regs)->ARM_r0 = __stack[0];	/* r0 (argc) */			\
	nommu_start_thread((regs));					\
})
/*
 * Forward declaration: lets the prototypes in this header take a
 * struct task_struct pointer without the full definition being visible.
 */
struct task_struct;
/*
 * Free all resources held by a thread.
 * Only declared here; the definition lives outside this header.
 */
extern void release_thread(struct task_struct *);
/*
 * Prepare to copy thread state - unlazy all lazy status.
 * This definition is a no-op: it expands to an empty do { } while (0)
 * statement and does not evaluate tsk.
 */
#define prepare_to_copy(tsk) do { } while (0)
/*
 * Takes a task and returns an unsigned long; implemented outside this
 * header.
 * NOTE(review): by its name this presumably reports where a sleeping task
 * is waiting (its "wait channel") - confirm against the definition.
 */
unsigned long get_wchan(struct task_struct *p);
/* cpu_relax() expands to barrier(), which is defined outside this header. */
#define cpu_relax() barrier()
/*
 * Create a new kernel thread.
 *
 * fn:    function taking a void * and returning int.
 * arg:   NOTE(review): presumably the pointer handed to fn - confirm.
 * flags: NOTE(review): meaning not visible in this header - confirm
 *        against the definition.
 *
 * Returns an int; the meaning of the value is defined by the
 * implementation, which is not in this header.
 */
extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
/*
 * task_pt_regs(p) - the struct pt_regs slot that ends THREAD_START_SP
 * bytes into the stack page of task p (one pt_regs below that address).
 */
#define task_pt_regs(p)							\
	((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1)

/*
 * The ARM_pc / ARM_sp fields of a task's pt_regs slot.  Both expand to
 * an lvalue.
 */
#define KSTK_EIP(tsk)	(task_pt_regs(tsk)->ARM_pc)
#define KSTK_ESP(tsk)	(task_pt_regs(tsk)->ARM_sp)
/*
 * Prefetching support - ARMv5 and later (__LINUX_ARM_ARCH__ >= 5).
 */
#if __LINUX_ARM_ARCH__ >= 5

#define ARCH_HAS_PREFETCH
/*
 * prefetch(ptr) - issue a PLD (preload) hint for the address in @ptr.
 *
 * @ptr is passed with the "p" (address) constraint and printed with %a0,
 * so the compiler only sees an address being used and never sees *ptr as
 * an asm memory operand.  A memory operand such as "o" (*ptr) tells gcc
 * that the pointer is dereferenced, which allows
 * -fdelete-null-pointer-checks to drop a later "if (ptr)" test even
 * though the prefetch performs no real dereference.  Callers such as
 * list walkers prefetch the next pointer before testing it against NULL,
 * so that test must survive.
 *
 * "p" accepts any valid address form, so the compiler may still fold a
 * constant offset into the operand.  No cast is needed, so the const
 * qualifier on @ptr is no longer cast away.
 */
static inline void prefetch(const void *ptr)
{
	__asm__ __volatile__(
		"pld\t%a0"
		:
		: "p" (ptr)
		: "cc");
}

#define ARCH_HAS_PREFETCHW
/* No separate write-prefetch here: prefetchw() is simply prefetch(). */
#define prefetchw(ptr)	prefetch(ptr)

#define ARCH_HAS_SPINLOCK_PREFETCH
/* spin_lock_prefetch() is a no-op and does not evaluate its argument. */
#define spin_lock_prefetch(x) do { } while (0)

#endif
#endif
#endif /* __ASM_ARM_PROCESSOR_H */