mirror of
https://github.com/edk2-porting/linux-next.git
synced 2025-01-06 20:53:54 +08:00
e5ab8be68e
Loop in assembly checking the registers with many threads. Signed-off-by: Cyril Bur <cyrilbur@gmail.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
199 lines
3.9 KiB
ArmAsm
199 lines
3.9 KiB
ArmAsm
/*
 * Copyright 2015, Cyril Bur, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
|
|
|
|
#include "../basic_asm.h"
|
|
|
|
/*
 * PUSH_FPU(pos) - spill f14..f31 into the current stack frame.
 *
 * The 18 registers are stored as consecutive doublewords: f14 at pos(sp)
 * up to f31 at pos+136(sp), so the save area covers 144 bytes starting
 * at pos.  POP_FPU(pos) with the same pos reloads them.
 */
#define PUSH_FPU(pos) \
	stfd	f14, pos(sp); \
	stfd	f15, pos+8(sp); \
	stfd	f16, pos+16(sp); \
	stfd	f17, pos+24(sp); \
	stfd	f18, pos+32(sp); \
	stfd	f19, pos+40(sp); \
	stfd	f20, pos+48(sp); \
	stfd	f21, pos+56(sp); \
	stfd	f22, pos+64(sp); \
	stfd	f23, pos+72(sp); \
	stfd	f24, pos+80(sp); \
	stfd	f25, pos+88(sp); \
	stfd	f26, pos+96(sp); \
	stfd	f27, pos+104(sp); \
	stfd	f28, pos+112(sp); \
	stfd	f29, pos+120(sp); \
	stfd	f30, pos+128(sp); \
	stfd	f31, pos+136(sp);
|
|
|
|
/*
 * POP_FPU(pos) - reload f14..f31 from the current stack frame.
 *
 * Reads 18 consecutive doublewords: pos(sp) into f14 up to pos+136(sp)
 * into f31.  This is the layout written by PUSH_FPU(pos), so pos must be
 * the value that was given to the matching PUSH_FPU.
 */
#define POP_FPU(pos) \
	lfd	f14, pos(sp); \
	lfd	f15, pos+8(sp); \
	lfd	f16, pos+16(sp); \
	lfd	f17, pos+24(sp); \
	lfd	f18, pos+32(sp); \
	lfd	f19, pos+40(sp); \
	lfd	f20, pos+48(sp); \
	lfd	f21, pos+56(sp); \
	lfd	f22, pos+64(sp); \
	lfd	f23, pos+72(sp); \
	lfd	f24, pos+80(sp); \
	lfd	f25, pos+88(sp); \
	lfd	f26, pos+96(sp); \
	lfd	f27, pos+104(sp); \
	lfd	f28, pos+112(sp); \
	lfd	f29, pos+120(sp); \
	lfd	f30, pos+128(sp); \
	lfd	f31, pos+136(sp);
|
|
|
|
/*
 * load_fpu - fill f14..f31 from an array of 18 doubles.
 *
 * In:   r3 = address of the array; the double at byte offset 8*i is
 *            loaded into f(14+i), for i = 0..17
 * Out:  f14..f31 overwritten; r3 is left unchanged
 *
 * The previous contents of f14..f31 are NOT saved: clobbering them is the
 * whole point.  For that reason do not call this from C; callers in this
 * file save the registers with PUSH_FPU first.
 */
FUNC_START(load_fpu)
	lfd	f14, 0(r3)
	lfd	f15, 8(r3)
	lfd	f16, 16(r3)
	lfd	f17, 24(r3)
	lfd	f18, 32(r3)
	lfd	f19, 40(r3)
	lfd	f20, 48(r3)
	lfd	f21, 56(r3)
	lfd	f22, 64(r3)
	lfd	f23, 72(r3)
	lfd	f24, 80(r3)
	lfd	f25, 88(r3)
	lfd	f26, 96(r3)
	lfd	f27, 104(r3)
	lfd	f28, 112(r3)
	lfd	f29, 120(r3)
	lfd	f30, 128(r3)
	lfd	f31, 136(r3)
	blr
FUNC_END(load_fpu)
|
|
|
|
/*
 * check_fpu - compare f14..f31 against an array of 18 doubles.
 *
 * In:    r3 = address of the expected values; the double at byte offset
 *             8*i is compared with f(14+i), for i = 0..17
 * Out:   r3 = 0 if all 18 registers compared equal,
 *             1 as soon as one comparison is not equal
 * Uses:  r4 (copy of the array address), f0 (scratch), cr1
 *
 * The comparison is done with fcmpu, i.e. by floating-point value rather
 * than by bit pattern.  NOTE: fcmpu reports a NaN operand as unordered,
 * which the bne below treats as a mismatch, so the expected values should
 * not contain NaNs.
 *
 * Leaf routine: no stack frame is created and f14..f31 are only read.
 */
FUNC_START(check_fpu)
	mr	r4,r3
	li	r3,1		# assume a bad result
	lfd	f0,0(r4)
	fcmpu	cr1,f0,f14
	bne	cr1,1f
	lfd	f0,8(r4)
	fcmpu	cr1,f0,f15
	bne	cr1,1f
	lfd	f0,16(r4)
	fcmpu	cr1,f0,f16
	bne	cr1,1f
	lfd	f0,24(r4)
	fcmpu	cr1,f0,f17
	bne	cr1,1f
	lfd	f0,32(r4)
	fcmpu	cr1,f0,f18
	bne	cr1,1f
	lfd	f0,40(r4)
	fcmpu	cr1,f0,f19
	bne	cr1,1f
	lfd	f0,48(r4)
	fcmpu	cr1,f0,f20
	bne	cr1,1f
	lfd	f0,56(r4)
	fcmpu	cr1,f0,f21
	bne	cr1,1f
	lfd	f0,64(r4)
	fcmpu	cr1,f0,f22
	bne	cr1,1f
	lfd	f0,72(r4)
	fcmpu	cr1,f0,f23
	bne	cr1,1f
	lfd	f0,80(r4)
	fcmpu	cr1,f0,f24
	bne	cr1,1f
	lfd	f0,88(r4)
	fcmpu	cr1,f0,f25
	bne	cr1,1f
	lfd	f0,96(r4)
	fcmpu	cr1,f0,f26
	bne	cr1,1f
	lfd	f0,104(r4)
	fcmpu	cr1,f0,f27
	bne	cr1,1f
	lfd	f0,112(r4)
	fcmpu	cr1,f0,f28
	bne	cr1,1f
	lfd	f0,120(r4)
	fcmpu	cr1,f0,f29
	bne	cr1,1f
	lfd	f0,128(r4)
	fcmpu	cr1,f0,f30
	bne	cr1,1f
	lfd	f0,136(r4)
	fcmpu	cr1,f0,f31
	bne	cr1,1f
	li	r3,0		# Success!!!
1:	blr
FUNC_END(check_fpu)
|
|
|
|
/*
 * test_fpu - check that f14..f31 still hold darray after a fork syscall.
 *
 * In:   r3 = address of darray (18 doubles, loaded into f14..f31 and then
 *            used as the expected values)
 *       r4 = address of the pid slot; r3 as returned by the fork syscall
 *            is stored there unmodified (no error check is done here)
 * Out:  r3 = result of check_fpu: 0 if f14..f31 matched darray, 1 if not
 *
 * f14-f31 are non volatiles, so the values the caller had in them are
 * parked in this frame with PUSH_FPU and put back with POP_FPU before
 * returning.  PUSH_BASIC_STACK/POP_BASIC_STACK and the STACK_FRAME_*
 * offsets are not defined in this file (presumably basic_asm.h).
 *
 * NOTE(review): the fork result is written with std, i.e. 8 bytes.
 * Confirm the slot r4 points at is at least 8 bytes wide.
 */
FUNC_START(test_fpu)
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp)	# Address of darray
	std	r4,STACK_FRAME_PARAM(1)(sp)	# Address of pid
	PUSH_FPU(STACK_FRAME_LOCAL(2,0))

	/* r3 still holds darray here */
	bl	load_fpu
	nop
	li	r0,__NR_fork
	sc

	/* pass the result of the fork to the caller */
	ld	r9,STACK_FRAME_PARAM(1)(sp)
	std	r3,0(r9)

	/* r3 was overwritten by the syscall: reload darray for the check */
	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu
	nop

	POP_FPU(STACK_FRAME_LOCAL(2,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(test_fpu)
|
|
|
|
/*
 * int preempt_fpu(double *darray, int *threads_starting, int *running)
 *
 * Loads f14..f31 from darray, then (atomically) decrements
 * *threads_starting as a signal that the FPU has been loaded.  After that
 * it keeps checking f14..f31 against darray for as long as *running is
 * not zero.
 *
 * In:   r3 = darray (18 doubles)
 *       r4 = threads_starting, a 32-bit counter updated with lwarx/stwcx.
 *       r5 = running, a 32-bit flag that is only read here
 * Out:  r3 = 0 if the loop ended because *running was zero,
 *            non-zero (the check_fpu result) if a mismatch was seen
 *
 * The values the caller had in f14..f31 are saved with PUSH_FPU and
 * restored with POP_FPU, on both exit paths.
 */
FUNC_START(preempt_fpu)
	PUSH_BASIC_STACK(256)
	std	r3,STACK_FRAME_PARAM(0)(sp)	# double *darray
	std	r4,STACK_FRAME_PARAM(1)(sp)	# int *threads_starting
	std	r5,STACK_FRAME_PARAM(2)(sp)	# int *running
	PUSH_FPU(STACK_FRAME_LOCAL(3,0))

	bl	load_fpu
	nop

	sync
	/* Atomic DEC of *threads_starting */
	ld	r3,STACK_FRAME_PARAM(1)(sp)
1:	lwarx	r4,0,r3
	addi	r4,r4,-1
	stwcx.	r4,0,r3
	bne-	1b			# store did not succeed, retry

2:	ld	r3,STACK_FRAME_PARAM(0)(sp)
	bl	check_fpu
	nop
	cmpdi	r3,0
	bne	3f			# mismatch: leave with r3 != 0
	ld	r4,STACK_FRAME_PARAM(2)(sp)
	/*
	 * *running is an int, so load exactly one word.  A doubleword load
	 * would also pull in the 4 bytes after it, and on big-endian the
	 * cmpwi below would then test those bytes instead of *running.
	 */
	lwz	r5,0(r4)
	cmpwi	r5,0
	bne	2b

3:	POP_FPU(STACK_FRAME_LOCAL(3,0))
	POP_BASIC_STACK(256)
	blr
FUNC_END(preempt_fpu)
|