linux/arch/arm64/kernel/vdso/gettimeofday.S
Kevin Brodsky b33f491f5a arm64: Refactor vDSO time functions
Time functions are directly implemented in assembly in arm64, and it
is desirable to keep it this way for performance reasons (everything
fits in registers, so that the stack is not used at all). However, the
current implementation is quite difficult to read and understand (even
considering it's assembly).  Additionally, due to the structure of
__kernel_clock_gettime, which heavily uses conditional branches to
share code between the different clocks, it is difficult to support a
new clock without making the branches even harder to follow.

This commit completely refactors the structure of clock_gettime (and
gettimeofday along the way) while keeping exactly the same algorithms.
We no longer try to share code; instead, macros provide common
operations. This new approach comes with a number of advantages:
- In clock_gettime, clock implementations are no longer interspersed,
  making them much more readable. Additionally, macros only use
  registers passed as arguments or reserved with .req, this way it is
  easy to make sure that registers are properly allocated. To avoid a
  large number of branches in a given execution path, a jump table is
  used; a normal execution uses 3 unconditional branches.
- __do_get_tspec has been replaced with 2 macros (get_ts_clock_mono,
  get_clock_shifted_nsec) and explicit loading of data from the vDSO
  page. Consequently, clock_gettime and gettimeofday are now leaf
  functions, and saving x30 (lr) is no longer necessary.
- Variables protected by tb_seq_count are now loaded all at once,
  allowing to merge the seqcnt_read macro into seqcnt_check.
- For CLOCK_REALTIME_COARSE, removed an unused load of the wall to
  monotonic timespec.
- For CLOCK_MONOTONIC_COARSE, removed a few shift instructions.

Obviously, the downside of sharing less code is an increase in code
size. However since the vDSO has its own code page, this does not
really matter, as long as the size of the DSO remains below 4 kB. For
now this should be all right:
                    Before  After
  vdso.so size (B)  2776    3000

Signed-off-by: Kevin Brodsky <kevin.brodsky@arm.com>
Reviewed-by: Dave Martin <dave.martin@arm.com>
Acked-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
2016-07-12 16:06:27 +01:00

295 lines
6.9 KiB
ArmAsm

/*
* Userspace implementations of gettimeofday() and friends.
*
* Copyright (C) 2012 ARM Limited
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Author: Will Deacon <will.deacon@arm.com>
*/
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/unistd.h>
#define NSEC_PER_SEC_LO16 0xca00
#define NSEC_PER_SEC_HI16 0x3b9a
vdso_data .req x6
seqcnt .req w7
w_tmp .req w8
x_tmp .req x8
/*
* Conventions for macro arguments:
* - An argument is write-only if its name starts with "res".
* - All other arguments are read-only, unless otherwise specified.
*/
.macro seqcnt_acquire
9999: ldr seqcnt, [vdso_data, #VDSO_TB_SEQ_COUNT]
tbnz seqcnt, #0, 9999b
dmb ishld
.endm
.macro seqcnt_check fail
dmb ishld
ldr w_tmp, [vdso_data, #VDSO_TB_SEQ_COUNT]
cmp w_tmp, seqcnt
b.ne \fail
.endm
.macro syscall_check fail
ldr w_tmp, [vdso_data, #VDSO_USE_SYSCALL]
cbnz w_tmp, \fail
.endm
.macro get_nsec_per_sec res
mov \res, #NSEC_PER_SEC_LO16
movk \res, #NSEC_PER_SEC_HI16, lsl #16
.endm
/*
* Returns the clock delta, in nanoseconds left-shifted by the clock
* shift.
*/
.macro get_clock_shifted_nsec res, cycle_last, mult
/* Read the virtual counter. */
isb
mrs x_tmp, cntvct_el0
/* Calculate cycle delta and convert to ns. */
sub \res, x_tmp, \cycle_last
/* We can only guarantee 56 bits of precision. */
movn x_tmp, #0xff00, lsl #48
and \res, x_tmp, \res
mul \res, \res, \mult
.endm
/*
* Returns in res_{sec,nsec} the REALTIME timespec, based on the
* "wall time" (xtime) and the clock_mono delta.
*/
.macro get_ts_realtime res_sec, res_nsec, \
clock_nsec, xtime_sec, xtime_nsec, nsec_to_sec
add \res_nsec, \clock_nsec, \xtime_nsec
udiv x_tmp, \res_nsec, \nsec_to_sec
add \res_sec, \xtime_sec, x_tmp
msub \res_nsec, x_tmp, \nsec_to_sec, \res_nsec
.endm
/* sec and nsec are modified in place. */
.macro add_ts sec, nsec, ts_sec, ts_nsec, nsec_to_sec
/* Add timespec. */
add \sec, \sec, \ts_sec
add \nsec, \nsec, \ts_nsec
/* Normalise the new timespec. */
cmp \nsec, \nsec_to_sec
b.lt 9999f
sub \nsec, \nsec, \nsec_to_sec
add \sec, \sec, #1
9999:
cmp \nsec, #0
b.ge 9998f
add \nsec, \nsec, \nsec_to_sec
sub \sec, \sec, #1
9998:
.endm
.macro clock_gettime_return, shift=0
.if \shift == 1
lsr x11, x11, x12
.endif
stp x10, x11, [x1, #TSPEC_TV_SEC]
mov x0, xzr
ret
.endm
.macro jump_slot jumptable, index, label
.if (. - \jumptable) != 4 * (\index)
.error "Jump slot index mismatch"
.endif
b \label
.endm
.text
/* int __kernel_gettimeofday(struct timeval *tv, struct timezone *tz); */
ENTRY(__kernel_gettimeofday)
.cfi_startproc
adr vdso_data, _vdso_data
/* If tv is NULL, skip to the timezone code. */
cbz x0, 2f
/* Compute the time of day. */
1: seqcnt_acquire
syscall_check fail=4f
ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
ldp w11, w12, [vdso_data, #VDSO_CS_MULT]
ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
seqcnt_check fail=1b
get_nsec_per_sec res=x9
lsl x9, x9, x12
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
/* Convert ns to us. */
mov x13, #1000
lsl x13, x13, x12
udiv x11, x11, x13
stp x10, x11, [x0, #TVAL_TV_SEC]
2:
/* If tz is NULL, return 0. */
cbz x1, 3f
ldp w4, w5, [vdso_data, #VDSO_TZ_MINWEST]
stp w4, w5, [x1, #TZ_MINWEST]
3:
mov x0, xzr
ret
4:
/* Syscall fallback. */
mov x8, #__NR_gettimeofday
svc #0
ret
.cfi_endproc
ENDPROC(__kernel_gettimeofday)
#define JUMPSLOT_MAX CLOCK_MONOTONIC_COARSE
/* int __kernel_clock_gettime(clockid_t clock_id, struct timespec *tp); */
ENTRY(__kernel_clock_gettime)
.cfi_startproc
cmp w0, #JUMPSLOT_MAX
b.hi syscall
adr vdso_data, _vdso_data
adr x_tmp, jumptable
add x_tmp, x_tmp, w0, uxtw #2
br x_tmp
ALIGN
jumptable:
jump_slot jumptable, CLOCK_REALTIME, realtime
jump_slot jumptable, CLOCK_MONOTONIC, monotonic
b syscall
b syscall
b syscall
jump_slot jumptable, CLOCK_REALTIME_COARSE, realtime_coarse
jump_slot jumptable, CLOCK_MONOTONIC_COARSE, monotonic_coarse
.if (. - jumptable) != 4 * (JUMPSLOT_MAX + 1)
.error "Wrong jumptable size"
.endif
ALIGN
realtime:
seqcnt_acquire
syscall_check fail=syscall
ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
ldp w11, w12, [vdso_data, #VDSO_CS_MULT]
ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
seqcnt_check fail=realtime
/* All computations are done with left-shifted nsecs. */
get_nsec_per_sec res=x9
lsl x9, x9, x12
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
clock_gettime_return, shift=1
ALIGN
monotonic:
seqcnt_acquire
syscall_check fail=syscall
ldr x10, [vdso_data, #VDSO_CS_CYCLE_LAST]
ldp w11, w12, [vdso_data, #VDSO_CS_MULT]
ldp x13, x14, [vdso_data, #VDSO_XTIME_CLK_SEC]
ldp x3, x4, [vdso_data, #VDSO_WTM_CLK_SEC]
seqcnt_check fail=monotonic
/* All computations are done with left-shifted nsecs. */
lsl x4, x4, x12
get_nsec_per_sec res=x9
lsl x9, x9, x12
get_clock_shifted_nsec res=x15, cycle_last=x10, mult=x11
get_ts_realtime res_sec=x10, res_nsec=x11, \
clock_nsec=x15, xtime_sec=x13, xtime_nsec=x14, nsec_to_sec=x9
add_ts sec=x10, nsec=x11, ts_sec=x3, ts_nsec=x4, nsec_to_sec=x9
clock_gettime_return, shift=1
ALIGN
realtime_coarse:
seqcnt_acquire
ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
seqcnt_check fail=realtime_coarse
clock_gettime_return
ALIGN
monotonic_coarse:
seqcnt_acquire
ldp x10, x11, [vdso_data, #VDSO_XTIME_CRS_SEC]
ldp x13, x14, [vdso_data, #VDSO_WTM_CLK_SEC]
seqcnt_check fail=monotonic_coarse
/* Computations are done in (non-shifted) nsecs. */
get_nsec_per_sec res=x9
add_ts sec=x10, nsec=x11, ts_sec=x13, ts_nsec=x14, nsec_to_sec=x9
clock_gettime_return
ALIGN
syscall: /* Syscall fallback. */
mov x8, #__NR_clock_gettime
svc #0
ret
.cfi_endproc
ENDPROC(__kernel_clock_gettime)
/* int __kernel_clock_getres(clockid_t clock_id, struct timespec *res); */
ENTRY(__kernel_clock_getres)
.cfi_startproc
cmp w0, #CLOCK_REALTIME
ccmp w0, #CLOCK_MONOTONIC, #0x4, ne
b.ne 1f
ldr x2, 5f
b 2f
1:
cmp w0, #CLOCK_REALTIME_COARSE
ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne
b.ne 4f
ldr x2, 6f
2:
cbz w1, 3f
stp xzr, x2, [x1]
3: /* res == NULL. */
mov w0, wzr
ret
4: /* Syscall fallback. */
mov x8, #__NR_clock_getres
svc #0
ret
5:
.quad CLOCK_REALTIME_RES
6:
.quad CLOCK_COARSE_RES
.cfi_endproc
ENDPROC(__kernel_clock_getres)