/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IP/TCP/UDP checksumming routines
*
* Authors: Jorge Cwik, <jorge@laser.satlink.net>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
* Tom May, <ftom@netcom.com>
* Pentium Pro/II routines:
* Alexander Kjeldaas <astor@guardian.no>
* Finn Arne Gangstad <finnag@guardian.no>
* Lots of code moved from tcp.c and ip.c; see those files
* for more names.
*
* Changes: Ingo Molnar, converted csum_partial_copy() to 2.1 exception
* handling.
* Andi Kleen, add zeroing on error
* converted to pure assembler
* Hirokazu Takata, Hiroyuki Kondo: rewrite for the m32r architecture.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/* $Id$ */
#include <linux/config.h>
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/errno.h>
/*
* computes a partial checksum, e.g. for TCP/UDP fragments
*/
/*
unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
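/*
 * For reference only (not part of the build): a minimal C sketch of the
 * same computation, assuming a halfword-aligned buffer on a little-endian
 * build; the helper name csum_partial_ref is hypothetical.  The assembler
 * below additionally handles odd starting addresses and unrolls the loop.
 *
 *	unsigned int csum_partial_ref(const unsigned char *buff, int len,
 *				      unsigned int sum)
 *	{
 *		unsigned long long acc = 0;
 *
 *		while (len >= 2) {		// sum 16-bit words
 *			acc += *(const unsigned short *)buff;
 *			buff += 2;
 *			len -= 2;
 *		}
 *		if (len)			// trailing odd byte
 *			acc += *buff;		// (*buff << 8 on big-endian)
 *		acc = (acc & 0xffff) + (acc >> 16);	// fold to 16 bits,
 *		acc = (acc & 0xffff) + (acc >> 16);	// twice for the carry
 *		acc += sum;				// caller's running sum
 *		return (unsigned int)((acc & 0xffffffff) + (acc >> 32));
 *	}
 */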
#ifdef CONFIG_ISA_DUAL_ISSUE
/*
* Experiments with Ethernet and SLIP connections show that buff
* is aligned on either a 2-byte or 4-byte boundary. We get at
* least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
.text
ENTRY(csum_partial)
; Function args
; r0: unsigned char *buff
; r1: int len
; r2: unsigned int sum
push r2 || ldi r2, #0
and3 r7, r0, #1 ; Check alignment.
beqz r7, 1f ; Jump if alignment is ok.
; 1-byte misaligned
ldub r4, @r0 || addi r0, #1
; clear c-bit || Alignment uses up one byte.
cmp r0, r0 || addi r1, #-1
ldi r3, #0 || addx r2, r4
addx r2, r3
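; Note: consuming the leading byte shifts every later halfword boundary
; by one, so the folded sum comes out byte-swapped; r7 stays non-zero so
; the swap is undone just before returning.  E.g. for bytes a,b,c,d the
; true big-endian sum is (a+c)<<8 + (b+d), while summing a, [b c], d<<8
; gives (b+d)<<8 + (a+c): the same value byte-swapped (end-around
; carries preserve the equivalence).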
.fillinsn
1:
and3 r4, r0, #2 ; Check alignment.
beqz r4, 2f ; Jump if alignment is ok.
; clear c-bit || Alignment uses up two bytes.
cmp r0, r0 || addi r1, #-2
bgtz r1, 1f ; Jump if we had at least two bytes.
bra 4f || addi r1, #2
.fillinsn ; len(r1) was < 2. Deal with it.
1:
; 2-byte aligned
lduh r4, @r0 || ldi r3, #0
addx r2, r4 || addi r0, #2
addx r2, r3
.fillinsn
2:
; 4-byte aligned
cmp r0, r0 ; clear c-bit
srl3 r6, r1, #5
beqz r6, 2f
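; Unrolled loop: 32 bytes per iteration; each ld issues in parallel
; with the addx of the word previously loaded into the same register,
; overlapping memory latency with the carry adds.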
.fillinsn
1: ld r3, @r0+
ld r4, @r0+ ; +4
ld r5, @r0+ ; +8
ld r3, @r0+ || addx r2, r3 ; +12
ld r4, @r0+ || addx r2, r4 ; +16
ld r5, @r0+ || addx r2, r5 ; +20
ld r3, @r0+ || addx r2, r3 ; +24
ld r4, @r0+ || addx r2, r4 ; +28
addx r2, r5 || addi r6, #-1
addx r2, r3
addx r2, r4
bnez r6, 1b
addx r2, r6 ; r6=0
cmp r0, r0 ; This clears c-bit
.fillinsn
2: and3 r6, r1, #0x1c ; remaining full words (len & 0x1c)
beqz r6, 4f
srli r6, #2
.fillinsn
3: ld r4, @r0+ || addi r6, #-1
addx r2, r4
bnez r6, 3b
addx r2, r6 ; r6=0
cmp r0, r0 ; This clears c-bit
.fillinsn
4: and3 r1, r1, #3
beqz r1, 7f ; if len == 0 goto end
and3 r6, r1, #2
beqz r6, 5f ; if len < 2, handle the 1-byte tail at 5f
lduh r4, @r0 || addi r0, #2
addi r1, #-2 || slli r4, #16
addx r2, r4
beqz r1, 6f
.fillinsn
5: ldub r4, @r0 || ldi r1, #0
#ifndef __LITTLE_ENDIAN__
slli r4, #8
#endif
addx r2, r4
.fillinsn
6: addx r2, r1
.fillinsn
7:
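; Fold the 32-bit sum to 16 bits with end-around carry:
; r0 = (r2 & 0xffff) + (r2 >> 16), plus 1 if that overflowed 16 bits.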
and3 r0, r2, #0xffff
srli r2, #16
add r0, r2
srl3 r2, r0, #16
beqz r2, 1f
addi r0, #1
and3 r0, r0, #0xffff
.fillinsn
1:
beqz r7, 1f ; swap the upper byte for the lower
and3 r2, r0, #0xff
srl3 r0, r0, #8
slli r2, #8
or r0, r2
.fillinsn
1:
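; Add the caller's initial sum back in, feeding the carry end-around
; to preserve the ones'-complement property.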
pop r2 || cmp r0, r0
addx r0, r2 || ldi r2, #0
addx r0, r2
jmp r14
#else /* not CONFIG_ISA_DUAL_ISSUE */
/*
* Experiments with Ethernet and SLIP connections show that buff
* is aligned on either a 2-byte or 4-byte boundary. We get at
* least a twofold speedup on 486 and Pentium if it is 4-byte aligned.
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
.text
ENTRY(csum_partial)
; Function args
; r0: unsigned char *buff
; r1: int len
; r2: unsigned int sum
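; Same algorithm as the dual-issue variant above, written with
; single-issue instructions only.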
push r2
ldi r2, #0
and3 r7, r0, #1 ; Check alignment.
beqz r7, 1f ; Jump if alignment is ok.
; 1-byte misaligned
ldub r4, @r0
addi r0, #1
addi r1, #-1 ; Alignment uses up one byte.
cmp r0, r0 ; clear c-bit
ldi r3, #0
addx r2, r4
addx r2, r3
.fillinsn
1:
and3 r4, r0, #2 ; Check alignment.
beqz r4, 2f ; Jump if alignment is ok.
addi r1, #-2 ; Alignment uses up two bytes.
cmp r0, r0 ; clear c-bit
bgtz r1, 1f ; Jump if we had at least two bytes.
addi r1, #2 ; len(r1) was < 2. Deal with it.
bra 4f
.fillinsn
1:
; 2-byte aligned
lduh r4, @r0
addi r0, #2
ldi r3, #0
addx r2, r4
addx r2, r3
.fillinsn
2:
; 4-byte aligned
cmp r0, r0 ; clear c-bit
srl3 r6, r1, #5
beqz r6, 2f
.fillinsn
1: ld r3, @r0+
ld r4, @r0+ ; +4
ld r5, @r0+ ; +8
addx r2, r3
addx r2, r4
addx r2, r5
ld r3, @r0+ ; +12
ld r4, @r0+ ; +16
ld r5, @r0+ ; +20
addx r2, r3
addx r2, r4
addx r2, r5
ld r3, @r0+ ; +24
ld r4, @r0+ ; +28
addi r6, #-1
addx r2, r3
addx r2, r4
bnez r6, 1b
addx r2, r6 ; r6=0
cmp r0, r0 ; This clears c-bit
.fillinsn
2: and3 r6, r1, #0x1c ; remaining full words (len & 0x1c)
beqz r6, 4f
srli r6, #2
.fillinsn
3: ld r4, @r0+
addi r6, #-1
addx r2, r4
bnez r6, 3b
addx r2, r6 ; r6=0
cmp r0, r0 ; This clears c-bit
.fillinsn
4: and3 r1, r1, #3
beqz r1, 7f ; if len == 0 goto end
and3 r6, r1, #2
beqz r6, 5f ; if len < 2, handle the 1-byte tail at 5f
lduh r4, @r0
addi r0, #2
addi r1, #-2
slli r4, #16
addx r2, r4
beqz r1, 6f
.fillinsn
5: ldub r4, @r0
#ifndef __LITTLE_ENDIAN__
slli r4, #8
#endif
addx r2, r4
.fillinsn
6: ldi r5, #0
addx r2, r5
.fillinsn
7:
and3 r0, r2, #0xffff
srli r2, #16
add r0, r2
srl3 r2, r0, #16
beqz r2, 1f
addi r0, #1
and3 r0, r0, #0xffff
.fillinsn
1:
beqz r7, 1f
mv r2, r0
srl3 r0, r2, #8
and3 r2, r2, #0xff
slli r2, #8
or r0, r2
.fillinsn
1:
pop r2
cmp r0, r0
addx r0, r2
ldi r2, #0
addx r0, r2
jmp r14
#endif /* not CONFIG_ISA_DUAL_ISSUE */
/*
unsigned int csum_partial_copy_generic (const char *src, char *dst,
int len, int sum, int *src_err_ptr, int *dst_err_ptr)
*/
/*
* Copy from src while checksumming, otherwise like csum_partial.
*
* The macros SRC and DST specify the type of access for the instruction,
* so a custom exception handler can be called for each access type.
*
* FIXME: could someone double-check whether I haven't mixed up some SRC and
* DST definitions? It's damn hard to trigger all cases. I hope I got
* them all but there's no guarantee.
*/
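/*
 * For reference only: a hedged C sketch of the intended semantics once
 * this stub is implemented (csum_partial_copy_generic_ref is a
 * hypothetical name).  The real routine would fuse copy and checksum in
 * one pass and catch faulting accesses through the exception table,
 * reporting them via *src_err_ptr / *dst_err_ptr and zeroing the
 * destination tail on a source fault; none of that is shown here.
 *
 *	#include <string.h>
 *
 *	unsigned int csum_partial_copy_generic_ref(const char *src, char *dst,
 *			int len, int sum, int *src_err_ptr, int *dst_err_ptr)
 *	{
 *		memcpy(dst, src, len);	// copy, then checksum the copy
 *		return csum_partial((const unsigned char *)dst, len, sum);
 *	}
 */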
ENTRY(csum_partial_copy_generic)
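; Unimplemented stub: the nops are placeholders; the routine simply
; returns to the caller through r14.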
nop
nop
nop
nop
jmp r14
nop
nop
nop