Skip to content

Commit

Permalink
[IA64] implement csum_ipv6_magic for ia64.
Browse files Browse the repository at this point in the history
The asm version is 4.4 times faster than the generic C version and
10X smaller in code size.

Signed-off-by: Ken Chen <[email protected]>
Signed-off-by: Tony Luck <[email protected]>
  • Loading branch information
Chen, Kenneth W authored and aegl committed Dec 7, 2006
1 parent 5b4d568 commit 007d77d
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 2 deletions.
55 changes: 53 additions & 2 deletions arch/ia64/lib/ip_fast_csum.S
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
* in0: address of buffer to checksum (char *)
* in1: length of the buffer (int)
*
* Copyright (C) 2002 Intel Corp.
* Copyright (C) 2002 Ken Chen <[email protected]>
* Copyright (C) 2002, 2006 Intel Corp.
* Copyright (C) 2002, 2006 Ken Chen <[email protected]>
*/

#include <asm/asmmacro.h>
Expand All @@ -25,6 +25,9 @@

#define in0 r32
#define in1 r33
#define in2 r34
#define in3 r35
#define in4 r36
#define ret0 r8

GLOBAL_ENTRY(ip_fast_csum)
Expand Down Expand Up @@ -88,3 +91,51 @@ GLOBAL_ENTRY(ip_fast_csum)
mov b0=r34
br.ret.sptk.many b0
END(ip_fast_csum)

/*
 * csum_ipv6_magic(saddr, daddr, len, proto, csum)
 *
 * Sums the two 16-byte IPv6 addresses, the length/protocol pseudo-header
 * words and the caller's partial checksum, then folds the total to 16 bits
 * and returns its one's complement.
 *
 *	in0:  saddr  (read as four 32-bit words; in0 is advanced by 16)
 *	in1:  daddr  (read as four 32-bit words; in1 is advanced by 16)
 *	in2:  len    (32-bit)
 *	in3:  proto  (only the low 16 bits are used)
 *	in4:  csum   (32-bit partial sum; zero-extended in place below)
 *	ret0: complemented, folded 16-bit checksum (bits 63:16 are zero)
 *
 * Scratch registers written: r9-r11, r15-r27.  Leaf routine, no memory
 * is written.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4		// saddr word 0 (ld4 zero-extends)
	ld4	r21=[in1],4		// daddr word 0
	dep	r15=in3,in2,32,16	// r15 = len with proto in bits 47:32
	// A 32-bit argument register is not guaranteed to have clean upper
	// 32 bits, and in4 is added below as a full 64-bit value, so clear
	// them first.  (in2 needs no such treatment: its bits 47:32 are
	// overwritten by the dep above, and its bits 63:48 end up in bits
	// 15:0 after the byte reversal and are shifted out.)
	zxt4	in4=in4
	;;
	ld4	r22=[in0],4		// saddr word 1
	ld4	r23=[in1],4		// daddr word 1
	mux1	r15=r15,@rev		// reverse all eight bytes of r15
	;;
	ld4	r24=[in0],4		// saddr word 2
	ld4	r25=[in1],4		// daddr word 2
	shr.u	r15=r15,16		// byte-swapped len in 47:16, proto in 15:0
	add	r16=r20,r21
	add	r17=r22,r23
	;;
	ld4	r26=[in0],4		// saddr word 3
	ld4	r27=[in1],4		// daddr word 3
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	;;
	add	r8=r8,r19		// r8 = sum of all eight address words
	add	r15=r15,in4		// pseudo-header words + caller's csum
	;;
	add	r8=r8,r15		// 64-bit total, carries kept in high bits
	;;
	shr.u	r10=r8,32		// fold 64 -> 32(+carry): high half ...
	zxt4	r11=r8			// ... plus low half
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16		// fold again: bits above 15 ...
	zxt2	r11=r8			// ... plus low 16 bits
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16		// third fold absorbs the remaining carry
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8		// r8 = 0xffff & ~r8 (16-bit complement)
	br.ret.sptk.many b0
END(csum_ipv6_magic)
6 changes: 6 additions & 0 deletions include/asm-ia64/checksum.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,10 @@ static inline __sum16 csum_fold(__wsum csum)
return (__force __sum16)~sum;
}

/*
 * ia64 provides its own csum_ipv6_magic(), implemented in assembly in
 * arch/ia64/lib/ip_fast_csum.S.  _HAVE_ARCH_IPV6_CSUM advertises that
 * (presumably it suppresses a generic C fallback elsewhere -- confirm
 * against the generic checksum header).
 *
 * The routine sums the two 16-byte addresses, len, proto and the partial
 * checksum csum, and returns the folded 16-bit one's complement of the sum.
 */
#define _HAVE_ARCH_IPV6_CSUM 1
/* Forward declaration: only pointers to struct in6_addr are used here. */
struct in6_addr;
extern unsigned short int csum_ipv6_magic(struct in6_addr *saddr,
struct in6_addr *daddr, __u32 len, unsigned short proto,
unsigned int csum);

#endif /* _ASM_IA64_CHECKSUM_H */

0 comments on commit 007d77d

Please sign in to comment.