forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[IA64] implement csum_ipv6_magic for ia64.
The asm version is 4.4 times faster than the generic C version and 10X smaller in code size. Signed-off-by: Ken Chen <[email protected]> Signed-off-by: Tony Luck <[email protected]>
- Loading branch information
Showing
2 changed files
with
59 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,8 +8,8 @@ | |
* in0: address of buffer to checksum (char *) | ||
* in1: length of the buffer (int) | ||
* | ||
* Copyright (C) 2002 Intel Corp. | ||
* Copyright (C) 2002 Ken Chen <[email protected]> | ||
* Copyright (C) 2002, 2006 Intel Corp. | ||
* Copyright (C) 2002, 2006 Ken Chen <[email protected]> | ||
*/ | ||
|
||
#include <asm/asmmacro.h> | ||
|
@@ -25,6 +25,9 @@ | |
|
||
/*
 * Symbolic register names: in0..in4 stand for r32..r36 and ret0 stands
 * for r8.  NOTE(review): presumably these are the stacked input registers
 * holding the first five arguments, and the integer return register --
 * confirm against the ia64 calling convention.
 */
#define in0 r32 | ||
#define in1 r33 | ||
#define in2 r34 | ||
#define in3 r35 | ||
#define in4 r36 | ||
#define ret0 r8 | ||
|
||
GLOBAL_ENTRY(ip_fast_csum) | ||
|
@@ -88,3 +91,51 @@ GLOBAL_ENTRY(ip_fast_csum) | |
mov b0=r34 | ||
br.ret.sptk.many b0 | ||
END(ip_fast_csum) | ||
|
||
/*
 * csum_ipv6_magic
 *
 * Sums four 32-bit words read from [in0], four 32-bit words read from
 * [in1], a byte-reversed word built from in2 and in3, and the low 32 bits
 * of in4.  The 64-bit total is folded down to 16 bits and its one's
 * complement (masked to 16 bits) is returned in r8.
 *
 * Inputs:
 *	in0: address of the first 16-byte block
 *	in1: address of the second 16-byte block
 *	in2: 32-bit value; only bits 0..31 are used
 *	in3: 16-bit value; only bits 0..15 are used
 *	in4: 32-bit value added to the sum; only bits 0..31 are used
 *
 * NOTE(review): presumably these are saddr, daddr, len, proto and csum of
 * the generic C csum_ipv6_magic() -- confirm against the C prototype.
 *
 * in2 and in4 are assumed to be 32-bit C arguments, so the upper halves of
 * their registers cannot be trusted to be zero:
 *   - in2 needs no explicit extension: bits 32..47 are overwritten by the
 *     dep below, and bits 48..63 end up in the low 16 bits after the byte
 *     reversal, where the following shr.u discards them.
 *   - in4 is zero-extended explicitly before it is added.
 *
 * Modifies: r8-r11, r15-r27; in0 and in1 are each advanced by 16 and in4
 * is overwritten with its zero-extended value.
 */
GLOBAL_ENTRY(csum_ipv6_magic)
	ld4	r20=[in0],4		// word 0 of each block (ld4 zero-extends)
	ld4	r21=[in1],4
	dep	r15=in3,in2,32,16	// r15 = in2 with in3[15:0] in bits 32..47
	;;
	ld4	r22=[in0],4		// word 1
	ld4	r23=[in1],4
	mux1	r15=r15,@rev		// reverse the order of all 8 bytes
	;;
	ld4	r24=[in0],4		// word 2
	ld4	r25=[in1],4
	shr.u	r15=r15,16		// drop what used to be in2 bits 48..63
	add	r16=r20,r21
	add	r17=r22,r23
	;;
	ld4	r26=[in0],4		// word 3
	ld4	r27=[in1],4
	add	r18=r24,r25
	add	r8=r16,r17
	;;
	add	r19=r26,r27
	add	r8=r8,r18
	zxt4	in4=in4			// upper 32 bits of a 32-bit arg may be junk
	;;
	add	r8=r8,r19
	add	r15=r15,in4
	;;
	add	r8=r8,r15		// r8 = full 64-bit sum
	;;
	shr.u	r10=r8,32		// fold 64 -> 32: high half + low half
	zxt4	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16		// fold again: carries + low 16 bits
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	;;
	shr.u	r10=r8,16		// third fold absorbs the remaining carry
	zxt2	r11=r8
	;;
	add	r8=r10,r11
	mov	r9=0xffff
	;;
	andcm	r8=r9,r8		// r8 = 0xffff & ~r8
	br.ret.sptk.many b0
END(csum_ipv6_magic)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters