Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Optimized version of the ip_fast_csum() function | |
3 | * Used for calculating IP header checksum | |
4 | * | |
5 | * Return: 16bit checksum, complemented | |
6 | * | |
7 | * Inputs: | |
8 | * in0: address of buffer to checksum (char *) | |
9 | * in1: length of the buffer in 32-bit words (int) | |
10 | * | |
007d77d0 CK |
11 | * Copyright (C) 2002, 2006 Intel Corp. |
12 | * Copyright (C) 2002, 2006 Ken Chen <kenneth.w.chen@intel.com> | |
1da177e4 LT |
13 | */ |
14 | ||
15 | #include <asm/asmmacro.h> | |
e007c533 | 16 | #include <asm/export.h> |
1da177e4 LT |
17 | |
18 | /* | |
19 | * Since we know that most likely this function is called with buf aligned | |
20 | * on 4-byte boundary and 20 bytes in length, we can execute rather quickly | |
21 | * versus calling generic version of do_csum, which has lots of overhead in | |
22 | * handling various alignments and sizes. However, due to lack of constraints | |
23 | * put on the function input argument, cases with alignment not on 4-byte or | |
24 | * size not equal to 20 bytes will be handled by the generic do_csum function. | |
25 | */ | |
26 | ||
27 | #define in0 r32 /* 1st argument */ | |
28 | #define in1 r33 /* 2nd argument */ | |
007d77d0 CK |
29 | #define in2 r34 /* 3rd argument (csum_ipv6_magic); ip_fast_csum's .generic path uses r34 directly as a local */ |
30 | #define in3 r35 /* 4th argument (csum_ipv6_magic); ip_fast_csum's .generic path uses r35 directly as a local */ | |
31 | #define in4 r36 /* 5th argument (csum_ipv6_magic) */ | |
1da177e4 LT |
32 | #define ret0 r8 /* register in which both routines leave their result */ |
33 | ||
34 | GLOBAL_ENTRY(ip_fast_csum) /* in0 = buffer address, in1 = length in 32-bit words; complemented checksum is returned in ret0 */ | |
35 | .prologue | |
36 | .body | |
37 | cmp.ne p6,p7=5,in1 // p6: length is not 5 words (20 bytes); p7: it is | |
38 | and r14=3,in0 // low two address bits: nonzero if not 4-byte aligned | |
39 | add r15=4,in0 // second source pointer, one word past in0 | |
40 | ;; | |
41 | cmp.ne.or.andcm p6,p7=r14,r0 /* misaligned buffer: also set p6 (slow path) and clear p7 (fast path) */ | |
42 | ;; | |
43 | (p7) ld4 r20=[in0],8 /* fast path: load word 0, in0 advances to word 2 */ | |
44 | (p7) ld4 r21=[r15],8 /* fast path: load word 1, r15 advances to word 3 */ | |
45 | (p6) br.spnt .generic /* wrong size or alignment: let do_csum handle it */ | |
46 | ;; | |
47 | ld4 r22=[in0],8 /* word 2, in0 advances to word 4 */ | |
48 | ld4 r23=[r15],8 /* word 3 */ | |
49 | ;; | |
50 | ld4 r24=[in0] /* word 4, the last of the five */ | |
51 | add r20=r20,r21 /* word0 + word1 */ | |
52 | add r22=r22,r23 /* word2 + word3 */ | |
53 | ;; | |
54 | add r20=r20,r22 | |
55 | ;; | |
56 | add r20=r20,r24 /* r20 = 64-bit sum of all five words */ | |
57 | ;; | |
58 | shr.u ret0=r20,16 // now need to add the carry: ret0 = bits above the low 16 | |
59 | zxt2 r20=r20 /* r20 = low 16 bits */ | |
60 | ;; | |
61 | add r20=ret0,r20 /* first fold */ | |
62 | ;; | |
63 | shr.u ret0=r20,16 // add carry again | |
64 | zxt2 r20=r20 | |
65 | ;; | |
66 | add r20=ret0,r20 /* second fold */ | |
67 | ;; | |
68 | shr.u ret0=r20,16 /* one more round to absorb the last possible carry */ | |
69 | zxt2 r20=r20 | |
70 | ;; | |
71 | add r20=ret0,r20 /* third fold: sum of five 32-bit words now fits in 16 bits */ | |
6dbfc19b | 72 | mov r9=0xffff |
1da177e4 | 73 | ;; |
6dbfc19b | 74 | andcm ret0=r9,r20 /* ret0 = 0xffff & ~r20: 16-bit one's complement of the sum */ |
1da177e4 LT |
75 | .restore sp // reset frame state |
76 | br.ret.sptk.many b0 | |
77 | ;; | |
78 | ||
79 | .generic: /* slow path: defer to do_csum, then complement its result */ | |
80 | .prologue | |
81 | .save ar.pfs, r35 | |
82 | alloc r35=ar.pfs,2,2,2,0 /* frame: 2 inputs, 2 locals (r34, r35), 2 outputs; previous ar.pfs kept in r35 */ | |
83 | .save rp, r34 | |
84 | mov r34=b0 /* keep the return address across the call */ | |
85 | .body | |
86 | dep.z out1=in1,2,30 /* out1 = in1 << 2: convert the word count to a byte count */ | |
87 | mov out0=in0 /* pass the buffer address through unchanged */ | |
88 | ;; | |
89 | br.call.sptk.many b0=do_csum | |
90 | ;; | |
91 | andcm ret0=-1,ret0 /* ret0 = ~ret0; NOTE(review): not masked to 16 bits like the fast path - presumably callers use only the low 16 bits, confirm */ | |
92 | mov ar.pfs=r35 /* restore the caller's ar.pfs */ | |
93 | mov b0=r34 /* restore the return address */ | |
94 | br.ret.sptk.many b0 | |
95 | END(ip_fast_csum) | |
e007c533 | 96 | EXPORT_SYMBOL(ip_fast_csum) |
007d77d0 CK |
97 | |
98 | GLOBAL_ENTRY(csum_ipv6_magic) /* sums four 32-bit words from [in0], four from [in1], a value built from in2/in3, and in4; returns the folded, complemented result in r8. NOTE(review): in0/in1 are presumably the source/destination addresses, in2 the length, in3 the protocol and in4 the initial sum - confirm against the C prototype */ | |
99 | ld4 r20=[in0],4 /* word 0 of the first buffer */ | |
100 | ld4 r21=[in1],4 /* word 0 of the second buffer */ | |
5afe18d2 | 101 | zxt4 in2=in2 /* keep only the low 32 bits of in2 */ |
007d77d0 CK |
102 | ;; |
103 | ld4 r22=[in0],4 /* word 1 of each buffer */ | |
104 | ld4 r23=[in1],4 | |
5afe18d2 | 105 | dep r15=in3,in2,32,16 /* r15 = in2 with the low 16 bits of in3 placed in bits 32..47 */ |
007d77d0 CK |
106 | ;; |
107 | ld4 r24=[in0],4 /* word 2 of each buffer */ | |
108 | ld4 r25=[in1],4 | |
5afe18d2 | 109 | mux1 r15=r15,@rev /* reverse the order of all eight bytes of r15 */ |
007d77d0 CK |
110 | add r16=r20,r21 /* partial sum of word 0 pair */ |
111 | add r17=r22,r23 /* partial sum of word 1 pair */ | |
5afe18d2 | 112 | zxt4 in4=in4 /* keep only the low 32 bits of in4 */ |
007d77d0 CK |
113 | ;; |
114 | ld4 r26=[in0],4 /* word 3 of each buffer */ | |
115 | ld4 r27=[in1],4 | |
5afe18d2 | 116 | shr.u r15=r15,16 /* drop the two zero bytes: r15 = byte-swapped in2 in bits 16..47, byte-swapped low half of in3 in bits 0..15 */ |
007d77d0 CK |
117 | add r18=r24,r25 /* partial sum of word 2 pair */ |
118 | add r8=r16,r17 | |
119 | ;; | |
120 | add r19=r26,r27 /* partial sum of word 3 pair */ | |
121 | add r8=r8,r18 | |
122 | ;; | |
123 | add r8=r8,r19 /* r8 = sum of all eight loaded words */ | |
124 | add r15=r15,in4 /* add in4 to the in2/in3 term */ | |
125 | ;; | |
126 | add r8=r8,r15 /* r8 = full 64-bit sum */ | |
127 | ;; | |
128 | shr.u r10=r8,32 // now fold sum into short: r10 = upper 32 bits | |
129 | zxt4 r11=r8 /* r11 = lower 32 bits */ | |
130 | ;; | |
131 | add r8=r10,r11 /* 64 -> 32 bit fold (may carry past bit 31) */ | |
132 | ;; | |
133 | shr.u r10=r8,16 // yeah, keep it rolling | |
134 | zxt2 r11=r8 | |
135 | ;; | |
136 | add r8=r10,r11 /* fold everything above bit 15 into the low 16 bits */ | |
137 | ;; | |
138 | shr.u r10=r8,16 // three times lucky | |
139 | zxt2 r11=r8 | |
140 | ;; | |
141 | add r8=r10,r11 /* fold once more to absorb the carry from the previous add */ | |
142 | mov r9=0xffff | |
143 | ;; | |
144 | andcm r8=r9,r8 /* r8 = 0xffff & ~r8: 16-bit one's complement of the sum */ | |
145 | br.ret.sptk.many b0 | |
146 | END(csum_ipv6_magic) | |
e007c533 | 147 | EXPORT_SYMBOL(csum_ipv6_magic) |