arch/mips/lib-32/csum_partial.S

   1 /*
   2  * This file is subject to the terms and conditions of the GNU General Public
   3  * License.  See the file "COPYING" in the main directory of this archive
   4  * for more details.
   5  *
   6  * Copyright (C) 1998 Ralf Baechle
   7  */
   8 #include <asm/asm.h>
   9 #include <asm/regdef.h>
  10
  11 #define ADDC(sum,reg)                   \
  12         addu    sum, reg;               \
  13         sltu    v1, sum, reg;           \
  14         addu    sum, v1
  15
  16 #define CSUM_BIGCHUNK(src, offset, sum, t0, t1, t2, t3) \
  17         lw      t0, (offset + 0x00)(src); \
  18         lw      t1, (offset + 0x04)(src); \
  19         lw      t2, (offset + 0x08)(src); \
  20         lw      t3, (offset + 0x0c)(src); \
  21         ADDC(sum, t0);                    \
  22         ADDC(sum, t1);                    \
  23         ADDC(sum, t2);                    \
  24         ADDC(sum, t3);                    \
  25         lw      t0, (offset + 0x10)(src); \
  26         lw      t1, (offset + 0x14)(src); \
  27         lw      t2, (offset + 0x18)(src); \
  28         lw      t3, (offset + 0x1c)(src); \
  29         ADDC(sum, t0);                    \
  30         ADDC(sum, t1);                    \
  31         ADDC(sum, t2);                    \
  32         ADDC(sum, t3);                    \
  33
  34 /*
  35  * a0: source address
  36  * a1: length of the area to checksum
  37  * a2: partial checksum
  38  */
  39
  40 #define src a0
  41 #define dest a1
  42 #define sum v0
  43
  44         .text
  45         .set    noreorder
  46
  47 /* unknown src alignment and < 8 bytes to go  */
  48 small_csumcpy:
  49         move    a1, t2
  50
  51         andi    t0, a1, 4
  52         beqz    t0, 1f
  53          andi   t0, a1, 2
  54
  55         /* Still a full word to go  */
  56         ulw     t1, (src)
  57         addiu   src, 4
  58         ADDC(sum, t1)
  59
  60 1:      move    t1, zero
  61         beqz    t0, 1f
  62          andi   t0, a1, 1
  63
  64         /* Still a halfword to go  */
  65         ulhu    t1, (src)
  66         addiu   src, 2
  67
  68 1:      beqz    t0, 1f
  69          sll    t1, t1, 16
  70
  71         lbu     t2, (src)
  72          nop
  73
  74 #ifdef __MIPSEB__
  75         sll     t2, t2, 8
  76 #endif
  77         or      t1, t2
  78
  79 1:      ADDC(sum, t1)
  80
  81         /* fold checksum */
  82         sll     v1, sum, 16
  83         addu    sum, v1
  84         sltu    v1, sum, v1
  85         srl     sum, sum, 16
  86         addu    sum, v1
  87
  88         /* odd buffer alignment? */
  89         beqz    t7, 1f
  90          nop
  91         sll     v1, sum, 8
  92         srl     sum, sum, 8
  93         or      sum, v1
  94         andi    sum, 0xffff
  95 1:
  96         .set    reorder
  97         /* Add the passed partial csum.  */
  98         ADDC(sum, a2)
  99         jr      ra
 100         .set    noreorder
 101
 102 /* ------------------------------------------------------------------------- */
 103
 104         .align  5
 105 LEAF(csum_partial)
 106         move    sum, zero
 107         move    t7, zero
 108
 109         sltiu   t8, a1, 0x8
 110         bnez    t8, small_csumcpy               /* < 8 bytes to copy */
 111          move   t2, a1
 112
 113         beqz    a1, out
 114          andi   t7, src, 0x1                    /* odd buffer? */
 115
 116 hword_align:
 117         beqz    t7, word_align
 118          andi   t8, src, 0x2
 119
 120         lbu     t0, (src)
 121         subu    a1, a1, 0x1
 122 #ifdef __MIPSEL__
 123         sll     t0, t0, 8
 124 #endif
 125         ADDC(sum, t0)
 126         addu    src, src, 0x1
 127         andi    t8, src, 0x2
 128
 129 word_align:
 130         beqz    t8, dword_align
 131          sltiu  t8, a1, 56
 132
 133         lhu     t0, (src)
 134         subu    a1, a1, 0x2
 135         ADDC(sum, t0)
 136         sltiu   t8, a1, 56
 137         addu    src, src, 0x2
 138
 139 dword_align:
 140         bnez    t8, do_end_words
 141          move   t8, a1
 142
 143         andi    t8, src, 0x4
 144         beqz    t8, qword_align
 145          andi   t8, src, 0x8
 146
 147         lw      t0, 0x00(src)
 148         subu    a1, a1, 0x4
 149         ADDC(sum, t0)
 150         addu    src, src, 0x4
 151         andi    t8, src, 0x8
 152
 153 qword_align:
 154         beqz    t8, oword_align
 155          andi   t8, src, 0x10
 156
 157         lw      t0, 0x00(src)
 158         lw      t1, 0x04(src)
 159         subu    a1, a1, 0x8
 160         ADDC(sum, t0)
 161         ADDC(sum, t1)
 162         addu    src, src, 0x8
 163         andi    t8, src, 0x10
 164
 165 oword_align:
 166         beqz    t8, begin_movement
 167          srl    t8, a1, 0x7
 168
 169         lw      t3, 0x08(src)
 170         lw      t4, 0x0c(src)
 171         lw      t0, 0x00(src)
 172         lw      t1, 0x04(src)
 173         ADDC(sum, t3)
 174         ADDC(sum, t4)
 175         ADDC(sum, t0)
 176         ADDC(sum, t1)
 177         subu    a1, a1, 0x10
 178         addu    src, src, 0x10
 179         srl     t8, a1, 0x7
 180
 181 begin_movement:
 182         beqz    t8, 1f
 183          andi   t2, a1, 0x40
 184
 185 move_128bytes:
 186         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 187         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 188         CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
 189         CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
 190         subu    t8, t8, 0x01
 191         bnez    t8, move_128bytes
 192          addu   src, src, 0x80
 193
 194 1:
 195         beqz    t2, 1f
 196          andi   t2, a1, 0x20
 197
 198 move_64bytes:
 199         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 200         CSUM_BIGCHUNK(src, 0x20, sum, t0, t1, t3, t4)
 201         addu    src, src, 0x40
 202
 203 1:
 204         beqz    t2, do_end_words
 205          andi   t8, a1, 0x1c
 206
 207 move_32bytes:
 208         CSUM_BIGCHUNK(src, 0x00, sum, t0, t1, t3, t4)
 209         andi    t8, a1, 0x1c
 210         addu    src, src, 0x20
 211
 212 do_end_words:
 213         beqz    t8, maybe_end_cruft
 214          srl    t8, t8, 0x2
 215
 216 end_words:
 217         lw      t0, (src)
 218         subu    t8, t8, 0x1
 219         ADDC(sum, t0)
 220         bnez    t8, end_words
 221          addu   src, src, 0x4
 222
 223 maybe_end_cruft:
 224         andi    t2, a1, 0x3
 225
 226 small_memcpy:
 227  j small_csumcpy; move a1, t2
 228         beqz    t2, out
 229          move   a1, t2
 230
 231 end_bytes:
 232         lb      t0, (src)
 233         subu    a1, a1, 0x1
 234         bnez    a2, end_bytes
 235          addu   src, src, 0x1
 236
 237 out:
 238         jr      ra
 239          move   v0, sum
 240         END(csum_partial)