release/src-rt-6.x.4708/linux/linux-2.6.36/arch/sh/lib/checksum.S

   1 /* $Id: checksum.S,v 1.10 2001/07/06 13:11:32 Exp $
   2  *
   3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   4  *              operating system.  INET is implemented using the  BSD Socket
   5  *              interface as the means of communication with the user level.
   6  *
   7  *              IP/TCP/UDP checksumming routines
   8  *
   9  * Authors:     Jorge Cwik, <jorge@laser.satlink.net>
  10  *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
  11  *              Tom May, <ftom@netcom.com>
  12  *              Pentium Pro/II routines:
  13  *              Alexander Kjeldaas <astor@guardian.no>
  14  *              Finn Arne Gangstad <finnag@guardian.no>
  15  *              Lots of code moved from tcp.c and ip.c; see those files
  16  *              for more names.
  17  *
  18  * Changes:     Ingo Molnar, converted csum_partial_copy() to 2.1 exception
  19  *                           handling.
  20  *              Andi Kleen,  add zeroing on error
  21  *                   converted to pure assembler
  22  *
  23  * SuperH version:  Copyright (C) 1999  Niibe Yutaka
  24  *
  25  *              This program is free software; you can redistribute it and/or
  26  *              modify it under the terms of the GNU General Public License
  27  *              as published by the Free Software Foundation; either version
  28  *              2 of the License, or (at your option) any later version.
  29  */
  30
  31 #include <asm/errno.h>
  32 #include <linux/linkage.h>
  33
  34 /*
  35  * computes a partial checksum, e.g. for TCP/UDP fragments
  36  */
  37
  38 /*
  39  * asmlinkage __wsum csum_partial(const void *buf, int len, __wsum sum);
  40  */
  41
  42 .text
  43 ENTRY(csum_partial)
  44           /*
  45            * In: r4 = buf, r5 = len, r6 = sum.  Out: r0 = sum.  Scratch: r0-r3, r7.
  46            * Experiments with Ethernet and SLIP connections show that buf is
  47            * aligned on either a 2-byte or 4-byte boundary, so the head is brought
  48            * to 4-byte alignment first and the unrolled 32-byte loop does the bulk.
  49            * Carries go from addc to addc in T.  len == 0 returns sum unchanged.
  50            */
  51         mov     r4, r0
  52         tst     #3, r0          ! Check alignment.
  53         bt/s    2f              ! Jump if alignment is ok.
  54          mov    r4, r7          ! Delay slot: keep the original buf for label 9
  55         ! Not 4-byte aligned: tell an odd address from a 2-byte aligned one
  56         tst     #1, r0          ! Odd address?
  57         bt      21f             ! No: buf is on a 2-byte boundary.
  58         ! NOTE(review): the byte below is added before the rotate on both byte orders; confirm for big-endian
  59         ! buf is odd: sum one byte, rotate, then sum the rest from an even address
  60         tst     r5, r5
  61         add     #-1, r5
  62         bt      10f             ! len == 0: nothing summed, so skip the rotate at 9
  63         mov.b   @r4+, r0
  64         extu.b  r0, r0
  65         addc    r0, r6          ! t=0 from previous tst
  66         mov     r6, r0          ! r6 = rotl(r6, 8), built from r6 << 8 ...
  67         shll8   r6
  68         shlr16  r0
  69         shlr8   r0              ! ... and r6 >> 24
  70         or      r0, r6
  71         mov     r4, r0
  72         tst     #2, r0
  73         bt      2f              ! Already on a 4-byte boundary.
  74 21:
  75         ! buf is 2 byte aligned (len could be 0)
  76         add     #-2, r5         ! Alignment uses up two bytes.
  77         cmp/pz  r5              !
  78         bt/s    1f              ! Jump if we had at least two bytes.
  79          clrt
  80         bra     6f
  81          add    #2, r5          ! r5 was < 2.  Deal with it.
  82 1:
  83         mov.w   @r4+, r0
  84         extu.w  r0, r0
  85         addc    r0, r6
  86         bf      2f
  87         add     #1, r6          ! Fold the carry back in.
  88 2:
  89         ! buf is 4 byte aligned (len could be 0)
  90         mov     r5, r1
  91         mov     #-5, r0
  92         shld    r0, r1          ! r1 = len >> 5: number of 32-byte chunks
  93         tst     r1, r1
  94         bt/s    4f              ! if it is 0, go to 4f
  95          clrt
  96         .align  2
  97 3:
  98         mov.l   @r4+, r0
  99         mov.l   @r4+, r2
 100         mov.l   @r4+, r3
 101         addc    r0, r6
 102         mov.l   @r4+, r0
 103         addc    r2, r6
 104         mov.l   @r4+, r2
 105         addc    r3, r6
 106         mov.l   @r4+, r3
 107         addc    r0, r6
 108         mov.l   @r4+, r0
 109         addc    r2, r6
 110         mov.l   @r4+, r2
 111         addc    r3, r6
 112         addc    r0, r6
 113         addc    r2, r6
 114         movt    r0              ! Save the carry: dt overwrites T
 115         dt      r1
 116         bf/s    3b
 117          cmp/eq #1, r0          ! Delay slot: T = saved carry again
 118         ! here, we know r1==0
 119         addc    r1, r6                  ! add carry to r6
 120 4:
 121         mov     r5, r0
 122         and     #0x1c, r0       ! Bytes left in whole 32-bit words (0..28)
 123         tst     r0, r0
 124         bt      6f
 125         ! 4 bytes or more remaining
 126         mov     r0, r1
 127         shlr2   r1              ! r1 = word count
 128         mov     #0, r2          ! First addc at 5 adds 0; later ones add the previous word
 129 5:
 130         addc    r2, r6
 131         mov.l   @r4+, r2
 132         movt    r0              ! Same carry save/restore around dt as in loop 3
 133         dt      r1
 134         bf/s    5b
 135          cmp/eq #1, r0
 136         addc    r2, r6          ! Last word loaded by the loop
 137         addc    r1, r6          ! r1==0 here, so it means add carry-bit
 138 6:
 139         ! 3 bytes or less remaining
 140         mov     #3, r0
 141         and     r0, r5
 142         tst     r5, r5
 143         bt      9f              ! if it is 0, go to 9f
 144         mov     #2, r1
 145         cmp/hs  r1, r5
 146         bf      7f              ! Exactly one byte left
 147         mov.w   @r4+, r0
 148         extu.w  r0, r0
 149         cmp/eq  r1, r5
 150         bt/s    8f              ! Exactly two bytes left
 151          clrt
 152         shll16  r0              ! Three left: halfword goes in the upper half
 153         addc    r0, r6
 154 7:
 155         mov.b   @r4+, r0
 156         extu.b  r0, r0
 157 #ifndef __LITTLE_ENDIAN__
 158         shll8   r0
 159 #endif
 160 8:
 161         addc    r0, r6
 162         mov     #0, r0
 163         addc    r0, r6          ! Fold the last carry
 164 9:
 165         ! Check if the buffer was misaligned, if so realign sum
 166         mov     r7, r0
 167         tst     #1, r0
 168         bt      10f
 169         mov     r6, r0          ! Odd buf: rotate the sum left by 8 once more
 170         shll8   r6
 171         shlr16  r0
 172         shlr8   r0
 173         or      r0, r6
 174 10:
 175         rts
 176          mov    r6, r0          ! Delay slot: return value
 177
 178 /*
 179 unsigned int csum_partial_copy_generic (const char *src, char *dst, int len,
 180                                         int sum, int *src_err_ptr, int *dst_err_ptr)
 181  */
 182
 183
 184 #define SRC(...)        /* wrap one instruction that reads src */ \
 185         9999: __VA_ARGS__ ;             \
 186         .section __ex_table, "a";       \
 187         .long 9999b, 6001f      ;  /* (wrapped instruction, fixup 6001) */ \
 188         .previous
 189 /* DST(): same wrapper for instructions that write dst; pairs them with fixup 6002 */
 190 #define DST(...)                        \
 191         9999: __VA_ARGS__ ;             \
 192         .section __ex_table, "a";       \
 193         .long 9999b, 6002f      ;       \
 194         .previous
 195
 196 ! Copy LEN bytes from SRC to DST, adding them into SUM; result is in r0.
 197 ! r4:   const char *SRC
 198 ! r5:   char *DST
 199 ! r6:   int LEN
 200 ! r7:   int SUM
 201 !
 202 ! on stack (at 8 and 12 from r15 once r5/r6 have been pushed below):
 203 ! int *SRC_ERR_PTR   receives -EFAULT from fixup 6001
 204 ! int *DST_ERR_PTR   receives -EFAULT from fixup 6002
 205 ! Scratch: r0-r2.  T carries between addc steps; movt + cmp/eq keep it over dt.
 206 ENTRY(csum_partial_copy_generic)
 207         mov.l   r5,@-r15        ! save dst: fixup 6001 reloads it from @(4,r15)
 208         mov.l   r6,@-r15        ! save len: fixup 6001 reloads it from @r15
 209
 210         mov     #3,r0           ! Check src and dest are equally aligned
 211         mov     r4,r1
 212         and     r0,r1           ! r1 = src & 3
 213         and     r5,r0           ! r0 = dst & 3
 214         cmp/eq  r1,r0
 215         bf      3f              ! Different alignments, use slow version
 216         tst     #1,r0           ! Check dest word aligned
 217         bf      3f              ! If not, do it the slow way
 218
 219         mov     #2,r0
 220         tst     r0,r5           ! Check dest alignment.
 221         bt      2f              ! Jump if alignment is ok.
 222         add     #-2,r6          ! Alignment uses up two bytes.
 223         cmp/pz  r6              ! Jump if we had at least two bytes.
 224         bt/s    1f
 225          clrt
 226         add     #2,r6           ! r6 was < 2.   Deal with it.
 227         bra     4f
 228          mov    r6,r2           ! delay slot: the tail code at 4 reads the length from r2
 229
 230 3:      ! Handle different src and dest alignments.
 231         ! This is not common, so simple byte by byte copy will do.
 232         mov     r6,r2           ! r2 = len, kept for the odd-byte test after the loop
 233         shlr    r6              ! r6 = number of byte pairs
 234         tst     r6,r6
 235         bt      4f              ! no full pair: the tail code at 4 handles 0 or 1 byte
 236         clrt
 237         .align  2
 238 5:
 239 SRC(    mov.b   @r4+,r1         )
 240 SRC(    mov.b   @r4+,r0         )
 241         extu.b  r1,r1
 242 DST(    mov.b   r1,@r5          )
 243 DST(    mov.b   r0,@(1,r5)      )
 244         extu.b  r0,r0
 245         add     #2,r5
 246         ! merge the pair into one 16-bit value: first byte low on little-endian, high otherwise
 247 #ifdef  __LITTLE_ENDIAN__
 248         shll8   r0
 249 #else
 250         shll8   r1
 251 #endif
 252         or      r1,r0
 253
 254         addc    r0,r7
 255         movt    r0              ! save the carry: dt overwrites T
 256         dt      r6
 257         bf/s    5b
 258          cmp/eq #1,r0           ! delay slot: T = saved carry again
 259         mov     #0,r0
 260         addc    r0, r7          ! fold the last carry into the sum
 261
 262         mov     r2, r0
 263         tst     #1, r0          ! odd length?
 264         bt      7f              ! no: done
 265         bra     5f              ! yes: copy the last byte at the second label 5
 266          clrt
 267
 268         ! src and dest equally aligned, but to a two byte boundary.
 269         ! Handle first two bytes as a special case
 270         .align  2
 271 1:
 272 SRC(    mov.w   @r4+,r0         )
 273 DST(    mov.w   r0,@r5          )
 274         add     #2,r5
 275         extu.w  r0,r0
 276         addc    r0,r7
 277         mov     #0,r0
 278         addc    r0,r7           ! fold the carry
 279 2:
 280         mov     r6,r2           ! r2 = bytes still to copy, reread for each tail stage
 281         mov     #-5,r0
 282         shld    r0,r6           ! r6 = r6 >> 5: number of 32-byte chunks
 283         tst     r6,r6
 284         bt/s    2f
 285          clrt
 286         .align  2
 287 1:
 288 SRC(    mov.l   @r4+,r0         )
 289 SRC(    mov.l   @r4+,r1         )
 290         addc    r0,r7
 291 DST(    mov.l   r0,@r5          )
 292 DST(    mov.l   r1,@(4,r5)      )
 293         addc    r1,r7
 294
 295 SRC(    mov.l   @r4+,r0         )
 296 SRC(    mov.l   @r4+,r1         )
 297         addc    r0,r7
 298 DST(    mov.l   r0,@(8,r5)      )
 299 DST(    mov.l   r1,@(12,r5)     )
 300         addc    r1,r7
 301
 302 SRC(    mov.l   @r4+,r0         )
 303 SRC(    mov.l   @r4+,r1         )
 304         addc    r0,r7
 305 DST(    mov.l   r0,@(16,r5)     )
 306 DST(    mov.l   r1,@(20,r5)     )
 307         addc    r1,r7
 308
 309 SRC(    mov.l   @r4+,r0         )
 310 SRC(    mov.l   @r4+,r1         )
 311         addc    r0,r7
 312 DST(    mov.l   r0,@(24,r5)     )
 313 DST(    mov.l   r1,@(28,r5)     )
 314         addc    r1,r7
 315         add     #32,r5          ! dst advances once per chunk; src advanced with each load
 316         movt    r0              ! carry saved over dt, restored in the delay slot
 317         dt      r6
 318         bf/s    1b
 319          cmp/eq #1,r0
 320         mov     #0,r0
 321         addc    r0,r7
 322
 323 2:      mov     r2,r6
 324         mov     #0x1c,r0
 325         and     r0,r6           ! bytes left in whole 32-bit words (0..28)
 326         cmp/pl  r6
 327         bf/s    4f
 328          clrt
 329         shlr2   r6              ! r6 = word count
 330 3:
 331 SRC(    mov.l   @r4+,r0 )
 332         addc    r0,r7
 333 DST(    mov.l   r0,@r5  )
 334         add     #4,r5
 335         movt    r0
 336         dt      r6
 337         bf/s    3b
 338          cmp/eq #1,r0
 339         mov     #0,r0
 340         addc    r0,r7
 341 4:      mov     r2,r6
 342         mov     #3,r0
 343         and     r0,r6           ! 0..3 trailing bytes
 344         cmp/pl  r6
 345         bf      7f              ! none left
 346         mov     #2,r1
 347         cmp/hs  r1,r6
 348         bf      5f              ! exactly one byte left
 349 SRC(    mov.w   @r4+,r0 )
 350 DST(    mov.w   r0,@r5  )
 351         extu.w  r0,r0
 352         add     #2,r5
 353         cmp/eq  r1,r6
 354         bt/s    6f              ! exactly two bytes left
 355          clrt
 356         shll16  r0              ! three left: halfword goes in the upper half
 357         addc    r0,r7
 358 5:
 359 SRC(    mov.b   @r4+,r0 )
 360 DST(    mov.b   r0,@r5  )
 361         extu.b  r0,r0
 362 #ifndef __LITTLE_ENDIAN__
 363         shll8   r0
 364 #endif
 365 6:      addc    r0,r7
 366         mov     #0,r0
 367         addc    r0,r7           ! fold the last carry
 368 7:
 369 5000:
 370         ! 5000 is where both fixups jump back to; the epilogue follows .previous below
 371 # Exception handler:
 372 .section .fixup, "ax"
 373         ! 6001 is the fixup named by every SRC() table entry
 374 6001:
 375         mov.l   @(8,r15),r0                     ! src_err_ptr
 376         mov     #-EFAULT,r1
 377         mov.l   r1,@r0
 378
 379         ! zero the complete destination - computing the rest
 380         ! is too much work
 381         mov.l   @(4,r15),r5             ! dst
 382         mov.l   @r15,r6                 ! len
 383         mov     #0,r7                   ! zero byte to store; also the sum that is returned
 384 1:      mov.b   r7,@r5
 385         dt      r6
 386         bf/s    1b
 387          add    #1,r5
 388         mov.l   8000f,r0                ! address of 5000 from the literal below
 389         jmp     @r0
 390          nop
 391         .align  2
 392 8000:   .long   5000b
 393         ! 6002 is the fixup named by every DST() table entry; the sum in r7 is left as it is
 394 6002:
 395         mov.l   @(12,r15),r0                    ! dst_err_ptr
 396         mov     #-EFAULT,r1
 397         mov.l   r1,@r0
 398         mov.l   8001f,r0
 399         jmp     @r0
 400          nop
 401         .align  2
 402 8001:   .long   5000b
 403
 404 .previous
 405         add     #8,r15          ! drop the saved dst and len
 406         rts
 407          mov    r7,r0           ! delay slot: return the sum (0 after a source fault)