arch/cris/arch-v10/lib/checksum.S

   1 /* $Id: checksum.S,v 1.1 2001/12/17 13:59:27 bjornw Exp $
   2  * A fast checksum routine using movem
   3  * Copyright (c) 1998-2001 Axis Communications AB
   4  *
   5  * csum_partial(const unsigned char * buff, int len, unsigned int sum)
   6  */
   7
   8         .globl  csum_partial
   9 csum_partial:
  10
             ;; Add the bytes of a buffer into a running 32-bit checksum.
             ;; Buffers of 80 bytes or more are summed 40 bytes at a time with
             ;; movem; whatever is left is summed as 16-bit words, plus one
             ;; final byte if the length is odd.
             ;;
             ;; In:
  11         ;; r10 - src (post-incremented as longwords/words are read)
  12         ;; r11 - length in bytes
  13         ;; r12 - checksum to start from
             ;;
             ;; Out:      r10 - updated checksum (copied from r12 on return)
             ;; Clobbers: r9, r11, r12, r13 and the flags; r0 - r8 are saved
             ;;           and restored around the movem loop
             ;; Stack:    9*4 bytes while the movem loop runs
  14
  15         ;; Lengths below 80 bytes are not worth the register save/restore
  16         ;; that the movem loop needs, so they go straight to the word loop.
  17         ;; The check also keeps the movem loop, which always reads 40 bytes
             ;; per pass, from summing past the end of a buffer shorter than that.
             ;; NOTE(review): blo is an unsigned test, so a negative length would
             ;; fall through into the movem loop - presumably callers never pass
             ;; one; confirm.
  18
  19         cmpu.w  80,$r11
  20         blo     _word_loop      ; unsigned: length < 80
  21         nop                     ; branch delay slot
  22
  23         ;; need to save the registers we use below in the movem loop
  24         ;; this overhead is why we have a check above for breakeven length
  25         ;; only r0 - r8 have to be saved, the other ones are clobber-able
  26         ;; according to the ABI
  27
  28         subq    9*4,$sp         ; make room for r0 - r8
  29         movem   $r8,[$sp]       ; store r0 - r8
  30
  31         ;; do a movem checksum
             ;; r11 is kept biased by -40 inside the loop, so "r11 >= 0" after
             ;; the subtract at the bottom means another full 40 bytes remain.
  32
  33         subq    10*4,$r11       ; update length for the first loop
  34
  35 _mloop: movem   [$r10+],$r9     ; read 10 longwords into r0 - r9
  36
  37         ;; perform dword checksumming on the 10 longwords
             ;; each ax makes the add that follows it include the carry out of
             ;; the previous add
  38
  39         add.d   $r0,$r12
  40         ax
  41         add.d   $r1,$r12
  42         ax
  43         add.d   $r2,$r12
  44         ax
  45         add.d   $r3,$r12
  46         ax
  47         add.d   $r4,$r12
  48         ax
  49         add.d   $r5,$r12
  50         ax
  51         add.d   $r6,$r12
  52         ax
  53         add.d   $r7,$r12
  54         ax
  55         add.d   $r8,$r12
  56         ax
  57         add.d   $r9,$r12
  58
  59         ;; fold the carry into the checksum, to avoid having to loop the carry
  60         ;; back into the top
  61
  62         ax
  63         addq    0,$r12          ; add in the carry left by the last add.d
  64         ax                      ; do it again, since we might have generated a carry
  65         addq    0,$r12
  66
  67         subq    10*4,$r11       ; account for the 40 bytes just summed
  68         bge     _mloop          ; signed: at least 40 more bytes remain
  69         nop                     ; branch delay slot
  70
  71         addq    10*4,$r11       ; undo the bias: r11 = bytes still to sum
  72
  73         movem   [$sp+],$r8      ; restore r0 - r8 and release the stack space
  74
  75 _word_loop:
  76         ;; only fold if there is anything to fold.
             ;; There is no nop after the beq: the moveq further down is the next
             ;; instruction and so occupies the branch delay slot. It runs whether
             ;; or not the branch is taken, which is harmless because r9 is only
             ;; a temporary.
  77
  78         cmpq    0,$r12
  79         beq     _no_fold
  80
  81         ;; fold 32-bit checksum into a 16-bit checksum, so that the word and
  82         ;; byte adds below cannot carry out of 32 bits. r9 and r13 are temporaries.
  83
  84         moveq   -1,$r9          ; put 0xffff in r9, faster than move.d 0xffff,r9
  85         lsrq    16,$r9
  86
  87         move.d  $r12,$r13
  88         lsrq    16,$r13         ; r13 = checksum >> 16
  89         and.d   $r9,$r12                ; checksum = checksum & 0xffff
  90         add.d   $r13,$r12               ; checksum += r13
  91         move.d  $r12,$r13               ; do the same again, maybe we got a carry last add
  92         lsrq    16,$r13
  93         and.d   $r9,$r12
  94         add.d   $r13,$r12
  95
  96 _no_fold:
  97         cmpq    2,$r11
  98         blt     _no_words       ; signed: fewer than 2 bytes left
  99         nop                     ; branch delay slot
 100
 101         ;; checksum the rest of the words
             ;; r11 is biased by -2 so the loop can test it with bge. The addu.w
             ;; sits in the delay slot of that bge, so it runs once per pass,
             ;; including the final pass where the branch falls through.
 102
 103         subq    2,$r11
 104
 105 _wloop: subq    2,$r11
 106         bge     _wloop
 107         addu.w  [$r10+],$r12    ; delay slot: add the next 16-bit word, advance src
 108
 109         addq    2,$r11          ; undo the bias; r11 is now 0 or 1
 110
 111 _no_words:
 112         ;; see if we have one odd byte more
 113         cmpq    1,$r11
 114         beq     _do_byte
 115         nop                     ; branch delay slot
 116         ret
 117         move.d  $r12, $r10      ; delay slot of ret: return the checksum in r10
 118
 119 _do_byte:
 120         ;; checksum the last byte (nothing is copied)
 121         addu.b  [$r10],$r12
 122         ret
 123         move.d  $r12, $r10      ; delay slot of ret: return the checksum in r10
 124