Update copyright notices with scripts/update-copyrights
[glibc.git] / ports / sysdeps / alpha / alphaev5 / lshift.S
blob459221f25653e26d3ddfabab82948893a6e092aa
1  # Alpha EV5 __mpn_lshift --
3  # Copyright (C) 1994-2014 Free Software Foundation, Inc.
5  # This file is part of the GNU MP Library.
7  # The GNU MP Library is free software; you can redistribute it and/or modify
8  # it under the terms of the GNU Lesser General Public License as published by
9  # the Free Software Foundation; either version 2.1 of the License, or (at your
10  # option) any later version.
12  # The GNU MP Library is distributed in the hope that it will be useful, but
13  # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14  # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15  # License for more details.
17  # You should have received a copy of the GNU Lesser General Public License
18  # along with the GNU MP Library.  If not, see <http://www.gnu.org/licenses/>.
21  # INPUT PARAMETERS
22  # res_ptr      r16
23  # s1_ptr       r17
24  # size         r18
25  # cnt          r19
27  # This code runs at 3.25 cycles/limb on the EV5.
29         .set    noreorder       # the code below is hand-scheduled; ask the assembler not to reorder it
30         .set    noat            # $28 (the assembler temporary) is used explicitly below
31 .text
32         .align  3
    #
    # __mpn_lshift -- shift a multi-limb number left by cnt bits.
    #
    # Reads `size` 8-byte limbs starting at s1_ptr and writes `size` limbs
    # starting at res_ptr, working from the highest address downwards.
    # Each stored limb is
    #       (s1[i] << cnt) | (s1[i-1] >> -cnt)
    # and the lowest limb is just s1[0] << cnt.
    #
    # The right shifts use r20 = 0 - cnt as their count.  This relies on the
    # Alpha shifter looking only at the low 6 bits of the count, which makes
    # it a shift by 64-cnt.
    #
    # In:   r16 = res_ptr, r17 = s1_ptr, r18 = size, r19 = cnt
    # Out:  r0  = highest source limb >> -cnt (the bits shifted out at the top)
    #
    # Register roles inside the function:
    #   r1-r4    source limbs, loaded four at a time
    #   r5-r8    result limbs being assembled (srl part, then OR'ed with sll part)
    #   r21-r24  sll parts waiting to be OR'ed with the srl of the next-lower limb
    #   r16/r17  running end pointers into RES / s1, both moving downwards
    #   r18      limbs still to be consumed by the unrolled code (multiple of 4)
    #   r20      0 - cnt
    #   r28      iteration count of the single-limb loop .Loop0
    #
    # NOTE(review): no argument checks are made.  The top limb is loaded
    # unconditionally, so size is presumably required to be >= 1, and the
    # OR-combination only makes sense for 0 < cnt < 64 -- confirm with callers.
    #
    # Structure: .Loop0 handles (size-1) mod 4 limbs one at a time so that the
    # remainder is a multiple of 4; the rest is a software-pipelined loop that
    # handles 4 limbs per iteration, with separate warm-up and cool-down code.
    #
33         .globl  __mpn_lshift
34         .ent    __mpn_lshift
35 __mpn_lshift:
36         .frame  $30,0,$26,0     # no stack frame; return address is in $26
38         s8addq  $18,$17,$17     # make r17 point at end of s1
39         ldq     $4,-8($17)      # load first limb
40         subq    $31,$19,$20     # r20 = 0 - cnt, the count for all right shifts
41         s8addq  $18,$16,$16     # make r16 point at end of RES
42         subq    $18,1,$18       # r18 = size - 1 (the top limb is already in r4)
43         and     $18,4-1,$28     # number of limbs in first loop
44         srl     $4,$20,$0       # compute function result
46         beq     $28,.L0         # (size-1) already a multiple of 4: skip .Loop0
47         subq    $18,$28,$18     # r18 = limbs left for the unrolled code
    # Single-limb loop.  On entry to each iteration r4 holds the limb whose
    # result is about to be stored; r3 receives the next-lower limb.
49         .align  3
50 .Loop0: ldq     $3,-16($17)     # r3 = next-lower source limb
51         subq    $16,8,$16       # step RES pointer down one limb
52         sll     $4,$19,$5       # high part: current limb << cnt
53         subq    $17,8,$17       # step s1 pointer down one limb
54         subq    $28,1,$28
55         srl     $3,$20,$6       # low part: bits carried up from the lower limb
56         or      $3,$3,$4        # r4 = r3: lower limb becomes the current limb
57         or      $5,$6,$8
58         stq     $8,0($16)       # store the finished result limb
59         bne     $28,.Loop0
    # r24 carries the sll part of the limb whose result has not been stored yet.
61 .L0:    sll     $4,$19,$24
62         beq     $18,.Lend       # nothing below it: r24 alone is the lowest result limb
63  # warm up phase 1
    # Load the next four source limbs into r1..r4.
64         ldq     $1,-16($17)
65         subq    $18,4,$18
66         ldq     $2,-24($17)
67         ldq     $3,-32($17)
68         ldq     $4,-40($17)
69         beq     $18,.Lend1      # those were the last four limbs
70  # warm up phase 2
    # Start the shifts for the first group while loading the second group
    # (offsets -48..-72) into r1..r4 as each register becomes free.
71         srl     $1,$20,$7
72         sll     $1,$19,$21
73         srl     $2,$20,$8
74         ldq     $1,-48($17)
75         sll     $2,$19,$22
76         ldq     $2,-56($17)
77         srl     $3,$20,$5
78         or      $7,$24,$7       # r7 complete: pending sll part | srl of first loaded limb
79         sll     $3,$19,$23
80         or      $8,$21,$8       # r8 complete
81         srl     $4,$20,$6
82         ldq     $3,-64($17)
83         sll     $4,$19,$24      # new pending sll part for the next group
84         ldq     $4,-72($17)
85         subq    $18,4,$18
86         beq     $18,.Lend2      # only two groups in total: go straight to cool down
87         .align  4
88  # main loop
    # Per iteration: store the four results of the previous group, shift the
    # group currently in r1..r4, and load the following group (-80..-104).
    # r16 is stepped down after the four stores and r17 after the four loads,
    # so the offsets used before each step are relative to the old pointers.
89 .Loop:  stq     $7,-8($16)
90         or      $5,$22,$5       # r5 complete
91         stq     $8,-16($16)
92         or      $6,$23,$6       # r6 complete
94         srl     $1,$20,$7
95         subq    $18,4,$18
96         sll     $1,$19,$21
97         unop    # ldq   $31,-96($17)
    # (no-op filler; the load to $31 in the comment is not assembled --
    #  presumably a disabled prefetch)
99         srl     $2,$20,$8
100         ldq     $1,-80($17)
101         sll     $2,$19,$22
102         ldq     $2,-88($17)
104         stq     $5,-24($16)
105         or      $7,$24,$7
106         stq     $6,-32($16)
107         or      $8,$21,$8
109         srl     $3,$20,$5
110         unop    # ldq   $31,-96($17)
111         sll     $3,$19,$23
112         subq    $16,32,$16      # RES pointer down by one group (4 limbs)
114         srl     $4,$20,$6
115         ldq     $3,-96($17)
116         sll     $4,$19,$24
117         ldq     $4,-104($17)
119         subq    $17,32,$17      # s1 pointer down by one group (4 limbs)
120         bne     $18,.Loop
121  # cool down phase 2/1
    # Same as one loop iteration without the loads and pointer updates:
    # store the previous group and finish shifting the last loaded group.
122 .Lend2: stq     $7,-8($16)
123         or      $5,$22,$5
124         stq     $8,-16($16)
125         or      $6,$23,$6
126         srl     $1,$20,$7
127         sll     $1,$19,$21
128         srl     $2,$20,$8
129         sll     $2,$19,$22
130         stq     $5,-24($16)
131         or      $7,$24,$7
132         stq     $6,-32($16)
133         or      $8,$21,$8
134         srl     $3,$20,$5
135         sll     $3,$19,$23
136         srl     $4,$20,$6
137         sll     $4,$19,$24
138  # cool down phase 2/2
    # Store the last group; r16 was not stepped, hence offsets -40..-64.
139         stq     $7,-40($16)
140         or      $5,$22,$5
141         stq     $8,-48($16)
142         or      $6,$23,$6
143         stq     $5,-56($16)
144         stq     $6,-64($16)
145  # cool down phase 2/3
    # Lowest result limb: only the sll part, nothing is shifted in from below.
146         stq     $24,-72($16)
147         ret     $31,($26),1
149  # cool down phase 1/1
    # Reached when exactly four limbs were left after warm up phase 1:
    # shift r1..r4 and combine them with the pending sll part in r24.
150 .Lend1: srl     $1,$20,$7
151         sll     $1,$19,$21
152         srl     $2,$20,$8
153         sll     $2,$19,$22
154         srl     $3,$20,$5
155         or      $7,$24,$7
156         sll     $3,$19,$23
157         or      $8,$21,$8
158         srl     $4,$20,$6
159         sll     $4,$19,$24
160  # cool down phase 1/2
    # Store the four combined limbs, then the lowest limb (sll part only).
161         stq     $7,-8($16)
162         or      $5,$22,$5
163         stq     $8,-16($16)
164         or      $6,$23,$6
165         stq     $5,-24($16)
166         stq     $6,-32($16)
167         stq     $24,-40($16)
168         ret     $31,($26),1
    # No limbs left after .Loop0: the pending sll part is the lowest result limb.
170 .Lend:  stq     $24,-8($16)
171         ret     $31,($26),1
172         .end    __mpn_lshift