1 /* Optimized version of the standard strncpy() function.
2 This file is part of the GNU C Library.
3 Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
4 Contributed by Dan Pop <Dan.Pop@cern.ch>
5 and Jakub Jelinek <jakub@redhat.com>.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library; if not, write to the Free
19 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
29 In this form, it assumes little endian mode. */
// strncpy for IA-64: in0 = dest, in1 = src, in2 = byte count; dest is
// returned in ret0.
// NOTE(review): the embedded listing numbers jump in several places, so the
// entry label, the register-alias definitions (tmp, dest, src, len, c, sh1,
// sh2, thresh, pos, value, loopcnt, dest2, saved_pr, saved_lc), MEMLAT, the
// rotating-register declarations and a number of labels and branches are not
// visible here. The comments below describe only the instructions shown;
// anything that relies on a missing line is marked as an assumption.
//
// Register frame: 3 inputs, 0 locals, 29 outputs, 32 rotating registers;
// the caller's ar.pfs is kept in r2.
55 alloc r2 = ar.pfs, 3, 0, 29, 32
61 mov ret0 = in0 // return value = dest
63 mov saved_pr = pr // save the predicate registers
65 mov saved_lc = ar.lc // save the loop counter
66 mov ar.ec = 0 // ec is not guaranteed to
67 // be zero upon function entry
// Counts of 24 or less (unsigned compare) take the byte-wise path at
// .short_len. p5 is the complement of p6, so p5 is set on the fall-through
// (long) path and p6 is set on entry to the short path.
69 cmp.geu p6, p5 = 24, in2
70 (p6) br.cond.spnt .short_len
// tmp = (-dest) & 7 = bytes needed to bring dest up to an 8-byte boundary.
71 sub tmp = r0, in0 ;; // tmp = -dest
73 mov dest = in0 // dest
75 and tmp = 7, tmp ;; // loopcnt = -dest % 8
// p6: dest already 8-byte aligned (tmp == 0); p7: tmp != 0.
76 cmp.eq p6, p7 = tmp, r0
77 adds loopcnt = -1, tmp // --loopcnt
78 (p6) br.cond.sptk .dest_aligned ;;
79 sub len = len, tmp // len -= -dest % 8
// Head loop: one byte per pass, load predicated on p5.
// NOTE(review): the instructions that load ar.lc, update p5/p7 and branch
// back to .l1 are not visible; presumably p7 ends up set only when a NUL
// byte was copied -- confirm against the full file.
81 .l1: // copy -dest % 8 bytes
82 (p5) ld1 c = [src], 1 // c = *src++
84 st1 [dest] = c, 1 // *dest++ = c
87 (p7) br.cond.dpnt .found0_align

// dest is 8-byte aligned from here on. Set up the word-at-a-time loops:
// c = number of whole 8-byte words, sh1/sh2 = bit shifts used to realign
// an unaligned src, asrc = src rounded down to an 8-byte boundary.
89 .dest_aligned: // p7 should be cleared here
90 shr.u c = len, 3 // c = len / 8
91 and sh1 = 7, src // sh1 = src % 8
92 and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src
93 adds c = (MEMLAT-1), c // c = (len / 8) + MEMLAT - 1
// NOTE(review): thresh is used below but its initialisation is not visible.
// Only bit 16 of the immediate is set, i.e. p16 = 1 and the remaining
// rotating predicates are cleared.
95 mov pr.rot = 1 << 16 // set rotating predicates
96 shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8)
97 mov ar.lc = c // "infinite" loop
98 sub sh2 = 64, sh1 // sh2 = 64 - sh1
// p6 set means src is 8-byte aligned as well; that case uses .src_aligned.
// On the fall-through path p6 is therefore clear.
99 cmp.eq p6, p0 = sh1, r0 // is the src aligned?
100 (p6) br.cond.sptk .src_aligned
101 adds c = -(MEMLAT-1), c ;; // c = (len / 8)
// Unaligned-src loop body.
// NOTE(review): the loop label, the initial load of r[1], the reload of
// ar.lc and the loop-closing branch are not visible here.
// The store is predicated on p6, which is clear on first entry (see the
// compare above) and is set by the cmp.ne below once a full word with no
// zero byte has been assembled in value.
107 (p6) st8 [dest] = value, 8 // store val to dest
// Speculative load of the next aligned source word; a deferred fault is
// only examined by the chk.s further down.
108 ld8.s r[0] = [asrc], 8
109 shr.u value = r[1], sh1 ;; // value = w0 >> sh1
// czx1.r yields the index of the first zero byte counting from the least
// significant byte, or 8 when the word has no zero byte.
110 czx1.r pos = value ;; // do we have an "early" zero
111 cmp.lt p7, p0 = pos, thresh // in w0 >> sh1?
112 adds len = -8, len // len -= 8
113 (p7) br.cond.dpnt .nonalign_found0
// No early zero: the next word is really needed, so validate the
// speculative load and divert to .recovery2 if it was deferred.
114 chk.s r[0], .recovery2 // it is safe to do that only
115 .back2: // after the previous test
116 shl tmp = r[0], sh2 // tmp = w1 << sh2
118 or value = value, tmp ;; // value |= tmp
119 czx1.r pos = value ;;
// p7: the assembled word contains a zero byte; p6: it does not (pos == 8).
120 cmp.ne p7, p6 = 8, pos
121 (p7) br.cond.dpnt .nonalign_found0
// NOTE(review): listing lines 122-123 are missing; presumably the
// loop-closing branch sits there and this is the loop-exhausted exit.
124 br.cond.sptk .not_found0 ;;
// NOTE(review): presumably the .nonalign_found0 target (label not visible).
// Unsigned test: p6 is set unless len lies in [-8, -1], i.e. unless the
// "len -= 8" above wrapped below zero.
126 cmp.gtu p6, p0 = -8, len
127 (p6) br.cond.dptk .found0
129 br.cond.sptk .not_found0 ;;
// Aligned-src loop body, staged with rotating predicates/registers:
// stage p[0] issues a speculative 8-byte load, stage p[MEMLAT] checks,
// scans and stores a word held in r[MEMLAT].
// NOTE(review): the .src_aligned label and the loop-closing branch are not
// visible; the staging description assumes that branch rotates the
// registers -- confirm against the full file.
134 (p[0]) ld8.s r[0] = [src], 8
135 (p[MEMLAT]) chk.s r[MEMLAT], .recovery3
137 (p[MEMLAT]) mov value = r[MEMLAT]
138 (p[MEMLAT]) czx1.r pos = r[MEMLAT] ;;
// p7: this word contains a zero byte (pos != 8).
139 (p[MEMLAT]) cmp.ne p7, p0 = 8, pos
140 (p[MEMLAT]) adds len = -8, len // len -= 8
// The branch precedes the store, so a word containing a zero byte is left
// for .found0 to write.
141 (p7) br.cond.dpnt .found0
142 (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8
// After the aligned loop: validate the still-pending speculative word and
// keep it in value for the byte-wise tail.
145 chk.s r[MEMLAT-1], .recovery4
147 mov value = r[MEMLAT-1]
// Tail with no zero byte seen so far (presumably .not_found0; label not
// visible). p5: nothing left to copy; p6: at least one byte remains.
150 cmp.eq p5, p6 = len, r0
152 (p5) br.cond.dptk .restore_and_exit ;;
// Peel the next byte off the low end of value while p6 holds.
// NOTE(review): the byte store and the loop branch are not visible here.
155 (p6) extr.u c = value, 0, 8 // c = value & 0xff
156 (p6) shr.u value = value, 8 ;;
// p6 drops once a zero byte has been extracted; c then stays 0 because the
// extract above is predicated on p6.
158 cmp.ne p6, p0 = c, r0
160 br.cond.sptk .restore_and_exit
// Zero-byte-found path: write the final data word, then zero-fill.
// NOTE(review): the .found0/.found0_align labels, the construction of the
// mask in c, the len adjustments and the 16-byte fill loop are not visible.
168 shr.u loopcnt = len, 4 // loopcnt = len / 16
// p6: fewer than 16 bytes remain, so the 16-byte fill loop is skipped.
170 cmp.eq p6, p0 = loopcnt, r0
171 adds loopcnt = -1, loopcnt
// value &= ~c; c is presumably a mask of the bytes that follow the NUL.
174 andcm value = value, c
175 mov ar.lc = loopcnt ;;
// p7: at least 8 bytes remain (signed compare) -> one more zero word at .l6.
176 cmp.le p7, p0 = 8, len
177 adds dest2 = 16, dest
178 st8 [dest] = value, 8
180 (p6) br.cond.dpnt .l6 ;;
186 (p7) st8 [dest] = r0, 8
187 cmp.eq p5, p0 = len, r0
189 (p5) br.cond.dptk .restore_and_exit ;;
// Epilogue (presumably .restore_and_exit; label and return branch are not
// visible).
195 mov ar.lc = saved_lc // restore the loop counter
196 mov pr = saved_pr, -1 // restore the predicate registers
// Short path (presumably .short_len; label not visible): byte-at-a-time
// copy driven directly from in0/in1/in2. A count of zero returns at once.
200 cmp.eq p5, p0 = in2, r0
201 adds loopcnt = -1, in2
202 (p5) br.cond.spnt .restore_and_exit ;;
203 mov ar.lc = loopcnt // p6 should be set when we get here
// While p6 holds a source byte is loaded; once a zero byte has been read
// p6 drops, the load is skipped and c stays 0, so later stores write 0.
// NOTE(review): the loop label and loop-closing branch are not visible.
205 (p6) ld1 c = [in1], 1 // c = *src++
207 st1 [in0] = c, 1 // *dest++ = c
208 (p6) cmp.ne p6, p0 = c, r0
211 mov ar.lc = saved_lc // restore the loop counter
212 mov pr = saved_pr, -1 // restore the predicate registers
// Recovery stub for the chk.s in the unaligned loop (presumably .recovery2;
// label, the setup of c and the branch back are not visible).
// asrc was already post-incremented by 8, so tmp addresses the word whose
// speculative load was deferred; it is reloaded non-speculatively only
// when c > thresh (unsigned).
216 add tmp = -8, asrc ;;
217 cmp.gtu p8, p5 = c, thresh ;;
218 (p8) ld8 r[0] = [tmp]
// Recovery stub for the chk.s inside the aligned loop (presumably
// .recovery3): non-speculative reload from (MEMLAT + 1) words behind src.
222 add tmp = -(MEMLAT + 1) * 8, src ;;
223 ld8 r[MEMLAT] = [tmp]
// Recovery stub for the chk.s after the aligned loop (presumably
// .recovery4): reload only if bytes remain (p6); otherwise use zero (p5)
// so memory is not touched when len is 0.
226 cmp.eq p5, p6 = len, r0
227 add tmp = -MEMLAT * 8, src ;;
228 (p6) ld8 r[MEMLAT - 1] = [tmp]
229 (p5) mov r[MEMLAT - 1] = r0
// glibc macro defined outside this file; presumably provides the hidden
// internal alias for strncpy -- confirm against the libc symbol headers.
232 libc_hidden_builtin_def (strncpy)