1 /* Optimized version of the standard strcpy() function.
2 This file is part of the GNU C Library.
3 Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
4 Contributed by Dan Pop <Dan.Pop@cern.ch>.
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 In this form, it assumes little endian mode. For big endian mode, the
28 two shifts in .l2 must be inverted:
30 shl value = r[1], sh1 // value = w0 << sh1
31 shr.u tmp = r[0], sh2 // tmp = w1 >> sh2
// strcpy (IA-64): copy the zero-terminated string at src to dest and
// return dest in ret0.
//
// Outline of the visible code:
//   1. Copy single bytes until dest is 8-byte aligned (.l1).
//   2. If src is then not 8-byte aligned, build each 8-byte output word
//      from two consecutive aligned source words using the shift counts
//      sh1/sh2 (the shr.u/shl/or sequence).
//   3. If src is 8-byte aligned, copy whole words with a predicated,
//      multi-stage loop (the p[0] / p[MEMLAT] instructions).
//   4. Once a zero byte is detected in `value`, emit bytes from `value`
//      one at a time (extr.u / shr.u).
//   5. Restore ar.lc and the predicate registers.
//   6. Recovery code re-issues a speculative load non-speculatively.
//
// NOTE(review): this excerpt does not show the symbolic register names
// (dest, src, tmp, thresh, r[], p[], MEMLAT, ...), several labels
// (.dest_aligned, .src_aligned, .found0, .restore_and_exit, .recovery2,
// .recovery3), the initialisation of src and thresh, or the loop-back
// and return branches. Comments about those parts are marked as
// assumptions to confirm against the full file.

// Save ar.pfs in r2 and size the register frame: 2 inputs, 0 locals,
// 30 outputs, 32 rotating registers.
56 alloc r2 = ar.pfs, 2, 0, 30, 32
62 mov ret0 = in0 // return value = dest
64 mov saved_pr = pr // save the predicate registers
66 mov saved_lc = ar.lc // save the loop counter
68 sub tmp = r0, in0 ;; // tmp = -dest
69 mov dest = in0 // dest: working destination pointer, advanced by the stores
// NOTE(review): src is read below but its initialisation (presumably
// from in1) is not visible in this excerpt.
71 and loopcnt = 7, tmp ;; // loopcnt = -dest % 8
72 cmp.eq p6, p0 = loopcnt, r0 // p6 = dest is already 8-byte aligned
73 adds loopcnt = -1, loopcnt // --loopcnt
74 (p6) br.cond.sptk .dest_aligned ;; // nothing to copy bytewise
// NOTE(review): loopcnt is presumably moved into ar.lc here to drive a
// counted loop; that instruction is not visible.
76 .l1: // copy -dest % 8 bytes
77 ld1 c = [src], 1 // c = *src++
79 st1 [dest] = c, 1 // *dest++ = c
// NOTE(review): the compare that sets p6 for this early exit (presumably
// c == 0, i.e. the terminator was just copied) and the loop-back branch
// are not visible in this excerpt.
81 (p6) br.cond.dpnt .restore_and_exit
// dest is 8-byte aligned from here on; work out how src is aligned.
84 and sh1 = 7, src // sh1 = src % 8
85 mov ar.lc = -1 // "infinite" loop
86 and asrc = -8, src ;; // asrc = src & -OPSIZ -- align src
// NOTE(review): thresh is used below but is not set in the visible code.
88 mov pr.rot = 1 << 16 // set rotating predicates: p16 = 1, the rest 0
89 cmp.ne p7, p0 = r0, r0 // clear p7
90 shl sh1 = sh1, 3 ;; // sh1 = 8 * (src % 8)
91 sub sh2 = 64, sh1 // sh2 = 64 - sh1
92 cmp.eq p6, p0 = sh1, r0 // is the src aligned?
93 (p6) br.cond.sptk .src_aligned ;;
// Unaligned source: w0 is the aligned word containing the first
// remaining source byte, w1 the aligned word after it.
94 ld8 r[1] = [asrc],8 ;; // r[1] = w0; asrc += 8
// Speculative load: a fault is deferred until the chk.s below.
98 ld8.s r[0] = [asrc], 8 // r[0] = w1 (speculative); asrc += 8
99 shr.u value = r[1], sh1 ;; // value = w0 >> sh1
// czx1.r yields the index of the first zero byte counting from the
// least-significant byte, or 8 when there is none.
100 czx1.r pos = value ;; // do we have an "early" zero
101 cmp.lt p7, p0 = pos, thresh // in w0 >> sh1?
102 (p7) br.cond.dpnt .found0 // terminator lies in w0; w1 is not needed
103 chk.s r[0], .recovery2 // it is safe to do that only
104 .back2: // after the previous test
105 shl tmp = r[0], sh2 // tmp = w1 << sh2
107 or value = value, tmp ;; // value |= tmp
108 czx1.r pos = value ;; // pos = index of first zero byte in value, 8 if none
109 cmp.ne p7, p0 = 8, pos // p7 = value contains a zero byte
110 (p7) br.cond.dpnt .found0
111 st8 [dest] = value, 8 // store val to dest
// NOTE(review): the branch that closes this loop is not visible.

// Aligned source: stage p[0] issues a speculative load, stage p[MEMLAT]
// checks and consumes the word loaded by an earlier iteration.
// NOTE(review): MEMLAT and the mapping of p[]/r[] onto rotating
// registers are defined outside this excerpt, as is the loop branch.
115 (p[0]) ld8.s r[0] = [src], 8 // speculative 8-byte load; src += 8
116 (p[MEMLAT]) chk.s r[MEMLAT], .recovery3 // redo the load if it was deferred
118 (p[MEMLAT]) mov value = r[MEMLAT] // keep the word for the byte-wise tail
119 (p[MEMLAT]) czx1.r pos = r[MEMLAT] ;; // pos = index of first zero byte, 8 if none
120 (p[MEMLAT]) cmp.ne p7, p0 = 8, pos // p7 = word contains a zero byte
121 (p7) br.cond.dpnt .found0
122 (p[MEMLAT]) st8 [dest] = r[MEMLAT], 8 // no terminator: store the word; dest += 8
// Tail: peel bytes off `value`, least-significant byte first.
// NOTE(review): the byte store, the loop setup (presumably driven by
// pos) and the loop-back branch are not visible in this excerpt.
127 extr.u c = value, 0, 8 // c = value & 0xff
128 shr.u value = value, 8 // move the next byte into the low 8 bits
// Common exit path.
// NOTE(review): the return branch is not visible in this excerpt.
133 mov ar.lc = saved_lc // restore the loop counter
134 mov pr = saved_pr, -1 // restore the predicate registers
// Recovery for the speculative load of w1: that load post-incremented
// asrc by 8, so asrc - 8 is the address it read.
// NOTE(review): the non-speculative reload and the branch back to
// .back2 are not visible in this excerpt.
137 add tmp = -8, asrc ;;
// Recovery for the aligned loop: recompute the address of the word being
// checked and reload it without speculation.
// NOTE(review): the (MEMLAT + 1) * 8 offset presumably accounts for the
// post-increments of src since that load was issued - confirm against
// the loop branch, which is not visible here.
141 add tmp = -(MEMLAT + 1) * 8, src ;;
142 ld8 r[MEMLAT] = [tmp] // non-speculative reload
145 libc_hidden_builtin_def (strcpy)