1 /* memcmp - compare memory
3 Copyright (C) 2013-2023 Free Software Foundation, Inc.
5 This file is part of the GNU C Library.
7 The GNU C Library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 The GNU C Library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with the GNU C Library. If not, see
19 <https://www.gnu.org/licenses/>. */
25 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
/* NOTE(review): this excerpt does not show every line of the routine
   (entry/exit macros, labels, branches, some cmp instructions, the vector
   loads and the register alias definitions are absent).  The comments
   below describe only what the visible instructions do.  */

/* Load a register pair from the start of each buffer, then chain two
   conditional compares.  ccmp compares its operands only when its
   condition holds; otherwise it sets NZCV to the immediate 0, which reads
   as "not equal".  The flags consumed by the first ccmp are produced by an
   instruction that is not visible here.  */
53 ldp data1, data3, [src1]
54 ldp data2, data4, [src2]
55 ccmp data1, data2, 0, ne
56 ccmp data3, data4, 0, eq
/* End pointers: src + limit.  They are the base for the end-relative
   loads further down.  */
59 add src1end, src1, limit
60 add src2end, src2, limit
/* Same paired load at byte offset 16.  The ccmp compares the second pair
   only if the incoming flags are "equal" (set by an instruction not
   shown); otherwise it forces "not equal".  */
69 ldp data1, data3, [src1, 16]
70 ldp data2, data4, [src2, 16]
72 ccmp data3, data4, 0, eq
/* Same pattern again at byte offset 32.  */
77 ldp data1, data3, [src1, 32]
78 ldp data2, data4, [src2, 32]
80 ccmp data3, data4, 0, eq
88 /* Compare last 1-16 bytes using unaligned access. */
/* Both loads are addressed 16 bytes back from the end pointers.  */
90 ldp data1, data3, [src1end, -16]
91 ldp data2, data4, [src2end, -16]
/* On "not equal" keep data1/data2; otherwise replace them with
   data3/data4, so data1/data2 hold the pair to be ordered.  The flags
   tested here come from an instruction not visible in this excerpt.  */
94 csel data1, data1, data3, ne
95 csel data2, data2, data4, ne
97 /* Compare data bytes and set return value to 0, -1 or 1. */
/* Negate result when the preceding comparison was unsigned "lower".  */
105 cneg result, result, lo
/* Form the end pointers again, then test single bits of limit to pick a
   load width.  tbz branches to its label when the tested bit is zero.  */
110 add src1end, src1, limit
111 add src2end, src2, limit
112 tbz limit, 3, L(less8)
/* Loads addressed 8 bytes back from the end pointers.  */
115 ldr data3, [src1end, -8]
116 ldr data4, [src2end, -8]
121 tbz limit, 2, L(less4)
/* Loads addressed 4 bytes back from the end pointers, using the
   w-register aliases.  */
124 ldr data3w, [src1end, -4]
125 ldr data4w, [src2end, -4]
129 tbz limit, 1, L(less2)
/* Bit 0 of limit clear: branch to L(return_zero).  Otherwise load the
   byte just before each end pointer (ldrb zero-extends) and return their
   difference, whose sign reflects the order of the two bytes.  */
136 tbz limit, 0, L(return_zero)
137 ldrb data1w, [src1end, -1]
138 ldrb data2w, [src2end, -1]
139 sub result, data1w, data2w
/* Paired load and conditional compare at byte offset 16, as above.  */
144 ldp data1, data3, [src1, 16]
145 ldp data2, data4, [src2, 16]
147 ccmp data3, data4, 0, eq
150 /* Align src2 and adjust src1, src2 and limit. */
/* tmp is computed by instructions not shown in this excerpt.  */
154 add limit, limit, tmp
/* Bias limit down by 64 + 16; the same constant is added back below.  */
156 sub limit, limit, 64 + 16
/* Subtract 64 from limit and update the condition flags (presumably one
   64-byte step of the vector loop; the loop label and branch are not
   visible here).  */
162 subs limit, limit, 64
/* XOR pairs of 16-byte vectors: any nonzero byte in a result marks a
   position where the two inputs differ.  The vector loads themselves are
   not shown.  umaxp (pairwise unsigned maximum) then folds the XOR
   results together, so a nonzero byte survives into v0.  */
165 eor v0.16b, v0.16b, v1.16b
166 eor v1.16b, v2.16b, v3.16b
169 umaxp v0.16b, v0.16b, v1.16b
172 eor v1.16b, v2.16b, v3.16b
173 eor v2.16b, v4.16b, v5.16b
174 umaxp v1.16b, v1.16b, v2.16b
175 umaxp v0.16b, v0.16b, v1.16b
/* Final fold of v0 with itself.  */
176 umaxp v0.16b, v0.16b, v0.16b
181 /* If equal, process last 1-64 bytes using scalar loop. */
/* Undo the 64 + 16 bias applied to limit above.  */
182 add limit, limit, 64 + 16
185 /* Determine the 8-byte aligned offset of the first difference. */
/* Reload from both buffers at register offset tmp.  The computation of
   tmp is not visible in this excerpt.  */
193 ldr data1, [src1, tmp]
194 ldr data2, [src2, tmp]
/* Section assembled only when __AARCH64EB__ is undefined, i.e. for
   little-endian targets.  Its body and #endif are not in this excerpt.  */
195 #ifndef __AARCH64EB__
/* Negate result when the preceding comparison was unsigned "lower".  */
201 cneg result, result, lo
/* Publish the routine under additional names: bcmp as a weak alias and
   __memcmpeq as a strong alias of memcmp, followed by hidden definitions
   for memcmp and __memcmpeq.  NOTE(review): the alias and hidden-def
   macros are defined outside this excerpt; confirm their exact effect
   there.  */
206 weak_alias (memcmp, bcmp)
208 strong_alias (memcmp, __memcmpeq)
209 libc_hidden_builtin_def (memcmp)
210 libc_hidden_def (__memcmpeq)