1 /* Optimized strncmp implementation for PowerPC64/POWER9.
2 Copyright (C) 2016-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
18 #ifdef __LITTLE_ENDIAN__
21 /* Implements the function
23 int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t [r5] n)
25 The implementation uses unaligned doubleword access to avoid specialized
26 code paths depending on data alignment for the first 32 bytes and uses
27 vectorised loops after that. */
30 # define STRNCMP strncmp
33 /* TODO: Change this to actual instructions when minimum binutils is upgraded
34 to 2.27. Macros are defined below for these newer instructions in order
35 to maintain compatibility. */
/* Emit a raw 32-bit instruction word: the constant 0x10010602 OR-ed with
   operand r shifted left by 21 bits and operand v shifted left by 11 bits.
   Presumably this hand-encodes the POWER9 vctzlsbb instruction (going by
   the macro name) -- confirm against the Power ISA encoding.  */
36 # define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
38 # define VEXTUBRX(t,a,b) .long (0x1000070d \
43 # define VCMPNEZB(t,a,b) .long (0x10000507 \
48 /* Get 16 bytes for unaligned case.
49 reg1: Vector to hold next 16 bytes.
50 reg2: Address to read from.
51 reg3: Permute control vector. */
52 # define GET16BYTES(reg1, reg2, reg3) \
54 vperm v8, v2, reg1, reg3; \
55 vcmpequb. v8, v0, v8; \
66 vperm reg1, v9, reg1, reg3;
68 /* TODO: change this to .machine power9 when minimum binutils
69 is upgraded to 2.27. */
71 ENTRY_TOCLESS (STRNCMP, 4)
72 /* Check if size is 0. */
77 /* Check if [s1]+32 or [s2]+32 will cross a 4K page boundary using
80 (((size_t) s1) % PAGE_SIZE > (PAGE_SIZE - ITER_SIZE))
82 with PAGE_SIZE being 4096 and ITER_SIZE being 32. */
84 cmpldi cr7, r8, 4096-32
87 cmpldi cr7, r9, 4096-32
90 /* For short strings up to 32 bytes, load both s1 and s2 using
91 unaligned dwords and compare. */
99 bne cr0, L(different1)
101 /* If the strings compared are equal, but size is less than or equal
113 bne cr0, L(different1)
117 /* Update pointers and size. */
128 bne cr0, L(different1)
139 bne cr0, L(different1)
144 /* Update pointers and size. */
149 /* Now it has checked for first 32 bytes, align source1 to doubleword
150 and adjust source2 address. */
156 lvsr v6, 0, r4 /* Compute mask. */
161 /* Both s1 and s2 are unaligned. */
162 GET16BYTES(v5, r4, v6)
163 lvsr v10, 0, r3 /* Compute mask. */
166 GET16BYTES(v4, r3, v10)
171 /* Align s1 to qw and adjust s2 address. */
184 /* There are 2 loops depending on the input alignment.
185 Each loop gets 16 bytes from s1 and s2, checks for null
186 and compares them. Loops until a mismatch or null occurs. */
189 GET16BYTES(v5, r4, v6)
191 bne cr6, L(different)
199 GET16BYTES(v5, r4, v6)
201 bne cr6, L(different)
209 GET16BYTES(v5, r4, v6)
211 bne cr6, L(different)
219 GET16BYTES(v5, r4, v6)
221 bne cr6, L(different)
233 bne cr6, L(different)
243 bne cr6, L(different)
253 bne cr6, L(different)
263 bne cr6, L(different)
270 /* Calculate and return the difference. */
288 /* The code now checks if r8 and r5 are different by issuing a
289 cmpb and shifts the result based on its output:
291 leadzero = (__builtin_ffsl (z1) - 1);
292 leadzero = leadzero > (n-1)*8 ? (n-1)*8 : leadzero;
293 r1 = (r1 >> leadzero) & 0xFFUL;
294 r2 = (r2 >> leadzero) & 0xFFUL;
307 ble cr7, L(different2)
320 /* If unaligned 16 bytes reads across a 4K page boundary, it uses
321 a simple byte-by-byte comparison until the page alignment for s1
329 bne cr7, L(byte_ne_3)
331 beq cr7, L(byte_ne_0)
347 bne cr7, L(byte_ne_2)
348 beq cr5, L(byte_ne_0)
353 bdnz L(pagecross_loop0)
376 libc_hidden_builtin_def(strncmp)
378 #include <sysdeps/powerpc/powerpc64/power8/strncmp.S>