1 /* Vector optimized 32/64 bit S/390 version of strrchr.
2 Copyright (C) 2015-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 #if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
22 # include "asm-syntax.h"
26 /* char *strrchr (const char *s, int c)
27 Locate the last character c in string.
30 -r0=loaded bytes in first part of s.
31 -r1=pointer to last occurrence of c or NULL if not found.
37 -v17=index of found element
39 -v19=part of s with last occurrence of c.
/* Vector strrchr body. As used below, %r2 holds s and %r3 holds c; %r1
   starts at -1 meaning "no c found yet" and %r5 tracks current_len.
   NOTE(review): the definitions of the labels referenced below (.Lloop,
   .Lfound, .Lzero, .Lalign, ...) are not visible in this view -- confirm
   against the complete file before relying on the control flow. */
44 .machinemode "zarch_nohighgprs"
46 vlbb %v16,0(%r2),6 /* Load s until next 4k-byte boundary. */
47 lcbb %r0,0(%r2),6 /* Get bytes to 4k-byte boundary or 16. */
49 vlvgb %v18,%r3,0 /* Generate vector which elements are all c.
50 if c > 255, c will be truncated. */
53 lghi %r1,-1 /* Currently no c found. */
54 lghi %r5,0 /* current_len = 0. */
56 vfeezbs %v17,%v16,%v18 /* Find element equal or zero. */
57 vlgvb %r4,%v17,7 /* Load byte index of c/zero or 16. */
58 clrjl %r4,%r0,.Lfound_first_part /* Found c/zero in loaded bytes. */
60 /* Align s to 16 byte. */
61 risbgn %r4,%r2,60,128+63,0 /* %r4 = bits 60-63 of %r2 'and' 15. */
62 lghi %r5,16 /* current_len = 16. */
63 slr %r5,%r4 /* Compute bytes to 16bytes boundary. */
66 vl %v16,0(%r5,%r2) /* Load s. */
67 vfeezbs %v17,%v16,%v18 /* Find element equal with zero search. */
68 jno .Lfound /* Found c/zero (cc=0|1|2). */
70 vfeezbs %v17,%v16,%v18 /* Find element equal or zero (same operands as above). */
73 vfeezbs %v17,%v16,%v18 /* Find element equal or zero (same operands as above). */
76 vfeezbs %v17,%v16,%v18 /* Find element equal or zero (same operands as above). */
80 j .Lloop /* No character and no zero -> loop. */
83 la %r5,16(%r5) /* Use la since aghi would clobber cc. */
89 je .Lzero /* Found zero, but no c before that zero. */
90 /* Save this part of s to check for further matches after reaching
91 the end of the complete string. */
95 jh .Lzero /* Found a zero after the found c. */
96 aghi %r5,16 /* Start search of next part of s. */
100 /* This code is only executed if the found c/zero is within loaded
101 bytes. If no c/zero was found (cc==3) the found index = 16, thus
102 this code is not called.
103 Resulting condition code of vector find element equal:
104 cc==0: no c, found zero
105 cc==1: c found, no zero
106 cc==2: c found, found zero after c
107 cc==3: no c, no zero (this case can be ignored). */
108 je .Lzero /* Found zero, but no c before that zero. */
110 locgrne %r1,%r5 /* Mark c as found in first part of s. */
113 jl .Lalign /* No zero (e.g. if vr was fully loaded)
114 -> Align and loop afterwards. */
116 /* Found a zero in vr. If vr was not fully loaded due to block
117 boundary, the remaining bytes are filled with zero and we can't
118 rely on zero indication of condition code here! */
120 vfenezb %v17,%v16,%v16 /* Find zero. */
121 vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */
122 clrjl %r4,%r0,.Lzero /* Zero within loaded bytes -> end. */
123 j .Lalign /* Align and loop afterwards. */
126 vlgvb %r4,%v17,7 /* Load byte index of zero. */
128 la %r2,0(%r5,%r2) /* Return pointer to zero. */
132 /* Reached end of string. Check if one c was found before. */
133 clije %r3,0,.Lend_searched_zero /* Found zero and c is zero. */
135 cgfi %r1,-1 /* No c found -> return NULL. */
139 larl %r3,.Lpermute_mask /* Load address of permute mask. */
142 /* c was found and is part of v19. */
143 vfenezb %v17,%v19,%v19 /* Find zero. */
144 vlgvb %r4,%v17,7 /* Load byte index of zero or 16. */
146 clgfi %r5,0 /* Loaded byte count in v19 is 16, ... */
147 lochine %r0,16 /* ... if v19 is not the first part of s. */
148 ahi %r0,-1 /* Convert byte count to highest index. */
151 locrl %r4,%r0 /* r4 = min (zero-index, highest-index). */
153 /* Right-shift of v19 to mask bytes after zero. */
/* NOTE(review): in this view the comment opened on the clije line below
   has no closing delimiter on that line, so it runs on to the end of the
   following slr line -- confirm against the complete file. */
154 clije %r4,15,.Lzero_permute /* No shift is needed if highest index
157 slr %r0,%r4 /* Compute byte count for vector shift right. */
158 sll %r0,3 /* Convert to bit count. */
160 vsrlb %v19,%v19,%v17 /* Vector shift right by byte by number of bytes
161 specified in bits 1-4 of byte 7 in v17. */
163 /* Reverse bytes in v19. */
165 vperm %v19,%v19,%v19,%v20 /* Permute v19 to reversed order. */
167 /* Find c in reversed v19. */
168 vfeeb %v19,%v19,%v18 /* Find c. */
170 vlgvb %r3,%v19,7 /* Load byte index of c. */
172 /* Compute index in real s and return. */
174 la %r2,0(%r4,%r2) /* %r2 = s + %r4. NOTE(review): this comment used to say
   "pointer to zero", but this path follows the search for c -- confirm. */
/* Byte indices 15 down to 0; presumably the data behind .Lpermute_mask
   that drives the byte reversal above -- the label is not visible here. */
177 .byte 0x0F,0x0E,0x0D,0x0C,0x0B,0x0A,0x09,0x08
178 .byte 0x07,0x06,0x05,0x04,0x03,0x02,0x01,0x00
180 #endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */