sysdeps/aarch64/strchrnul.S

   1 /* strchrnul - find a character or nul in a string
   2
   3    Copyright (C) 2014-2024 Free Software Foundation, Inc.
   4
   5    This file is part of the GNU C Library.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library.  If not, see
  19    <https://www.gnu.org/licenses/>.  */
  20
  21 #include <sysdep.h>
  22
  23 /* Assumptions:
  24  *
  25  * ARMv8-a, AArch64, Advanced SIMD.
  26  * MTE compatible.
  27  */
  28
  29 #define srcin           x0      /* In: pointer to the string to scan.  */
  30 #define chrin           w1      /* In: byte to look for (low 8 bits are broadcast).  */
  31 #define result          x0      /* Out: shares x0 with srcin.  */
  32
  33 #define src             x2      /* 16-byte-aligned address of the current chunk.  */
  34 #define tmp1            x1      /* Scratch; same register as chrin, which is consumed by dup before tmp1 is written.  */
  35 #define tmp2            x3      /* Scratch; holds srcin << 2 as a shift count.  */
  36
  37 #define vrepchr         v0      /* chrin replicated into all 16 byte lanes.  */
  38 #define vdata           v1      /* Current 16-byte chunk (vector view).  */
  39 #define qdata           q1      /* Same register as vdata, named as a Q register for ldr.  */
  40 #define vhas_nul        v2      /* Not referenced by the code visible in this file.  */
  41 #define vhas_chr        v3      /* Per-byte mask: set where the byte equals chrin or is zero.  */
  42 #define vend            v4      /* Mask narrowed/reduced to 64 bits.  */
  43 #define dend            d4      /* Low 64 bits of vend, moved to tmp1 with fmov.  */
  44
  45 /*
  46    Core algorithm:
  47    For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
  48    per byte. We take 4 bits of every comparison byte with a shift-right-and-narrow
  49    by 4 (shrn) instruction. Since the bits in the nibble mask reflect the order in
  50    which things occur in the original string, counting leading zeros (after an
  51    rbit bit reversal where needed) identifies exactly which byte matched.  */
  52
  53 ENTRY (__strchrnul)
             /* Find the first byte at or after srcin that equals chrin or is
                zero, and return its address.

                In:   srcin (x0)  = string pointer
                      chrin (w1)  = byte to find (only the low 8 bits are used)
                Out:  result (x0) = address of the matching byte or of the NUL

                The instructions visible here write only x0-x3 and v0, v1, v3,
                v4, and never touch sp.  ENTRY, END and PTR_ARG are macros
                defined outside this file.  */
  54         PTR_ARG (0)     /* External macro; presumably prepares pointer argument 0 - confirm in its definition.  */
  55         bic     src, srcin, 15          /* src = srcin rounded down to a 16-byte boundary.  */
  56         dup     vrepchr.16b, chrin      /* Broadcast the target byte to every lane.  */
  57         ld1     {vdata.16b}, [src]      /* Aligned chunk containing srcin; may include bytes before it.  */
  58         cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b    /* 0xff in lanes equal to chrin.  */
             /* Unsigned "mask >= data" holds where the mask is 0xff (a match)
                or where the data byte is 0 (NUL), so this single compare folds
                both stop conditions into vhas_chr.  */
  59         cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b
             /* Shift count for the lsr below: four mask bits per byte.  A
                register shift uses the count modulo 64, so the effective shift
                is 4 * (srcin & 15).  */
  60         lsl     tmp2, srcin, 2
  61         shrn    vend.8b, vhas_chr.8h, 4         /* 128->64 */
             /* tmp1 is x1, i.e. the register chrin arrived in; chrin is no
                longer needed after the dup above.  */
  62         fmov    tmp1, dend
  63         lsr     tmp1, tmp1, tmp2        /* Mask padding bits.  */
  64         cbz     tmp1, L(loop)           /* No hit at or after srcin in the first chunk.  */
  65
             /* rbit + clz counts the zero bits below the lowest set bit.  With
                four mask bits per byte, >> 2 turns that into a byte offset,
                which is relative to srcin because of the lsr above.  */
  66         rbit    tmp1, tmp1
  67         clz     tmp1, tmp1
  68         add     result, srcin, tmp1, lsr 2
  69         ret
  70
  71         .p2align 4      /* Align the loop entry to 16 bytes.  */
  72 L(loop):
             /* Main loop, unrolled twice: checks the chunks at src + 16 and
                src + 32, advancing src by 32 per iteration.  */
  73         ldr     qdata, [src, 16]
  74         cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b
  75         cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b
             /* Pairwise max folds the 16 mask bytes into the low 8 bytes of
                vend, so dend is nonzero iff some lane hit.  */
  76         umaxp   vend.16b, vhas_chr.16b, vhas_chr.16b
  77         fmov    tmp1, dend
  78         cbnz    tmp1, L(end)            /* Hit in the chunk at src + 16; src is unchanged.  */
  79         ldr     qdata, [src, 32]!       /* Pre-index with writeback: src += 32, then load from src.  */
  80         cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b
  81         cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b
  82         umaxp   vend.16b, vhas_chr.16b, vhas_chr.16b
  83         fmov    tmp1, dend
  84         cbz     tmp1, L(loop)
             /* Hit in the second chunk, which src already points at.  Step back
                16 so that both exits reach L(end) with the hit chunk at
                src + 16.  */
  85         sub     src, src, 16
  86 L(end):
             /* vhas_chr still holds the full mask of the chunk that hit;
                rebuild the 4-bits-per-byte mask to locate the exact byte.  */
  87         shrn    vend.8b, vhas_chr.8h, 4         /* 128->64 */
  88         add     src, src, 16            /* src = address of the chunk that hit.  */
  89         fmov    tmp1, dend
             /* The bit reversal is skipped when __AARCH64EB__ is defined, so
                clz then counts from the top of the mask.  Presumably this is
                because ldr of a Q register orders the bytes differently on
                big-endian - confirm against the architecture manual.  */
  90 #ifndef __AARCH64EB__
  91         rbit    tmp1, tmp1
  92 #endif
  93         clz     tmp1, tmp1
  94         add     result, src, tmp1, lsr 2        /* Bit index / 4 = byte offset within the chunk.  */
  95         ret
  96
  97 END(__strchrnul)
             /* External macros; presumably they give __strchrnul a hidden
                internal definition and export strchrnul as a weak alias of it
                - confirm in their definitions.  */
  98 libc_hidden_def (__strchrnul)
  99 weak_alias (__strchrnul, strchrnul)