sysdeps/aarch64/strchrnul.S

   1 /* strchrnul - find a character or nul in a string
   2
   3    Copyright (C) 2014-2021 Free Software Foundation, Inc.
   4
   5    This file is part of the GNU C Library.
   6
   7    The GNU C Library is free software; you can redistribute it and/or
   8    modify it under the terms of the GNU Lesser General Public
   9    License as published by the Free Software Foundation; either
  10    version 2.1 of the License, or (at your option) any later version.
  11
  12    The GNU C Library is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15    Lesser General Public License for more details.
  16
  17    You should have received a copy of the GNU Lesser General Public
  18    License along with the GNU C Library.  If not, see
  19    <https://www.gnu.org/licenses/>.  */
  20
  21 #include <sysdep.h>
  22
  23 /* Assumptions:
  24  *
  25  * ARMv8-a, AArch64, Advanced SIMD.
  26  * MTE compatible.
  27  */
  28
  29 #define srcin           x0      /* Argument 0: pointer to the string.  */
  30 #define chrin           w1      /* Argument 1: character to look for.  */
  31 #define result          x0      /* Return value; same register as srcin.  */
  32
  33 #define src             x2      /* 16-byte-aligned pointer to the current chunk.  */
  34 #define tmp1            x1      /* Syndrome/scratch; shares x1 with chrin.  */
  35 #define tmp2            x3      /* Scratch: mask constant, then shift count.  */
  36 #define tmp2w           w3      /* 32-bit view of tmp2.  */
  37
  38 #define vrepchr         v0      /* chrin replicated into all 16 byte lanes.  */
  39 #define vdata           v1      /* Current 16-byte chunk of the string.  */
  40 #define qdata           q1      /* 128-bit scalar view of vdata.  */
  41 #define vhas_nul        v2      /* Not referenced by the code visible in this file.  */
  42 #define vhas_chr        v3      /* Per-byte mask: set where byte is chrin or NUL.  */
  43 #define vrepmask        v4      /* 0xf00f replicated into each halfword lane.  */
  44 #define vend            v5      /* Pairwise reduction of vhas_chr.  */
  45 #define dend            d5      /* Low 64 bits of vend.  */
  46
  47 /* Core algorithm:
  48
  49    For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
  50    per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
  51    requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
  52    set likewise for odd bytes so that adjacent bytes can be merged. Since the
  53    bits in the syndrome reflect the order in which things occur in the original
  54    string, counting trailing zeros identifies exactly which byte matched.  */
  55
  56 ENTRY (__strchrnul)             /* Returns in x0 the address of the first byte equal to chrin or NUL.  */
  57         PTR_ARG (0)             /* Macro from sysdep.h; presumably normalizes pointer argument 0 - confirm.  */
  58         bic     src, srcin, 15          /* src = srcin rounded down to a 16-byte boundary.  */
  59         dup     vrepchr.16b, chrin      /* Broadcast chrin now; x1 is reused as tmp1 below.  */
  60         ld1     {vdata.16b}, [src]      /* Aligned chunk; may contain bytes before srcin.  */
  61         mov     tmp2w, 0xf00f
  62         dup     vrepmask.8h, tmp2w      /* 0x0f in even byte lanes, 0xf0 in odd byte lanes.  */
  63         cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b    /* 0xff where byte == chrin.  */
  64         cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b   /* Unsigned >=: also 0xff where byte == 0.  */
  65         lsl     tmp2, srcin, 2          /* Shift count: 4 syndrome bits per byte of misalignment.  */
  66         and     vhas_chr.16b, vhas_chr.16b, vrepmask.16b        /* Keep one nibble per byte.  */
  67         addp    vend.16b, vhas_chr.16b, vhas_chr.16b            /* 128->64 */
  68         fmov    tmp1, dend              /* 64-bit syndrome into a general register.  */
  69         lsr     tmp1, tmp1, tmp2        /* Mask padding bits (register shift is taken modulo 64).  */
  70         cbz     tmp1, L(loop)           /* No chrin or NUL at or after srcin in this chunk.  */
  71         /* Match in the first chunk: bit 0 of tmp1 now corresponds to the byte at srcin.  */
  72         rbit    tmp1, tmp1              /* rbit + clz = count trailing zeros; unconditional here.  */
  73         clz     tmp1, tmp1
  74         add     result, srcin, tmp1, lsr 2      /* Bit index / 4 = byte offset from srcin.  */
  75         ret
  76
  77         .p2align 4
  78 L(loop):
  79         ldr     qdata, [src, 16]!       /* Pre-index: src += 16, then load the next chunk.  */
  80         cmeq    vhas_chr.16b, vdata.16b, vrepchr.16b
  81         cmhs    vhas_chr.16b, vhas_chr.16b, vdata.16b   /* 0xff where byte is chrin or NUL, as above.  */
  82         umaxp   vend.16b, vhas_chr.16b, vhas_chr.16b    /* Pairwise max: nonzero iff any lane is set.  */
  83         fmov    tmp1, dend
  84         cbz     tmp1, L(loop)           /* Nothing found in this chunk; keep scanning.  */
  85         /* Something matched: build the 4-bit-per-byte syndrome to locate the first hit.  */
  86         and     vhas_chr.16b, vhas_chr.16b, vrepmask.16b
  87         addp    vend.16b, vhas_chr.16b, vhas_chr.16b            /* 128->64 */
  88         fmov    tmp1, dend
  89 #ifndef __AARCH64EB__   /* NOTE(review): presumably ldr q lane order differs on big-endian, unlike ld1 .16b above - confirm.  */
  90         rbit    tmp1, tmp1              /* Little-endian only: turn the clz below into a trailing-zero count.  */
  91 #endif
  92         clz     tmp1, tmp1
  93         add     result, src, tmp1, lsr 2        /* Bit index / 4 = byte offset from the chunk base.  */
  94         ret
  95
  96 END(__strchrnul)
  97 weak_alias (__strchrnul, strchrnul)     /* Macro defined elsewhere; presumably exports strchrnul as a weak alias.  */