From 81c6380887c6d62c56e5f0f85a241f759f58b2fd Mon Sep 17 00:00:00 2001 From: Stefan Liebler Date: Wed, 25 May 2016 17:18:05 +0200 Subject: [PATCH] S390: Optimize iso-8859-1 to ibm037 iconv-module. This patch reworks the s390-specific module, which used the z900 translate-one-to-one (troo) instruction. It now uses the g5 translate (tr) instruction, because tr outperforms troo. ChangeLog: * sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP): Rename to TR_LOOP and use the tr instruction instead of troo. --- ChangeLog | 5 ++ sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c | 93 +++++++++++++++++----------- 2 files changed, 61 insertions(+), 37 deletions(-) diff --git a/ChangeLog b/ChangeLog index 285f4fb8dd..f303dea9d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2016-05-25 Stefan Liebler + * sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP): + Rename to TR_LOOP and use the tr instruction instead of troo. + +2016-05-25 Stefan Liebler + * sysdeps/s390/multiarch/gconv_simple.c: New File. * sysdeps/s390/multiarch/Makefile (sysdep_routines): Add gconv_simple. diff --git a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c index c59f87f18d..3b63e6a94f 100644 --- a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c +++ b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c @@ -1,7 +1,6 @@ /* Conversion between ISO 8859-1 and IBM037. - This module uses the Z900 variant of the Translate One To One - instruction. + This module uses the translate instruction. Copyright (C) 1997-2016 Free Software Foundation, Inc. Author: Andreas Krebbel @@ -176,50 +175,70 @@ __attribute__ ((aligned (8))) = #define MIN_NEEDED_FROM 1 #define MIN_NEEDED_TO 1 -/* The Z900 variant of troo forces us to always specify a test - character which ends the translation. So if we run into the - situation where the translation has been interrupted due to the - test character we translate the character by hand and jump back - into the instruction. 
*/ - -#define TROO_LOOP(TABLE) \ +#define TR_LOOP(TABLE) \ { \ - register const unsigned char test __asm__ ("0") = 0; \ - register const unsigned char *pTable __asm__ ("1") = TABLE; \ - register unsigned char *pOutput __asm__ ("2") = outptr; \ - register uint64_t length __asm__ ("3"); \ - const unsigned char* pInput = inptr; \ - uint64_t tmp; \ - \ - length = (inend - inptr < outend - outptr \ - ? inend - inptr : outend - outptr); \ + size_t length = (inend - inptr < outend - outptr \ + ? inend - inptr : outend - outptr); \ \ - __asm__ volatile ("0: \n\t" \ - " troo %0,%1 \n\t" \ - " jz 1f \n\t" \ - " jo 0b \n\t" \ - " llgc %3,0(%1) \n\t" \ - " la %3,0(%3,%4) \n\t" \ - " mvc 0(1,%0),0(%3) \n\t" \ - " aghi %1,1 \n\t" \ - " aghi %0,1 \n\t" \ - " aghi %2,-1 \n\t" \ - " j 0b \n\t" \ - "1: \n" \ + /* Process in 256 byte blocks: mvc copies a block from inptr to outptr, then tr translates the copy in place through TABLE. brctg repeats this once for each of the length / 256 blocks. */ \ + if (__builtin_expect (length >= 256, 0)) \ + { \ + size_t blocks = length / 256; \ + __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t" \ + " tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t" \ + " la %[R_IN],256(%[R_IN])\n\t" \ + " la %[R_OUT],256(%[R_OUT])\n\t" \ + " brctg %[R_LI],0b\n\t" \ + : /* outputs */ [R_IN] "+a" (inptr) \ + , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \ + : /* inputs */ [R_TBL] "a" (TABLE) \ + : /* clobber list */ "memory" \ + ); \ + length = length % 256; \ + } \ \ - : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp) \ - : "a" (pTable), "d" (test) \ - : "cc"); \ + /* Process the remaining 0...248 bytes in 8-byte blocks by plain table lookup (length is below 256 whenever it is 8 or more here, so at most 31 blocks). 
*/ \ + if (length >= 8) \ + { \ + size_t blocks = length / 8; \ + for (int i = 0; i < blocks; i++) \ + { \ + outptr[0] = TABLE[inptr[0]]; \ + outptr[1] = TABLE[inptr[1]]; \ + outptr[2] = TABLE[inptr[2]]; \ + outptr[3] = TABLE[inptr[3]]; \ + outptr[4] = TABLE[inptr[4]]; \ + outptr[5] = TABLE[inptr[5]]; \ + outptr[6] = TABLE[inptr[6]]; \ + outptr[7] = TABLE[inptr[7]]; \ + inptr += 8; \ + outptr += 8; \ + } \ + length = length % 8; \ + } \ \ - inptr = pInput; \ - outptr = pOutput; \ + /* Process the remaining 0...7 bytes. Every case intentionally falls through to the next one, so entering at case N translates bytes N-1 down to 0. */ \ + switch (length) \ + { \ + case 7: outptr[6] = TABLE[inptr[6]]; \ + case 6: outptr[5] = TABLE[inptr[5]]; \ + case 5: outptr[4] = TABLE[inptr[4]]; \ + case 4: outptr[3] = TABLE[inptr[3]]; \ + case 3: outptr[2] = TABLE[inptr[2]]; \ + case 2: outptr[1] = TABLE[inptr[1]]; \ + case 1: outptr[0] = TABLE[inptr[0]]; \ + case 0: break; \ + } \ + inptr += length; \ + outptr += length; \ } + /* First define the conversion function from ISO 8859-1 to CP037. */ #define MIN_NEEDED_INPUT MIN_NEEDED_FROM #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO #define LOOPFCT FROM_LOOP -#define BODY TROO_LOOP (table_iso8859_1_to_cp037) +#define BODY TR_LOOP (table_iso8859_1_to_cp037) #include @@ -228,7 +247,7 @@ __attribute__ ((aligned (8))) = #define MIN_NEEDED_INPUT MIN_NEEDED_TO #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM #define LOOPFCT TO_LOOP -#define BODY TROO_LOOP (table_cp037_iso8859_1); +#define BODY TR_LOOP (table_cp037_iso8859_1); #include -- 2.11.4.GIT