From b282631e367f474bbd9e5d92e2aa54b3ce44efe8 Mon Sep 17 00:00:00 2001 From: Will Schmidt Date: Wed, 18 Apr 2012 14:52:25 -0500 Subject: [PATCH] Simplify power6 wordcopy by adding [fwd|bwd]_align_merge macros. --- ChangeLog | 8 ++ sysdeps/powerpc/powerpc32/power6/wordcopy.c | 117 +++++------------ .../{powerpc32 => powerpc64}/power6/wordcopy.c | 143 +++++++++------------ 3 files changed, 103 insertions(+), 165 deletions(-) copy sysdeps/powerpc/{powerpc32 => powerpc64}/power6/wordcopy.c (64%) diff --git a/ChangeLog b/ChangeLog index e56e7e704a..cdd29819f3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2012-04-18 Will Schmidt + + * sysdeps/powerpc/powerpc64/power6/wordcopy.c: Add fwd_align_merge and + bwd_align_merge macros. + (_wordcopy_fwd_dest_aligned): Use fwd_align_merge macro calls. + (_wordcopy_bwd_dest_aligned): Use bwd_align_merge macro calls. + * sysdeps/powerpc/powerpc32/power6/wordcopy.c: Likewise. + 2012-04-18 David S. Miller * sysdeps/sparc/sparc64/memcopy.h: Delete. diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc32/power6/wordcopy.c index 2594b1d578..d0df56f22c 100644 --- a/sysdeps/powerpc/powerpc32/power6/wordcopy.c +++ b/sysdeps/powerpc/powerpc32/power6/wordcopy.c @@ -1,5 +1,5 @@ /* _memcopy.c -- subroutines for memory copy functions. - Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc. + Copyright (C) 1991-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Torbjorn Granlund (tege@sics.se). Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com). @@ -65,6 +65,20 @@ _wordcopy_fwd_aligned (dstp, srcp, len) DSTP should be aligned for memory operations on `op_t's, but SRCP must *not* be aligned. */ +#define fwd_align_merge(align) \ + do \ + { \ + a1 = ((op_t *) srcp)[1]; \ + a2 = ((op_t *) srcp)[2]; \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \ + a0 = a2; \ + srcp += 2 * OPSIZ; \ + dstp += 2 * OPSIZ; \ + len -= 2; \ + } \ + while (len != 0) + void _wordcopy_fwd_dest_aligned (dstp, srcp, len) long int dstp; @@ -104,49 +118,13 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8)); - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (1); break; case 2: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16)); - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (2); break; case 3: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24)); - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (3); break; } @@ -191,6 +169,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len) while (len != 0); } +#define bwd_align_merge(align) \ + do \ + { \ + srcp -= 2 * OPSIZ; \ + dstp -= 2 * OPSIZ; \ + a1 = ((op_t *) srcp)[1]; \ + a0 = ((op_t *) srcp)[0]; \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \ + a2 = a0; \ + len -= 2; \ + } \ + while (len != 0) + /* _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN bytes!). DSTP should be aligned for memory @@ -235,52 +227,13 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8)); - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (1); break; case 2: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16)); - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (2); break; case 3: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24)); - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (3); break; } } diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c similarity index 64% copy from sysdeps/powerpc/powerpc32/power6/wordcopy.c copy to sysdeps/powerpc/powerpc64/power6/wordcopy.c index 2594b1d578..b18b97faab 100644 --- a/sysdeps/powerpc/powerpc32/power6/wordcopy.c +++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c @@ -1,8 +1,7 @@ /* _memcopy.c -- subroutines for memory copy functions. - Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc. + Copyright (C) 1991-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Torbjorn Granlund (tege@sics.se). - Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com). The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -60,6 +59,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len) while (len != 0); } +#define fwd_align_merge(align) \ + do \ + { \ + a1 = ((op_t *) srcp)[1]; \ + a2 = ((op_t *) srcp)[2]; \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \ + a0 = a2; \ + srcp += 2 * OPSIZ; \ + dstp += 2 * OPSIZ; \ + len -= 2; \ + } \ + while (len != 0) + + /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to block beginning at DSTP with LEN `op_t' words (not LEN bytes!). DSTP should be aligned for memory operations on `op_t's, but SRCP must @@ -104,49 +118,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8)); - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (1); break; case 2: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16)); - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (2); break; case 3: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24)); - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (3); + break; + case 4: + fwd_align_merge (4); + break; + case 5: + fwd_align_merge (5); + break; + case 6: + fwd_align_merge (6); + break; + case 7: + fwd_align_merge (7); break; } @@ -191,6 +181,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len) while (len != 0); } +#define bwd_align_merge(align) \ + do \ + { \ + srcp -= 2 * OPSIZ; \ + dstp -= 2 * OPSIZ; \ + a1 = ((op_t *) srcp)[1]; \ + a0 = ((op_t *) srcp)[0]; \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \ + a2 = a0; \ + len -= 2; \ + } \ + while (len != 0) + /* _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN bytes!). DSTP should be aligned for memory @@ -235,52 +239,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8)); - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (1); break; case 2: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16)); - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (2); break; case 3: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24)); - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (3); + break; + case 4: + bwd_align_merge (4); + break; + case 5: + bwd_align_merge (5); + break; + case 6: + bwd_align_merge (6); + break; + case 7: + bwd_align_merge (7); break; } } -- 2.11.4.GIT