From 64a78dd8f45d4260c9cdd1fdd2cad2c4264404a4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 22 Jul 2003 02:13:49 +0000 Subject: [PATCH] Doing the address arithmetic in C both files become identical ... --- arch/mips/mm-32/Makefile | 2 +- arch/mips/mm-64/Makefile | 2 +- arch/mips/mm-64/pg-sb1.c | 214 --------------------------------------- arch/mips/mm/Makefile | 5 +- arch/mips/{mm-32 => mm}/pg-sb1.c | 117 +++++++++++---------- 5 files changed, 62 insertions(+), 278 deletions(-) delete mode 100644 arch/mips/mm-64/pg-sb1.c rename arch/mips/{mm-32 => mm}/pg-sb1.c (63%) diff --git a/arch/mips/mm-32/Makefile b/arch/mips/mm-32/Makefile index c88dc8b3af4..ede5b20685c 100644 --- a/arch/mips/mm-32/Makefile +++ b/arch/mips/mm-32/Makefile @@ -15,6 +15,6 @@ obj-$(CONFIG_CPU_RM7000) += pg-r4k.o tlbex-r4k.o obj-$(CONFIG_CPU_R10000) += pg-r4k.o tlbex-r4k.o obj-$(CONFIG_CPU_MIPS32) += pg-r4k.o tlbex-r4k.o obj-$(CONFIG_CPU_MIPS64) += pg-r4k.o tlbex-r4k.o -obj-$(CONFIG_CPU_SB1) += pg-sb1.o tlbex-r4k.o +obj-$(CONFIG_CPU_SB1) += tlbex-r4k.o EXTRA_AFLAGS := $(CFLAGS) diff --git a/arch/mips/mm-64/Makefile b/arch/mips/mm-64/Makefile index 5f8efa1fef0..d2b50453cab 100644 --- a/arch/mips/mm-64/Makefile +++ b/arch/mips/mm-64/Makefile @@ -11,7 +11,7 @@ obj-$(CONFIG_CPU_NEVADA) += pg-r4k.o tlb-glue-r4k.o obj-$(CONFIG_CPU_R5432) += pg-r4k.o tlb-glue-r4k.o obj-$(CONFIG_CPU_RM7000) += pg-r4k.o tlb-glue-r4k.o obj-$(CONFIG_CPU_R10000) += pg-r4k.o tlb-glue-r4k.o -obj-$(CONFIG_CPU_SB1) += pg-sb1.o tlb-glue-sb1.o +obj-$(CONFIG_CPU_SB1) += tlb-glue-sb1.o obj-$(CONFIG_CPU_MIPS64) += pg-r4k.o tlb-glue-r4k.o # diff --git a/arch/mips/mm-64/pg-sb1.c b/arch/mips/mm-64/pg-sb1.c deleted file mode 100644 index 8fe52311a8c..00000000000 --- a/arch/mips/mm-64/pg-sb1.c +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com) - * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org) - * Copyright (C) 2000 SiByte, Inc. - * - * Written by Justin Carlson of SiByte, Inc. - * and Kip Walker of Broadcom Corp. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version 2 - * of the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. - */ - -#include -#include -#include - -#include -#include -#include -#include - -#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS -#define SB1_PREF_LOAD_STREAMED_HINT "0" -#define SB1_PREF_STORE_STREAMED_HINT "1" -#else -#define SB1_PREF_LOAD_STREAMED_HINT "4" -#define SB1_PREF_STORE_STREAMED_HINT "5" -#endif - -/* These are the functions hooked by the memory management function pointers */ -void sb1_clear_page(void *page) -{ - /* - * JDCXXX - This should be bottlenecked by the write buffer, but these - * things tend to be mildly unpredictable...should check this on the - * performance model - * - * We prefetch 4 lines ahead. We're also "cheating" slightly here... - * since we know we're on an SB1, we force the assembler to take - * 64-bit operands to speed things up - */ - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - ".set noat \n" - ".set mips4 \n" - " daddiu $1, %0, %2 \n" /* Calculate the end of the page to clear */ -#ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%0) \n" /* Prefetch the first 4 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%0) \n" -#endif - "1: sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */ - " sd $0, 8(%0) \n" - " sd $0, 16(%0) \n" - " sd $0, 24(%0) \n" -#ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_STORE_STREAMED_HINT ",128(%0) \n" /* Prefetch 4 lines ahead */ -#endif - " bne $1, %0, 1b \n" - " daddiu %0, %0, 32\n" /* Next cacheline (This instruction better be short piped!) */ - ".set pop \n" - : "=r" (page) - : "0" (page), "I" (PAGE_SIZE-32) - : "memory"); - -} - -void sb1_copy_page(void *to, void *from) -{ - /* - * This should be optimized in assembly...can't use ld/sd, though, - * because the top 32 bits could be nuked if we took an interrupt - * during the routine. And this is not a good place to be cli()'ing - * - * The pref's used here are using "streaming" hints, which cause the - * copied data to be kicked out of the cache sooner. A page copy often - * ends up copying a lot more data than is commonly used, so this seems - * to make sense in terms of reducing cache pollution, but I've no real - * performance data to back this up - */ - - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - ".set noat \n" - ".set mips4 \n" - " daddiu $1, %0, %4 \n" /* Calculate the end of the page to copy */ -#ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 0(%0) \n" /* Prefetch the first 3 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%1) \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 32(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%1) \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 64(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%1) \n" -#endif - "1: lw $2, 0(%0) \n" /* Block copy a cacheline */ - " lw $3, 4(%0) \n" - " lw $4, 8(%0) \n" - " lw $5, 12(%0) \n" - " lw $6, 16(%0) \n" - " lw $7, 20(%0) \n" - " lw $8, 24(%0) \n" - " lw $9, 28(%0) \n" -#ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 96(%0) \n" /* Prefetch ahead */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%1) \n" -#endif - " sw $2, 0(%1) \n" - " sw $3, 4(%1) \n" - " sw $4, 8(%1) \n" - " sw $5, 12(%1) \n" - " sw $6, 16(%1) \n" - " sw $7, 20(%1) \n" - " sw $8, 24(%1) \n" - " sw $9, 28(%1) \n" - " daddiu %1, %1, 32 \n" /* Next cacheline */ - " nop \n" /* Force next add to short pipe */ - " nop \n" /* Force next add to short pipe */ - " bne $1, %0, 1b \n" - " daddiu %0, %0, 32 \n" /* Next cacheline */ - ".set pop \n" - : "=r" (to), "=r" (from) - : "0" (from), "1" (to), "I" (PAGE_SIZE-32) - : "$2","$3","$4","$5","$6","$7","$8","$9","memory"); -} - - -#ifdef CONFIG_SIBYTE_DMA_PAGEOPS - -/* - * Pad descriptors to cacheline, since each is exclusively owned by a - * particular CPU. - */ -typedef struct dmadscr_s { - uint64_t dscr_a; - uint64_t dscr_b; - uint64_t pad_a; - uint64_t pad_b; -} dmadscr_t; - -static dmadscr_t page_descr[NR_CPUS] __attribute__((aligned(SMP_CACHE_BYTES))); - -void sb1_dma_init(void) -{ - int cpu = smp_processor_id(); - uint64_t base_val = PHYSADDR(&page_descr[cpu]) | V_DM_DSCR_BASE_RINGSZ(1); - - __raw_writeq(base_val, - IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE)); - __raw_writeq(base_val | M_DM_DSCR_BASE_RESET, - IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE)); - __raw_writeq(base_val | M_DM_DSCR_BASE_ENABL, - IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE)); -} - -void sb1_clear_page_dma(void *page) -{ - int cpu = smp_processor_id(); - - /* if the page is above Kseg0, use old way */ - if (KSEGX(page) != K0BASE) - return sb1_clear_page(page); - - page_descr[cpu].dscr_a = PHYSADDR(page) | M_DM_DSCRA_ZERO_MEM | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; - page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); - __raw_writeq(1, IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)); - - /* - * Don't really want to do it this way, but there's no - * reliable way to delay completion detection. - */ - while (!(__raw_readq(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) & M_DM_DSCR_BASE_INTERRUPT)) - ; - __raw_readq(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE)); -} - -void sb1_copy_page_dma(void *to, void *from) -{ - unsigned long from_phys = PHYSADDR(from); - unsigned long to_phys = PHYSADDR(to); - int cpu = smp_processor_id(); - - /* if either page is above Kseg0, use old way */ - if ((KSEGX(to) != K0BASE) || (KSEGX(from) != K0BASE)) - return sb1_copy_page(to, from); - - page_descr[cpu].dscr_a = PHYSADDR(to_phys) | M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT; - page_descr[cpu].dscr_b = PHYSADDR(from_phys) | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE); - __raw_writeq(1, IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)); - - /* - * Don't really want to do it this way, but there's no - * reliable way to delay completion detection. - */ - while (!(__raw_readq(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)) & M_DM_DSCR_BASE_INTERRUPT)) - ; - __raw_readq(IO_SPACE_BASE + A_DM_REGISTER(cpu, R_DM_DSCR_BASE)); -} - -#endif diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 12b7a76c61a..a0d2aa124d0 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -18,8 +18,9 @@ obj-$(CONFIG_CPU_R4X00) += c-r4k.o tlb-r4k.o obj-$(CONFIG_CPU_R5000) += c-r4k.o tlb-r4k.o obj-$(CONFIG_CPU_R5432) += c-r4k.o tlb-r4k.o obj-$(CONFIG_CPU_RM7000) += c-r4k.o tlb-r4k.o -obj-$(CONFIG_CPU_SB1) += c-sb1.o cerr-sb1.o cex-sb1.o tlb-sb1.o -obj-$(CONFIG_CPU_TX39XX) += pg-r3k.o c-tx39.o tlb-r3k.o tlbex-r3k.o +obj-$(CONFIG_CPU_SB1) += c-sb1.o cerr-sb1.o cex-sb1.o pg-sb1.o \ + tlb-sb1.o +obj-$(CONFIG_CPU_TX39XX) += c-tx39.o pg-r3k.o tlb-r3k.o tlbex-r3k.o obj-$(CONFIG_CPU_TX49XX) += c-r4k.o tlb-r4k.o obj-$(CONFIG_CPU_VR41XX) += c-r4k.o tlb-r4k.o diff --git a/arch/mips/mm-32/pg-sb1.c b/arch/mips/mm/pg-sb1.c similarity index 63% rename from arch/mips/mm-32/pg-sb1.c rename to arch/mips/mm/pg-sb1.c index a5d47d87298..59fc95a0e2d 100644 --- a/arch/mips/mm-32/pg-sb1.c +++ b/arch/mips/mm/pg-sb1.c @@ -21,7 +21,6 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - #include #include #include @@ -42,6 +41,9 @@ /* These are the functions hooked by the memory management function pointers */ void sb1_clear_page(void *page) { + unsigned char *addr = (unsigned char *) page; + unsigned char *end = addr + PAGE_SIZE; + /* * JDCXXX - This should be bottlenecked by the write buffer, but these * things tend to be mildly unpredictable...should check this on the @@ -51,36 +53,36 @@ void sb1_clear_page(void *page) * since we know we're on an SB1, we force the assembler to take * 64-bit operands to speed things up */ - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - ".set noat \n" - ".set mips4 \n" - " addiu $1, %0, %2 \n" /* Calculate the end of the page to clear */ + do { + __asm__ __volatile__( + " .set mips4 \n" #ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%0) \n" /* Prefetch the first 4 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%0) \n" /* Prefetch the first 4 lines */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%0) \n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%0) \n" #endif - "1: sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */ - " sd $0, 8(%0) \n" - " sd $0, 16(%0) \n" - " sd $0, 24(%0) \n" + "1: sd $0, 0(%0) \n" /* Throw out a cacheline of 0's */ + " sd $0, 8(%0) \n" + " sd $0, 16(%0) \n" + " sd $0, 24(%0) \n" #ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_STORE_STREAMED_HINT ",128(%0) \n" /* Prefetch 4 lines ahead */ + " pref " SB1_PREF_STORE_STREAMED_HINT ",128(%0) \n" /* Prefetch 4 lines ahead */ #endif - " bne $1, %0, 1b \n" - " addiu %0, %0, 32 \n" /* Next cacheline (This instruction better be short piped!) */ - ".set pop \n" - : "=r" (page) - : "0" (page), "I" (PAGE_SIZE-32) + " .set mips0 \n" + : + : "r" (addr) : "memory"); - + addr += 32; + } while (addr != end); } void sb1_copy_page(void *to, void *from) { + unsigned char *src = from; + unsigned char *dst = to; + unsigned char *end = src + PAGE_SIZE; + /* * This should be optimized in assembly...can't use ld/sd, though, * because the top 32 bits could be nuked if we took an interrupt @@ -93,49 +95,44 @@ void sb1_copy_page(void *to, void *from) * performance data to back this up */ - __asm__ __volatile__( - ".set push \n" - ".set noreorder \n" - ".set noat \n" - ".set mips4 \n" - " addiu $1, %0, %4 \n" /* Calculate the end of the page to copy */ + do { + __asm__ __volatile__( + " .set mips4 \n" #ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 0(%0) \n" /* Prefetch the first 3 lines */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%1) \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 32(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%1) \n" - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 64(%0) \n" - " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%1) \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 0(%0)\n" /* Prefetch the first 3 lines */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", 0(%1)\n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 32(%0)\n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 32(%1)\n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 64(%0)\n" + " pref " SB1_PREF_STORE_STREAMED_HINT ", 64(%1)\n" #endif - "1: lw $2, 0(%0) \n" /* Block copy a cacheline */ - " lw $3, 4(%0) \n" - " lw $4, 8(%0) \n" - " lw $5, 12(%0) \n" - " lw $6, 16(%0) \n" - " lw $7, 20(%0) \n" - " lw $8, 24(%0) \n" - " lw $9, 28(%0) \n" + "1: lw $2, 0(%0) \n" /* Block copy a cacheline */ + " lw $3, 4(%0) \n" + " lw $4, 8(%0) \n" + " lw $5, 12(%0) \n" + " lw $6, 16(%0) \n" + " lw $7, 20(%0) \n" + " lw $8, 24(%0) \n" + " lw $9, 28(%0) \n" #ifdef CONFIG_CPU_HAS_PREFETCH - " pref " SB1_PREF_LOAD_STREAMED_HINT ", 96(%0) \n" /* Prefetch ahead */ - " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%1) \n" + " pref " SB1_PREF_LOAD_STREAMED_HINT ", 96(%0) \n" /* Prefetch ahead */ + " pref " SB1_PREF_STORE_STREAMED_HINT ", 96(%1) \n" #endif - " sw $2, 0(%1) \n" - " sw $3, 4(%1) \n" - " sw $4, 8(%1) \n" - " sw $5, 12(%1) \n" - " sw $6, 16(%1) \n" - " sw $7, 20(%1) \n" - " sw $8, 24(%1) \n" - " sw $9, 28(%1) \n" - " addiu %1, %1, 32 \n" /* Next cacheline */ - " nop \n" /* Force next add to short pipe */ - " nop \n" /* Force next add to short pipe */ - " bne $1, %0, 1b \n" - " addiu %0, %0, 32 \n" /* Next cacheline */ - ".set pop \n" - : "=r" (to), "=r" (from) - : "0" (from), "1" (to), "I" (PAGE_SIZE-32) + " sw $2, 0(%1) \n" + " sw $3, 4(%1) \n" + " sw $4, 8(%1) \n" + " sw $5, 12(%1) \n" + " sw $6, 16(%1) \n" + " sw $7, 20(%1) \n" + " sw $8, 24(%1) \n" + " sw $9, 28(%1) \n" + " .set mips0 \n" + : + : "r" (src), "r" (dst) : "$2","$3","$4","$5","$6","$7","$8","$9","memory"); + src += 32; + dst += 32; + } while (src != end); } -- 2.11.4.GIT