arch/mips/mm/pg-sb1.c (linux-2.6.9)
/*
 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
 * Copyright (C) 1997, 2001 Ralf Baechle (ralf@gnu.org)
 * Copyright (C) 2000 SiByte, Inc.
 *
 * Written by Justin Carlson of SiByte, Inc.
 *	   and Kip Walker of Broadcom Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
#include <linux/config.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/types.h>	/* uint64_t */

#include <asm/io.h>
#include <asm/page.h>		/* PAGE_SIZE */
#include <asm/addrspace.h>	/* KSEGX(), PHYSADDR() */
#include <asm/sibyte/sb1250.h>
#include <asm/sibyte/sb1250_regs.h>
#include <asm/sibyte/sb1250_dma.h>

#ifdef CONFIG_SB1_PASS_1_WORKAROUNDS
#define SB1_PREF_LOAD_STREAMED_HINT "0"
#define SB1_PREF_STORE_STREAMED_HINT "1"
#else
#define SB1_PREF_LOAD_STREAMED_HINT "4"
#define SB1_PREF_STORE_STREAMED_HINT "5"
#endif
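
/*
 * The hint numbers are the standard MIPS pref encodings: 0 = load,
 * 1 = store, 4 = load streamed, 5 = store streamed.  On pass 1 SB1
 * parts the streamed variants are avoided (presumably an erratum,
 * going by the CONFIG_SB1_PASS_1_WORKAROUNDS guard), so those builds
 * fall back to the plain load/store hints.
 */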

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
static inline void clear_page_cpu(void *page)
#else
void clear_page(void *page)
#endif
{
	unsigned char *addr = (unsigned char *) page;
	unsigned char *end = addr + PAGE_SIZE;

	/*
	 * JDCXXX - This should be bottlenecked by the write buffer, but these
	 * things tend to be mildly unpredictable... should check this on the
	 * performance model.
	 *
	 * We prefetch 4 lines ahead.  We're also "cheating" slightly here...
	 * since we know we're on an SB1, we force the assembler to take
	 * 64-bit operands to speed things up.
	 */
	do {
		__asm__ __volatile__(
		"	.set	mips4					\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  0(%0)  \n"	/* Prefetch the first 4 lines */
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 32(%0)  \n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 64(%0)  \n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 96(%0)  \n"
#endif
		"1:	sd	$0,  0(%0)				\n"	/* Throw out a cacheline of 0's */
		"	sd	$0,  8(%0)				\n"
		"	sd	$0, 16(%0)				\n"
		"	sd	$0, 24(%0)				\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 128(%0) \n"	/* Prefetch 4 lines ahead */
#endif
		"	.set	mips0					\n"
		:				/* no outputs */
		: "r" (addr)
		: "memory");
		addr += 32;		/* one 32-byte cacheline per iteration */
	} while (addr != end);
}
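
/*
 * Why sd is safe above but not in the copy loop below: the only
 * 64-bit register read or written is $0, which has no state to lose
 * if an interrupt arrives mid-loop.  A copy would keep live 64-bit
 * data in registers, and on a 32-bit kernel the interrupt path only
 * preserves the low 32 bits; see the comment in copy_page.
 */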

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS
static inline void copy_page_cpu(void *to, void *from)
#else
void copy_page(void *to, void *from)
#endif
{
	unsigned char *src = from;
	unsigned char *dst = to;
	unsigned char *end = src + PAGE_SIZE;

	/*
	 * This should be optimized in assembly... can't use ld/sd, though,
	 * because the top 32 bits could be nuked if we took an interrupt
	 * during the routine.  And this is not a good place to be cli()'ing.
	 *
	 * The pref's used here are using "streaming" hints, which cause the
	 * copied data to be kicked out of the cache sooner.  A page copy
	 * often ends up copying a lot more data than is commonly used, so
	 * this seems to make sense in terms of reducing cache pollution, but
	 * I've no real performance data to back this up.
	 */
	do {
		__asm__ __volatile__(
		"	.set	mips4					\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ",  0(%0)\n"	/* Prefetch the first 3 lines */
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ",  0(%1)\n"
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", 32(%0)\n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 32(%1)\n"
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", 64(%0)\n"
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 64(%1)\n"
#endif
		"1:	lw	$2,  0(%0)				\n"	/* Block copy a cacheline */
		"	lw	$3,  4(%0)				\n"
		"	lw	$4,  8(%0)				\n"
		"	lw	$5, 12(%0)				\n"
		"	lw	$6, 16(%0)				\n"
		"	lw	$7, 20(%0)				\n"
		"	lw	$8, 24(%0)				\n"
		"	lw	$9, 28(%0)				\n"
#ifdef CONFIG_CPU_HAS_PREFETCH
		"	pref	" SB1_PREF_LOAD_STREAMED_HINT  ", 96(%0)\n"	/* Prefetch ahead */
		"	pref	" SB1_PREF_STORE_STREAMED_HINT ", 96(%1)\n"
#endif
		"	sw	$2,  0(%1)				\n"
		"	sw	$3,  4(%1)				\n"
		"	sw	$4,  8(%1)				\n"
		"	sw	$5, 12(%1)				\n"
		"	sw	$6, 16(%1)				\n"
		"	sw	$7, 20(%1)				\n"
		"	sw	$8, 24(%1)				\n"
		"	sw	$9, 28(%1)				\n"
		"	.set	mips0					\n"
		:				/* no outputs */
		: "r" (src), "r" (dst)
		: "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", "memory");
		src += 32;
		dst += 32;
	} while (src != end);
}

#ifdef CONFIG_SIBYTE_DMA_PAGEOPS

/*
 * Pad descriptors to cacheline, since each is exclusively owned by a
 * particular CPU.
 */
typedef struct dmadscr_s {
	uint64_t dscr_a;
	uint64_t dscr_b;
	uint64_t pad_a;
	uint64_t pad_b;
} dmadscr_t;
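
/*
 * With the two pad words each descriptor is exactly 32 bytes, the
 * same 32-byte cacheline the loops above stride over, so the per-CPU
 * descriptors never share a line and one CPU's polling can't bounce
 * the other CPU's cacheline.
 */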
static dmadscr_t page_descr[NR_CPUS] __attribute__((aligned(SMP_CACHE_BYTES)));

void sb1_dma_init(void)
{
	int cpu = smp_processor_id();
	uint64_t base_val =
		PHYSADDR(&page_descr[cpu]) | V_DM_DSCR_BASE_RINGSZ(1);

	/*
	 * Point this CPU's data mover channel at a one-entry descriptor
	 * ring, pulse the reset bit, then enable the channel.
	 */
	__raw_writeq(base_val,
		     IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
	__raw_writeq(base_val | M_DM_DSCR_BASE_RESET,
		     IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
	__raw_writeq(base_val | M_DM_DSCR_BASE_ENABL,
		     IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
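
/*
 * Since sb1_dma_init() programs the channel selected by
 * smp_processor_id(), it presumably has to run once on each CPU
 * during bringup rather than only on the boot processor (an
 * assumption drawn from the per-CPU indexing, not from SB1250 docs).
 */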

void clear_page(void *page)
{
	int cpu = smp_processor_id();

	/* if the page is above Kseg0, use old way */
	if (KSEGX(page) != CAC_BASE)
		return clear_page_cpu(page);

	page_descr[cpu].dscr_a = PHYSADDR(page) | M_DM_DSCRA_ZERO_MEM |
				 M_DM_DSCRA_L2C_DEST | M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}
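
/*
 * A sketch of the descriptor protocol as I read it from this code
 * (not from SB1250 documentation): dscr_a holds the destination
 * address plus flags (M_DM_DSCRA_ZERO_MEM turns the operation into a
 * block-zero, M_DM_DSCRA_INTERRUPT requests a completion flag), and
 * dscr_b holds the source address and transfer length.  Writing 1 to
 * R_DM_DSCR_COUNT hands one descriptor to the data mover; the final
 * read of R_DM_DSCR_BASE appears to acknowledge the completion bit so
 * the next request starts clean.
 */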

void copy_page(void *to, void *from)
{
	unsigned long from_phys = PHYSADDR(from);
	unsigned long to_phys = PHYSADDR(to);
	int cpu = smp_processor_id();

	/* if either page is above Kseg0, use old way */
	if ((KSEGX(to) != CAC_BASE) || (KSEGX(from) != CAC_BASE))
		return copy_page_cpu(to, from);

	page_descr[cpu].dscr_a = to_phys | M_DM_DSCRA_L2C_DEST |
				 M_DM_DSCRA_INTERRUPT;
	page_descr[cpu].dscr_b = from_phys | V_DM_DSCRB_SRC_LENGTH(PAGE_SIZE);
	__raw_writeq(1, IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_COUNT)));

	/*
	 * Don't really want to do it this way, but there's no
	 * reliable way to delay completion detection.
	 */
	while (!(__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE_DEBUG)))
		 & M_DM_DSCR_BASE_INTERRUPT))
		;
	__raw_readq(IOADDR(A_DM_REGISTER(cpu, R_DM_DSCR_BASE)));
}

#endif /* CONFIG_SIBYTE_DMA_PAGEOPS */

EXPORT_SYMBOL(clear_page);
EXPORT_SYMBOL(copy_page);
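
/*
 * Usage note: clear_page() and copy_page() are the architecture's
 * page-granularity primitives; on MIPS the clear_user_page() and
 * copy_user_page() wrappers (in asm/page.h, if memory serves) call
 * down into them, and they're exported here so modules can use them
 * as well.
 */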