libgcc/config/spu/cachemgr.c

/* Copyright (C) 2008-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#include <spu_mfcio.h>
#include <spu_internals.h>
#include <spu_intrinsics.h>
#include <spu_cache.h>
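
/* This file implements the software-managed data cache that backs the
   __ea address space on the SPU.  __cache_fetch and __cache_fetch_dirty
   return a local-store pointer for an effective address, filling the
   128-byte line on a miss; __cache_evict writes a single line back, and
   __cache_flush, registered as a destructor, writes back every dirty
   line at program exit.  */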

extern unsigned long long __ea_local_store;
extern char __cache_tag_array_size;

#define LINE_SIZE 128
#define TAG_MASK (LINE_SIZE - 1)

#define WAYS 4
#define SET_MASK ((int) &__cache_tag_array_size - LINE_SIZE)

#define CACHE_LINES ((int) &__cache_tag_array_size /	\
		     sizeof (struct __cache_tag_array) * WAYS)
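
/* The cache geometry is fixed at link time: the address of the symbol
   __cache_tag_array_size encodes the size of the tag array, LINE_SIZE
   is the line size in bytes, and the cache is WAYS-way set associative.
   SET_MASK extracts the set-index bits of an address (which equal the
   byte offset of the set's entry in the tag array), and CACHE_LINES is
   the total number of lines (sets * WAYS).  */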

struct __cache_tag_array
{
  unsigned int tag_lo[WAYS];
  unsigned int tag_hi[WAYS];
  void *base[WAYS];
  int reserved[WAYS];
  vector unsigned short dirty_bits[WAYS];
};

extern struct __cache_tag_array __cache_tag_array[];
extern char __cache[];
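
/* One __cache_tag_array entry describes one 4-way set and is itself
   128 bytes: tag_lo at quadword offset 0, tag_hi at 16, base at 32,
   reserved at 48, and dirty_bits at 64 (one quadword per way, one bit
   per byte of the line).  The SI intrinsics below rely on these
   offsets.  __cache_tag_array and __cache themselves are expected to
   be laid out by the linker to match __cache_tag_array_size.  */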

/* In order to make the code seem a little cleaner, and to avoid having
   64/32 bit ifdefs all over the place, we use macros.  */

#ifdef __EA64__
typedef unsigned long long addr;

#define CHECK_TAG(_entry, _way, _tag)			\
  ((_entry)->tag_lo[(_way)] == ((_tag) & 0xFFFFFFFF)	\
   && (_entry)->tag_hi[(_way)] == ((_tag) >> 32))

#define GET_TAG(_entry, _way)				\
  ((unsigned long long)(_entry)->tag_hi[(_way)] << 32	\
   | (unsigned long long)(_entry)->tag_lo[(_way)])

#define SET_TAG(_entry, _way, _tag)			\
  (_entry)->tag_lo[(_way)] = (_tag) & 0xFFFFFFFF;	\
  (_entry)->tag_hi[(_way)] = (_tag) >> 32

#else /*__EA32__*/
typedef unsigned long addr;

#define CHECK_TAG(_entry, _way, _tag)	\
  ((_entry)->tag_lo[(_way)] == (_tag))

#define GET_TAG(_entry, _way)		\
  ((_entry)->tag_lo[(_way)])

#define SET_TAG(_entry, _way, _tag)	\
  (_entry)->tag_lo[(_way)] = (_tag)

#endif

/* In GET_ENTRY, we cast away the high 32 bits,
   as the tag is only in the low 32.  */

#define GET_ENTRY(_addr)						\
  ((struct __cache_tag_array *)						\
   si_to_uint (si_a (si_and (si_from_uint ((unsigned int) (addr) (_addr)), \
			     si_from_uint (SET_MASK)),			\
		     si_from_uint ((unsigned int) __cache_tag_array))))

#define GET_CACHE_LINE(_addr, _way) \
  ((void *) (__cache + ((_addr) & SET_MASK) * WAYS) + ((_way) * LINE_SIZE))

#define CHECK_DIRTY(_vec) (si_to_uint (si_orx ((qword) (_vec))))
#define SET_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] = 1)
#define CHECK_EMPTY(_entry, _way) ((_entry)->tag_lo[(_way)] == 1)

#define LS_FLAG 0x80000000
#define SET_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] |= LS_FLAG)
#define CHECK_IS_LS(_entry, _way) ((_entry)->reserved[(_way)] & LS_FLAG)
#define GET_LRU(_entry, _way) ((_entry)->reserved[(_way)] & ~LS_FLAG)
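
/* The reserved[] word of each way doubles as bookkeeping: the MSB
   (LS_FLAG) records that the way aliases the SPU's own local store,
   and the remaining bits hold the LRU counter.  A tag_lo value of 1
   marks an empty way; real tags are line-aligned, so 1 can never be a
   valid tag.  */

/* MFC tag IDs range from 0 to 31, so 32 serves as a "not yet reserved"
   sentinel; __cache_fill reserves a real tag on first use.  */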
static int dma_tag = 32;

static void
__cache_evict_entry (struct __cache_tag_array *entry, int way)
{
  addr tag = GET_TAG (entry, way);

  if (CHECK_DIRTY (entry->dirty_bits[way]) && !CHECK_IS_LS (entry, way))
    {
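      /* Two write-back strategies: with NONATOMIC defined we simply DMA
	 the whole line back to main storage; otherwise we use a
	 getllar/putllc reservation loop so that only the bytes marked
	 dirty are merged into memory, which avoids clobbering concurrent
	 stores to other bytes of the same line.  */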
#ifdef NONATOMIC
      /* Non-atomic writes.  */
      unsigned int oldmask, mach_stat;
      char *line = ((void *) 0);

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      /* Issue DMA request.  */
      line = GET_CACHE_LINE (entry->tag_lo[way], way);
      mfc_put (line, tag, LINE_SIZE, dma_tag, 0, 0);

      /* Wait for DMA completion.  */
      oldmask = mfc_read_tag_mask ();
      mfc_write_tag_mask (1 << dma_tag);
      mfc_read_tag_status_all ();
      mfc_write_tag_mask (oldmask);

      /* Leave critical section.  */
      if (__builtin_expect (mach_stat & 1, 0))
	spu_ienable ();
#else
      /* Allocate a buffer large enough that we know it contains 128 bytes
	 that are 128-byte aligned (for DMA).  */

      char buffer[LINE_SIZE + 127];
      qword *buf_ptr = (qword *) (((unsigned int) (buffer) + 127) & ~127);
      qword *line = GET_CACHE_LINE (entry->tag_lo[way], way);
      qword bits;
      unsigned int mach_stat;

      /* Enter critical section.  */
      mach_stat = spu_readch (SPU_RdMachStat);
      spu_idisable ();

      do
	{
	  /* We atomically read the current memory into a buffer,
	     modify the dirty bytes in the buffer, and write it
	     back.  If the writeback fails, loop and try again.  */

	  mfc_getllar (buf_ptr, tag, 0, 0);
	  mfc_read_atomic_status ();

	  /* The method we're using to write 16 dirty bytes into
	     the buffer at a time uses fsmb, which in turn uses
	     the least significant 16 bits of word 0, so we
	     load the bits and rotate so that the first bit of
	     the bitmap is in the first bit that fsmb will use.  */

	  bits = (qword) entry->dirty_bits[way];
	  bits = si_rotqbyi (bits, -2);

	  /* si_fsmb creates the mask of dirty bytes.
	     Use selb to pick the appropriate bits.  */
	  buf_ptr[0] = si_selb (buf_ptr[0], line[0], si_fsmb (bits));

	  /* Rotate to the next 16-byte section of the cache line.  */
	  bits = si_rotqbyi (bits, 2);

	  buf_ptr[1] = si_selb (buf_ptr[1], line[1], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[2] = si_selb (buf_ptr[2], line[2], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[3] = si_selb (buf_ptr[3], line[3], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[4] = si_selb (buf_ptr[4], line[4], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[5] = si_selb (buf_ptr[5], line[5], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[6] = si_selb (buf_ptr[6], line[6], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);
	  buf_ptr[7] = si_selb (buf_ptr[7], line[7], si_fsmb (bits));
	  bits = si_rotqbyi (bits, 2);

	  mfc_putllc (buf_ptr, tag, 0, 0);
	}
      while (mfc_read_atomic_status ());

      /* Leave critical section.  */
      if (__builtin_expect (mach_stat & 1, 0))
	spu_ienable ();
#endif
    }

  /* In any case, mark the lo tag with 1, which denotes empty.  */
  SET_EMPTY (entry, way);
  entry->dirty_bits[way] = (vector unsigned short) si_from_uint (0);
}

void
__cache_evict (__ea void *ea)
{
  addr tag = (addr) ea & ~TAG_MASK;
  struct __cache_tag_array *entry = GET_ENTRY (ea);
  int i = 0;

  /* Cycle through all the possible ways the address could occupy
     and evict the way if found.  */

  for (i = 0; i < WAYS; i++)
    if (CHECK_TAG (entry, i, tag))
      __cache_evict_entry (entry, i);
}

static void *
__cache_fill (int way, addr tag)
{
  unsigned int oldmask, mach_stat;
  char *line = ((void *) 0);

  /* Reserve our DMA tag.  */
  if (dma_tag == 32)
    dma_tag = mfc_tag_reserve ();

  /* Enter critical section.  */
  mach_stat = spu_readch (SPU_RdMachStat);
  spu_idisable ();

  /* Issue DMA request.  */
  line = GET_CACHE_LINE (tag, way);
  mfc_get (line, tag, LINE_SIZE, dma_tag, 0, 0);

  /* Wait for DMA completion.  */
  oldmask = mfc_read_tag_mask ();
  mfc_write_tag_mask (1 << dma_tag);
  mfc_read_tag_status_all ();
  mfc_write_tag_mask (oldmask);

  /* Leave critical section.  */
  if (__builtin_expect (mach_stat & 1, 0))
    spu_ienable ();

  return (void *) line;
}

static void
__cache_miss (__ea void *ea, struct __cache_tag_array *entry, int way)
{
  addr tag = (addr) ea & ~TAG_MASK;
  unsigned int lru = 0;
  int i = 0;
  int idx = 0;

  /* If way >= 4, then there are no empty slots, so we must evict
     the least recently used entry.  */
  if (way >= 4)
    {
      for (i = 0; i < WAYS; i++)
	{
	  if (GET_LRU (entry, i) > lru)
	    {
	      lru = GET_LRU (entry, i);
	      idx = i;
	    }
	}
      __cache_evict_entry (entry, idx);
      way = idx;
    }

  /* Set the empty entry's tag and fill its cache line.  */

  SET_TAG (entry, way, tag);
  entry->reserved[way] = 0;

  /* Check if the address is just an effective address within the
     SPU's local store.  */

  /* Because the LS is not 256k aligned, we can't do a nice and-mask
     here to compare, so we must check the whole range.  */

  if ((addr) ea >= (addr) __ea_local_store
      && (addr) ea < (addr) (__ea_local_store + 0x40000))
    {
      SET_IS_LS (entry, way);
      entry->base[way] =
	(void *) ((unsigned int) ((addr) ea
				  - (addr) __ea_local_store) & ~0x7f);
    }
  else
    {
      entry->base[way] = __cache_fill (way, tag);
    }
}

void *
__cache_fetch_dirty (__ea void *ea, int n_bytes_dirty)
{
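  /* Overall flow: compute the address tag, bump the LRU counters for
     this set, then compare the tag against all four ways at once.  On a
     hit we mark the requested bytes dirty (if n_bytes_dirty is nonzero),
     reset this way's LRU count, and return a local-store pointer into
     the cached line; on a miss we call __cache_miss to claim a way and
     then retry the lookup.  */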
#ifdef __EA64__
  unsigned int tag_hi;
  qword etag_hi;
#endif
  unsigned int tag_lo;
  struct __cache_tag_array *entry;

  qword etag_lo;
  qword equal;
  qword bit_mask;
  qword way;

  /* In this first chunk, we merely fill the pointer and tag.  */

  entry = GET_ENTRY (ea);

#ifndef __EA64__
  tag_lo =
    si_to_uint (si_andc
		(si_shufb
		 (si_from_uint ((addr) ea), si_from_uint (0),
		  si_from_uint (0x00010203)), si_from_uint (TAG_MASK)));
#else
  tag_lo =
    si_to_uint (si_andc
		(si_shufb
		 (si_from_ullong ((addr) ea), si_from_uint (0),
		  si_from_uint (0x04050607)), si_from_uint (TAG_MASK)));

  tag_hi =
    si_to_uint (si_shufb
		(si_from_ullong ((addr) ea), si_from_uint (0),
		 si_from_uint (0x00010203)));
#endif

  /* Increment LRU in reserved bytes.  */
  si_stqd (si_ai (si_lqd (si_from_ptr (entry), 48), 1),
	   si_from_ptr (entry), 48);

missreturn:
  /* Check if the entry's lo_tag is equal to the address' lo_tag.  */
  etag_lo = si_lqd (si_from_ptr (entry), 0);
  equal = si_ceq (etag_lo, si_from_uint (tag_lo));
#ifdef __EA64__
  /* And the high tag too.  */
  etag_hi = si_lqd (si_from_ptr (entry), 16);
  equal = si_and (equal, (si_ceq (etag_hi, si_from_uint (tag_hi))));
#endif
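
  /* The four tag_lo words of the entry occupy a single quadword, one
     word per way, so `equal' has all-ones in the word slot of the way
     whose tag matched.  si_orx folds the four words together, so a zero
     result means no way matched.  */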
  if ((si_to_uint (si_orx (equal)) == 0))
    goto misshandler;

  if (n_bytes_dirty)
    {
      /* way = 0x40,0x50,0x60,0x70 for each way, which is also the
	 offset of the appropriate dirty bits.  */
      way = si_shli (si_clz (si_gbb (equal)), 2);

      /* To create the bit_mask, we set it to all 1s (uint -1), then we
	 shift it over (128 - n_bytes_dirty) times.  */

      bit_mask = si_from_uint (-1);

      bit_mask =
	si_shlqby (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) / 8));

      bit_mask =
	si_shlqbi (bit_mask, si_from_uint ((LINE_SIZE - n_bytes_dirty) % 8));

      /* Rotate it around to the correct offset.  */
      bit_mask =
	si_rotqby (bit_mask,
		   si_from_uint (-1 * ((addr) ea & TAG_MASK) / 8));

      bit_mask =
	si_rotqbi (bit_mask,
		   si_from_uint (-1 * ((addr) ea & TAG_MASK) % 8));

      /* Update the dirty bits.  */
      si_stqx (si_or (si_lqx (si_from_ptr (entry), way), bit_mask),
	       si_from_ptr (entry), way);
    }

  /* We've definitely found the right entry; set LRU (reserved) to 0,
     maintaining the LS flag (MSB).  */

  si_stqd (si_andc
	   (si_lqd (si_from_ptr (entry), 48),
	    si_and (equal, si_from_uint (~(LS_FLAG)))),
	   si_from_ptr (entry), 48);

  return (void *)
    si_to_uint (si_a
		(si_orx
		 (si_and (si_lqd (si_from_ptr (entry), 32), equal)),
		 si_from_uint (((unsigned int) (addr) ea) & TAG_MASK)));
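
  /* A lo tag of 1 marks an empty way (see SET_EMPTY), so comparing
     etag_lo against 1 finds a free slot: gbb/clz turn the comparison
     result into a way index 0-3, or 4 when no way is empty, which
     __cache_miss takes as "evict the LRU way".  */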
misshandler:
  equal = si_ceqi (etag_lo, 1);
  __cache_miss (ea, entry, (si_to_uint (si_clz (si_gbb (equal))) - 16) >> 2);
  goto missreturn;
}

void *
__cache_fetch (__ea void *ea)
{
  return __cache_fetch_dirty (ea, 0);
}

void
__cache_touch (__ea void *ea __attribute__ ((unused)))
{
  /* NO-OP for now.  */
}

void __cache_flush (void) __attribute__ ((destructor));
void
__cache_flush (void)
{
  struct __cache_tag_array *entry = __cache_tag_array;
  unsigned int i;
  int j;

  /* Cycle through each cache entry and evict all used ways.  */

  for (i = 0; i < CACHE_LINES / WAYS; i++)
    {
      for (j = 0; j < WAYS; j++)
	if (!CHECK_EMPTY (entry, j))
	  __cache_evict_entry (entry, j);

      entry++;
    }
}