/*
 * SN Platform GRU Driver
 *
 *            DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
 *
 *  Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/list.h>
#include <linux/err.h>
#include <asm/uv/uv_hub.h>

#include "grutables.h"
#include "gruhandles.h"
unsigned long gru_options __read_mostly;

static struct device_driver gru_driver = {
	.name = "gru"
};

static struct device gru_device = {
	.init_name = "",
	.driver = &gru_driver,
};

struct device *grudev = &gru_device;
/*
 * Select a gru fault map to be used by the current cpu. Note that
 * multiple cpus may be using the same map.
 *	ZZZ should "shift" be used?? Depends on HT cpu numbering
 *	ZZZ should be inline but did not work on emulator
 */
int gru_cpu_fault_map_id(void)
{
	return uv_blade_processor_id() % GRU_NUM_TFM;
}
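
/*
 * Example (illustrative only): the mapping is a simple modulo, so blade
 * cpus whose uv_blade_processor_id() values differ by a multiple of
 * GRU_NUM_TFM share a fault map:
 *
 *	uv_blade_processor_id() == 1               -> map 1
 *	uv_blade_processor_id() == GRU_NUM_TFM + 1 -> map 1
 */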
/*--------- ASID Management -------------------------------------------
 *
 *  Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
 *  Once MAX is reached, flush the TLB & start over. However,
 *  some asids may still be in use. There won't be many (percentage wise) still
 *  in use. Search active contexts & determine the value of the first
 *  asid in use ("x"s below). Set "limit" to this value.
 *  This defines a block of assignable asids.
 *
 *  When "limit" is reached, search forward from limit+1 and determine the
 *  next block of assignable asids.
 *
 *  Repeat until MAX_ASID is reached, then start over again.
 *
 *  Each time MAX_ASID is reached, increment the asid generation. Since
 *  the search for in-use asids only checks contexts with GRUs currently
 *  assigned, asids in some contexts will be missed. Prior to loading
 *  a context, the asid generation of the GTS asid is rechecked. If it
 *  doesn't match the current generation, a new asid will be assigned.
 *
 *	0---------------x------------x---------------------x----|
 *	  ^-next	^-limit				    ^-MAX_ASID
 *
 * All asid manipulation & context loading/unloading is protected by the
 * gs_asid_lock & gs_lock.
 */
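
/*
 * Worked example of the scheme above (numbers illustrative only; the real
 * values of MIN_ASID/MAX_ASID/ASID_INC come from grutables.h):
 *
 *	asid = gru_assign_asid(gru);	sequential: MIN_ASID, +ASID_INC, ...
 *
 * When the next asid would reach MAX_ASID, gru_wrap_asid() bumps
 * gs_asid_gen, the TLB is flushed, and assignment restarts at MIN_ASID.
 * If the scan in gru_reset_asid_limit() then finds that the smallest asid
 * still in use is 0x60, the block [MIN_ASID..0x5f] is assignable and
 * gs_asid_limit becomes 0x60; reaching 0x60 triggers the next forward scan.
 */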
/* Hit the asid limit. Start over */
static int gru_wrap_asid(struct gru_state *gru)
{
	gru_dbg(grudev, "gid %d\n", gru->gs_gid);
	STAT(asid_wrap);
	gru->gs_asid_gen++;
	return MIN_ASID;
}
/* Find the next chunk of unused asids */
static int gru_reset_asid_limit(struct gru_state *gru, int asid)
{
	int i, gid, inuse_asid, limit;

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	STAT(asid_next);
	limit = MAX_ASID;
	if (asid >= limit)
		asid = gru_wrap_asid(gru);
	gru_flush_all_tlb(gru);
	gid = gru->gs_gid;
again:
	for (i = 0; i < GRU_NUM_CCH; i++) {
		if (!gru->gs_gts[i] || is_kernel_context(gru->gs_gts[i]))
			continue;
		inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
		gru_dbg(grudev, "gid %d, gts %p, gms %p, inuse 0x%x, cxt %d\n",
			gru->gs_gid, gru->gs_gts[i], gru->gs_gts[i]->ts_gms,
			inuse_asid, i);
		if (inuse_asid == asid) {
			asid += ASID_INC;
			if (asid >= limit) {
				/*
				 * empty range: reset the range limit and
				 * start over
				 */
				limit = MAX_ASID;
				if (asid >= MAX_ASID)
					asid = gru_wrap_asid(gru);
				goto again;
			}
		}

		if ((inuse_asid > asid) && (inuse_asid < limit))
			limit = inuse_asid;
	}
	gru->gs_asid_limit = limit;
	gru->gs_asid = asid;
	gru_dbg(grudev, "gid %d, new asid 0x%x, new_limit 0x%x\n", gru->gs_gid,
					asid, limit);
	return asid;
}
/* Assign a new ASID to a thread context.  */
static int gru_assign_asid(struct gru_state *gru)
{
	int asid;

	gru->gs_asid += ASID_INC;
	asid = gru->gs_asid;
	if (asid >= gru->gs_asid_limit)
		asid = gru_reset_asid_limit(gru, asid);

	gru_dbg(grudev, "gid %d, asid 0x%x\n", gru->gs_gid, asid);
	return asid;
}
/*
 * Clear n bits in a word. Return a word indicating the bits that were cleared.
 * Optionally, build an array of chars that contain the bit numbers allocated.
 */
static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
				       char *idx)
{
	unsigned long bits = 0;
	int i;

	while (n--) {
		i = find_first_bit(p, mmax);
		if (i == mmax)
			BUG();
		__clear_bit(i, p);
		__set_bit(i, &bits);
		if (idx)
			*idx++ = i;
	}
	return bits;
}
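
/*
 * Example (illustrative only): reserving n = 2 resources from an 8-bit
 * free map initially 0xff clears the two lowest set bits and returns them:
 *
 *	unsigned long map = 0xff;
 *	char idx[2];
 *	unsigned long bits = reserve_resources(&map, 2, 8, idx);
 *
 * afterwards bits == 0x03, map == 0xfc and idx[] == {0, 1}.
 */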
unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
				       char *cbmap)
{
	return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
				 cbmap);
}
unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
				       char *dsmap)
{
	return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
				 dsmap);
}
static void reserve_gru_resources(struct gru_state *gru,
				  struct gru_thread_state *gts)
{
	gru->gs_active_contexts++;
	gts->ts_cbr_map =
	    gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
				     gts->ts_cbr_idx);
	gts->ts_dsr_map =
	    gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
}
static void free_gru_resources(struct gru_state *gru,
			       struct gru_thread_state *gts)
{
	gru->gs_active_contexts--;
	gru->gs_cbr_map |= gts->ts_cbr_map;
	gru->gs_dsr_map |= gts->ts_dsr_map;
}
/*
 * Check if a GRU has sufficient free resources to satisfy an allocation
 * request. Note: GRU locks may or may not be held when this is called. If
 * not held, recheck after acquiring the appropriate locks.
 *
 * Returns 1 if sufficient resources, 0 if not
 */
static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
			       int dsr_au_count, int max_active_contexts)
{
	return hweight64(gru->gs_cbr_map) >= cbr_au_count
		&& hweight64(gru->gs_dsr_map) >= dsr_au_count
		&& gru->gs_active_contexts < max_active_contexts;
}
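
/*
 * Typical usage (see gru_assign_gru_context() below): callers first scan
 * optimistically without holding gs_lock,
 *
 *	if (check_gru_resources(gru, cbr_au_count, dsr_au_count, GRU_NUM_CCH))
 *		...
 *
 * then repeat the check under gs_lock before actually reserving, since the
 * unlocked answer may be stale by the time the lock is taken.
 */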
/*
 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
 * context.
 */
static int gru_load_mm_tracker(struct gru_state *gru,
					struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
	unsigned short ctxbitmap = (1 << gts->ts_ctxnum);
	int asid;

	spin_lock(&gms->ms_asid_lock);
	asid = asids->mt_asid;

	spin_lock(&gru->gs_asid_lock);
	if (asid == 0 || (asids->mt_ctxbitmap == 0 && asids->mt_asid_gen !=
			  gru->gs_asid_gen)) {
		asid = gru_assign_asid(gru);
		asids->mt_asid = asid;
		asids->mt_asid_gen = gru->gs_asid_gen;
		STAT(asid_new);
	} else {
		STAT(asid_reuse);
	}
	spin_unlock(&gru->gs_asid_lock);

	BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
	asids->mt_ctxbitmap |= ctxbitmap;
	if (!test_bit(gru->gs_gid, gms->ms_asidmap))
		__set_bit(gru->gs_gid, gms->ms_asidmap);
	spin_unlock(&gms->ms_asid_lock);

	gru_dbg(grudev,
		"gid %d, gts %p, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, asid,
		gms->ms_asidmap[0]);
	return asid;
}
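
/*
 * Example of the generation recheck described in the ASID comment block:
 * if a TLB wrap bumped gs_asid_gen while this mm had no context loaded on
 * this GRU (mt_ctxbitmap == 0), the saved mt_asid may since have been
 * recycled, so the mt_asid_gen mismatch above forces a fresh asid.
 */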
static void gru_unload_mm_tracker(struct gru_state *gru,
					struct gru_thread_state *gts)
{
	struct gru_mm_struct *gms = gts->ts_gms;
	struct gru_mm_tracker *asids;
	unsigned short ctxbitmap;

	asids = &gms->ms_asids[gru->gs_gid];
	ctxbitmap = (1 << gts->ts_ctxnum);
	spin_lock(&gms->ms_asid_lock);
	spin_lock(&gru->gs_asid_lock);
	BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
	asids->mt_ctxbitmap ^= ctxbitmap;
	gru_dbg(grudev, "gid %d, gts %p, gms %p, ctxnum %d, asidmap 0x%lx\n",
		gru->gs_gid, gts, gms, gts->ts_ctxnum, gms->ms_asidmap[0]);
	spin_unlock(&gru->gs_asid_lock);
	spin_unlock(&gms->ms_asid_lock);
}
/*
 * Decrement the reference count on a GTS structure. Free the structure
 * if the reference count goes to zero.
 */
void gts_drop(struct gru_thread_state *gts)
{
	if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
		if (gts->ts_gms)
			gru_drop_mmu_notifier(gts->ts_gms);
		kfree(gts);
		STAT(gts_free);
	}
}
/*
 * Locate the GTS structure for the current thread.
 */
static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
			    *vdata, int tsid)
{
	struct gru_thread_state *gts;

	list_for_each_entry(gts, &vdata->vd_head, ts_next)
	    if (gts->ts_tsid == tsid)
		return gts;
	return NULL;
}
/*
 * Allocate a thread state structure.
 */
struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
		int cbr_au_count, int dsr_au_count, int options, int tsid)
{
	struct gru_thread_state *gts;
	struct gru_mm_struct *gms;
	int bytes;

	bytes = DSR_BYTES(dsr_au_count) + CBR_BYTES(cbr_au_count);
	bytes += sizeof(struct gru_thread_state);
	gts = kmalloc(bytes, GFP_KERNEL);
	if (!gts)
		return ERR_PTR(-ENOMEM);

	STAT(gts_alloc);
	memset(gts, 0, sizeof(struct gru_thread_state)); /* zero out header */
	atomic_set(&gts->ts_refcnt, 1);
	mutex_init(&gts->ts_ctxlock);
	gts->ts_cbr_au_count = cbr_au_count;
	gts->ts_dsr_au_count = dsr_au_count;
	gts->ts_user_options = options;
	gts->ts_user_blade_id = -1;
	gts->ts_user_chiplet_id = -1;
	gts->ts_tsid = tsid;
	gts->ts_ctxnum = NULLCTX;
	gts->ts_tlb_int_select = -1;
	gts->ts_cch_req_slice = -1;
	gts->ts_sizeavail = GRU_SIZEAVAIL(PAGE_SHIFT);
	if (vma) {
		gts->ts_mm = current->mm;
		gts->ts_vma = vma;
		gms = gru_register_mmu_notifier();
		if (IS_ERR(gms))
			goto err;
		gts->ts_gms = gms;
	}

	gru_dbg(grudev, "alloc gts %p\n", gts);
	return gts;

err:
	gts_drop(gts);
	return ERR_CAST(gms);
}
/*
 * Allocate a vma private data structure.
 */
struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
{
	struct gru_vma_data *vdata = NULL;

	vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
	if (!vdata)
		return NULL;

	STAT(vdata_alloc);
	INIT_LIST_HEAD(&vdata->vd_head);
	spin_lock_init(&vdata->vd_lock);
	gru_dbg(grudev, "alloc vdata %p\n", vdata);
	return vdata;
}
/*
 * Find the thread state structure for the current thread.
 */
struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts;

	spin_lock(&vdata->vd_lock);
	gts = gru_find_current_gts_nolock(vdata, tsid);
	spin_unlock(&vdata->vd_lock);
	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
	return gts;
}
/*
 * Allocate a new thread state for a GSEG. Note that races may allow
 * another thread to create a gts first; if so, use that one.
 */
struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
					int tsid)
{
	struct gru_vma_data *vdata = vma->vm_private_data;
	struct gru_thread_state *gts, *ngts;

	gts = gru_alloc_gts(vma, vdata->vd_cbr_au_count, vdata->vd_dsr_au_count,
			    vdata->vd_user_options, tsid);
	if (IS_ERR(gts))
		return gts;

	spin_lock(&vdata->vd_lock);
	ngts = gru_find_current_gts_nolock(vdata, tsid);
	if (ngts) {
		gts_drop(gts);
		gts = ngts;
		STAT(gts_double_allocate);
	} else {
		list_add(&gts->ts_next, &vdata->vd_head);
	}
	spin_unlock(&vdata->vd_lock);
	gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
	return gts;
}
/*
 * Free the GRU context assigned to the thread state.
 */
static void gru_free_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru;

	gru = gts->ts_gru;
	gru_dbg(grudev, "gts %p, gid %d\n", gts, gru->gs_gid);

	spin_lock(&gru->gs_lock);
	gru->gs_gts[gts->ts_ctxnum] = NULL;
	free_gru_resources(gru, gts);
	BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
	__clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
	gts->ts_ctxnum = NULLCTX;
	gts->ts_gru = NULL;
	gts->ts_blade = -1;
	spin_unlock(&gru->gs_lock);

	gts_drop(gts);
	STAT(free_context);
}
/*
 * Prefetching cachelines helps hardware performance.
 * (Strictly a performance enhancement. Not functionally required).
 */
static void prefetch_data(void *p, int num, int stride)
{
	while (num-- > 0) {
		prefetchw(p);
		p += stride;
	}
}

static inline long gru_copy_handle(void *d, void *s)
{
	memcpy(d, s, GRU_HANDLE_BYTES);
	return GRU_HANDLE_BYTES;
}
static void gru_prefetch_context(void *gseg, void *cb, void *cbe,
				unsigned long cbrmap, unsigned long length)
{
	int i, scr;

	prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
		      GRU_CACHE_LINE_BYTES);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
		prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
			      GRU_CACHE_LINE_BYTES);
		cb += GRU_HANDLE_STRIDE;
	}
}
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
				  unsigned long cbrmap, unsigned long dsrmap,
				  int data_valid)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		if (data_valid) {
			save += gru_copy_handle(cb, save);
			save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE,
						save);
		} else {
			memset(cb, 0, GRU_CACHE_LINE_BYTES);
			memset(cbe + i * GRU_HANDLE_STRIDE, 0,
				GRU_CACHE_LINE_BYTES);
		}
		/* Flush CBE to hide race in context restart */
		mb();
		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
		cb += GRU_HANDLE_STRIDE;
	}

	if (data_valid)
		memcpy(gseg + GRU_DS_BASE, save, length);
	else
		memset(gseg + GRU_DS_BASE, 0, length);
}
static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
				    unsigned long cbrmap, unsigned long dsrmap)
{
	void *gseg, *cb, *cbe;
	unsigned long length;
	int i, scr;

	gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
	cb = gseg + GRU_CB_BASE;
	cbe = grubase + GRU_CBE_BASE;
	length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;

	/* CBEs may not be coherent. Flush them from cache */
	for_each_cbr_in_allocation_map(i, &cbrmap, scr)
		gru_flush_cache(cbe + i * GRU_HANDLE_STRIDE);
	mb();		/* Let the CL flush complete */

	gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

	for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
		save += gru_copy_handle(save, cb);
		save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
		cb += GRU_HANDLE_STRIDE;
	}
	memcpy(save, gseg + GRU_DS_BASE, length);
}
void gru_unload_context(struct gru_thread_state *gts, int savestate)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int ctxnum = gts->ts_ctxnum;

	if (!is_kernel_context(gts))
		zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	gru_dbg(grudev, "gts %p\n", gts);
	lock_cch_handle(cch);
	if (cch_interrupt_sync(cch))
		BUG();

	if (!is_kernel_context(gts))
		gru_unload_mm_tracker(gru, gts);
	if (savestate) {
		gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
					ctxnum, gts->ts_cbr_map,
					gts->ts_dsr_map);
		gts->ts_data_valid = 1;
	}

	if (cch_deallocate(cch))
		BUG();
	unlock_cch_handle(cch);

	gru_free_gru_context(gts);
}
/*
 * Load a GRU context by copying it from the thread data structure in memory
 * to the GRU.
 */
void gru_load_context(struct gru_thread_state *gts)
{
	struct gru_state *gru = gts->ts_gru;
	struct gru_context_configuration_handle *cch;
	int i, err, asid, ctxnum = gts->ts_ctxnum;

	gru_dbg(grudev, "gts %p\n", gts);
	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	lock_cch_handle(cch);
	cch->tfm_fault_bit_enable =
	    (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
	     || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
	if (cch->tlb_int_enable) {
		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
		cch->tlb_int_select = gts->ts_tlb_int_select;
	}
	if (gts->ts_cch_req_slice >= 0) {
		cch->req_slice_set_enable = 1;
		cch->req_slice = gts->ts_cch_req_slice;
	} else {
		cch->req_slice_set_enable = 0;
	}
	cch->tfm_done_bit_enable = 0;
	cch->dsr_allocation_map = gts->ts_dsr_map;
	cch->cbr_allocation_map = gts->ts_cbr_map;

	if (is_kernel_context(gts)) {
		cch->unmap_enable = 1;
		cch->tfm_done_bit_enable = 1;
		cch->cb_int_enable = 1;
	} else {
		cch->unmap_enable = 0;
		cch->tfm_done_bit_enable = 0;
		cch->cb_int_enable = 0;
		asid = gru_load_mm_tracker(gru, gts);
		for (i = 0; i < 8; i++) {
			cch->asid[i] = asid + i;
			cch->sizeavail[i] = gts->ts_sizeavail;
		}
	}

	err = cch_allocate(cch);
	if (err) {
		gru_dbg(grudev,
			"err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
			err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
		BUG();
	}

	gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
			gts->ts_cbr_map, gts->ts_dsr_map, gts->ts_data_valid);

	if (cch_start(cch))
		BUG();
	unlock_cch_handle(cch);
}
/*
 * Update fields in an active CCH:
 *	- retarget interrupts on local blade
 *	- update sizeavail mask
 */
int gru_update_cch(struct gru_thread_state *gts)
{
	struct gru_context_configuration_handle *cch;
	struct gru_state *gru = gts->ts_gru;
	int i, ctxnum = gts->ts_ctxnum, ret = 0;

	cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

	lock_cch_handle(cch);
	if (cch->state == CCHSTATE_ACTIVE) {
		if (gru->gs_gts[gts->ts_ctxnum] != gts)
			goto exit;
		if (cch_interrupt(cch))
			BUG();
		for (i = 0; i < 8; i++)
			cch->sizeavail[i] = gts->ts_sizeavail;
		gts->ts_tlb_int_select = gru_cpu_fault_map_id();
		cch->tlb_int_select = gru_cpu_fault_map_id();
		cch->tfm_fault_bit_enable =
		  (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
		    || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
		if (cch_start(cch))
			BUG();
		ret = 1;
	}
exit:
	unlock_cch_handle(cch);
	return ret;
}
/*
 * Update CCH tlb interrupt select. Required when all the following is true:
 *	- task's GRU context is loaded into a GRU
 *	- task is using interrupt notification for TLB faults
 *	- task has migrated to a different cpu on the same blade where
 *	  it was previously running.
 */
static int gru_retarget_intr(struct gru_thread_state *gts)
{
	if (gts->ts_tlb_int_select < 0
	    || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
		return 0;

	gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
		gru_cpu_fault_map_id());
	return gru_update_cch(gts);
}
/*
 * Unload the gru context if it is not assigned to the correct blade or
 * chiplet. Misassignment can occur if the process migrates to a different
 * blade or if the user changes the selected blade/chiplet.
 */
void gru_check_context_placement(struct gru_thread_state *gts)
{
	struct gru_state *gru;
	int blade_id, chiplet_id;

	/*
	 * If the current task is the context owner, verify that the
	 * context is correctly placed. This test is skipped for non-owner
	 * references. Pthread apps use non-owner references to the CBRs.
	 */
	gru = gts->ts_gru;
	if (!gru || gts->ts_tgid_owner != current->tgid)
		return;

	blade_id = gts->ts_user_blade_id;
	if (blade_id < 0)
		blade_id = uv_numa_blade_id();

	chiplet_id = gts->ts_user_chiplet_id;
	if (gru->gs_blade_id != blade_id ||
	    (chiplet_id >= 0 && chiplet_id != gru->gs_chiplet_id)) {
		STAT(check_context_unload);
		gru_unload_context(gts, 1);
	} else if (gru_retarget_intr(gts)) {
		STAT(check_context_retarget_intr);
	}
}
/*
 * Insufficient GRU resources available on the local blade. Steal a context
 * from a process. This is a hack until a _real_ resource scheduler is
 * written....
 */
#define next_ctxnum(n)	((n) <  GRU_NUM_CCH - 2 ? (n) + 1 : 0)
#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ?  \
				 ((g)+1) : &(b)->bs_grus[0])
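
/*
 * Example rotation (illustrative; assumes GRU_NUM_CCH == 16 and
 * GRU_CHIPLETS_PER_BLADE == 2 as defined in the GRU headers):
 *
 *	next_ctxnum(13) == 14,  next_ctxnum(14) == 0
 *	next_gru(b, &b->bs_grus[0]) == &b->bs_grus[1]
 *	next_gru(b, &b->bs_grus[1]) == &b->bs_grus[0]
 *
 * i.e. the steal scan walks contexts round-robin, advancing to the next
 * chiplet each time the context numbers wrap.
 */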
static int is_gts_stealable(struct gru_thread_state *gts,
		struct gru_blade_state *bs)
{
	if (is_kernel_context(gts))
		return down_write_trylock(&bs->bs_kgts_sema);
	else
		return mutex_trylock(&gts->ts_ctxlock);
}
static void gts_stolen(struct gru_thread_state *gts,
		struct gru_blade_state *bs)
{
	if (is_kernel_context(gts)) {
		up_write(&bs->bs_kgts_sema);
		STAT(steal_kernel_context);
	} else {
		mutex_unlock(&gts->ts_ctxlock);
		STAT(steal_user_context);
	}
}
void gru_steal_context(struct gru_thread_state *gts)
{
	struct gru_blade_state *blade;
	struct gru_state *gru, *gru0;
	struct gru_thread_state *ngts = NULL;
	int ctxnum, ctxnum0, flag = 0, cbr, dsr;
	int blade_id = gts->ts_user_blade_id;
	int chiplet_id = gts->ts_user_chiplet_id;

	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
	cbr = gts->ts_cbr_au_count;
	dsr = gts->ts_dsr_au_count;

	blade = gru_base[blade_id];
	spin_lock(&blade->bs_lock);

	ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
	gru = blade->bs_lru_gru;
	if (ctxnum == 0)
		gru = next_gru(blade, gru);
	blade->bs_lru_gru = gru;
	blade->bs_lru_ctxnum = ctxnum;
	ctxnum0 = ctxnum;
	gru0 = gru;
	while (1) {
		if (chiplet_id < 0 || chiplet_id == gru->gs_chiplet_id) {
			if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
				break;
			spin_lock(&gru->gs_lock);
			for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
				if (flag && gru == gru0 && ctxnum == ctxnum0)
					break;
				ngts = gru->gs_gts[ctxnum];
				/*
				 * We are grabbing locks out of order, so trylock is
				 * needed. GTSs are usually not locked, so the odds of
				 * success are high. If trylock fails, try to steal a
				 * different GSEG.
				 */
				if (ngts && is_gts_stealable(ngts, blade))
					break;
				ngts = NULL;
			}
			spin_unlock(&gru->gs_lock);
			if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
				break;
		}
		if (flag && gru == gru0)
			break;
		flag = 1;
		ctxnum = 0;
		gru = next_gru(blade, gru);
	}
	spin_unlock(&blade->bs_lock);

	if (ngts) {
		gts->ustats.context_stolen++;
		ngts->ts_steal_jiffies = jiffies;
		gru_unload_context(ngts, is_kernel_context(ngts) ? 0 : 1);
		gts_stolen(ngts, blade);
	} else {
		STAT(steal_context_failed);
	}
	gru_dbg(grudev,
		"stole gid %d, ctxnum %d from gts %p. Need cb %d, ds %d;"
		" avail cb %ld, ds %ld\n",
		gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
		hweight64(gru->gs_dsr_map));
}
/*
 * Assign a gru context.
 */
static int gru_assign_context_number(struct gru_state *gru)
{
	int ctxnum;

	ctxnum = find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
	__set_bit(ctxnum, &gru->gs_context_map);
	return ctxnum;
}
/*
 * Scan the GRUs on the local blade & assign a GRU context.
 */
struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
	struct gru_state *gru, *grux;
	int i, max_active_contexts;
	int blade_id = gts->ts_user_blade_id;
	int chiplet_id = gts->ts_user_chiplet_id;

	if (blade_id < 0)
		blade_id = uv_numa_blade_id();
again:
	gru = NULL;
	max_active_contexts = GRU_NUM_CCH;
	for_each_gru_on_blade(grux, blade_id, i) {
		if (chiplet_id >= 0 && chiplet_id != grux->gs_chiplet_id)
			continue;
		if (check_gru_resources(grux, gts->ts_cbr_au_count,
					gts->ts_dsr_au_count,
					max_active_contexts)) {
			gru = grux;
			max_active_contexts = grux->gs_active_contexts;
			if (max_active_contexts == 0)
				break;
		}
	}

	if (gru) {
		spin_lock(&gru->gs_lock);
		if (!check_gru_resources(gru, gts->ts_cbr_au_count,
					 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
			spin_unlock(&gru->gs_lock);
			goto again;
		}
		reserve_gru_resources(gru, gts);
		gts->ts_gru = gru;
		gts->ts_blade = gru->gs_blade_id;
		gts->ts_ctxnum = gru_assign_context_number(gru);
		atomic_inc(&gts->ts_refcnt);
		gru->gs_gts[gts->ts_ctxnum] = gts;
		spin_unlock(&gru->gs_lock);

		STAT(assign_context);
		gru_dbg(grudev,
			"gseg %p, gts %p, gid %d, ctx %d, cbr %d, dsr %d\n",
			gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
			gts->ts_gru->gs_gid, gts->ts_ctxnum,
			gts->ts_cbr_au_count, gts->ts_dsr_au_count);
	} else {
		gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
		STAT(assign_context_failed);
	}

	return gru;
}
/*
 * Map the user's GRU segment
 *
 *	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
 */
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct gru_thread_state *gts;
	unsigned long paddr, vaddr;

	vaddr = (unsigned long)vmf->virtual_address;
	gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
		vma, vaddr, GSEG_BASE(vaddr));
	STAT(nopfn);

	/* The following check ensures vaddr is a valid address in the VMA */
	gts = gru_find_thread_state(vma, TSID(vaddr, vma));
	if (!gts)
		return VM_FAULT_SIGBUS;

again:
	mutex_lock(&gts->ts_ctxlock);
	preempt_disable();

	gru_check_context_placement(gts);

	if (!gts->ts_gru) {
		STAT(load_user_context);
		if (!gru_assign_gru_context(gts)) {
			preempt_enable();
			mutex_unlock(&gts->ts_ctxlock);
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
			if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
				gru_steal_context(gts);
			goto again;
		}
		gru_load_context(gts);
		paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
		remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
				paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
				vma->vm_page_prot);
	}

	preempt_enable();
	mutex_unlock(&gts->ts_ctxlock);

	return VM_FAULT_NOPAGE;
}
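
/*
 * Overall flow (summary): a user mmaps a GSEG, the first reference faults
 * into gru_fault(), a physical context is assigned (stealing one if the
 * blade is full) and loaded, and remap_pfn_range() maps the segment so
 * later references go straight to the hardware without faulting.
 */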