/*
 * SN Platform GRU Driver
 *
 * DRIVER TABLE MANAGER + GRU CONTEXT LOAD/UNLOAD
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (c) 2008 Silicon Graphics, Inc.  All Rights Reserved.
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/device.h>
#include <linux/list.h>
#include <asm/uv/uv_hub.h>
#include "grutables.h"
#include "gruhandles.h"
unsigned long gru_options __read_mostly;

static struct device_driver gru_driver = {
        .name = "gru"
};

static struct device gru_device = {
        .driver = &gru_driver,
};

struct device *grudev = &gru_device;
/*
 * Select a gru fault map to be used by the current cpu. Note that
 * multiple cpus may be using the same map.
 *	ZZZ should "shift" be used?? Depends on HT cpu numbering
 *	ZZZ should be inline but did not work on emulator
 */
int gru_cpu_fault_map_id(void)
{
        return uv_blade_processor_id() % GRU_NUM_TFM;
}
/*--------- ASID Management -------------------------------------------
 *
 *  Initially, assign asids sequentially from MIN_ASID .. MAX_ASID.
 *  Once MAX is reached, flush the TLB & start over. However,
 *  some asids may still be in use. There won't be many (percentage wise) still
 *  in use. Search active contexts & determine the value of the first
 *  asid in use ("x"s below). Set "limit" to this value.
 *  This defines a block of assignable asids.
 *
 *  When "limit" is reached, search forward from limit+1 and determine the
 *  next block of assignable asids.
 *
 *  Repeat until MAX_ASID is reached, then start over again.
 *
 *  Each time MAX_ASID is reached, increment the asid generation. Since
 *  the search for in-use asids only checks contexts with GRUs currently
 *  assigned, asids in some contexts will be missed. Prior to loading
 *  a context, the asid generation of the GTS asid is rechecked. If it
 *  doesn't match the current generation, a new asid will be assigned.
 *
 *	0---------------x------------x---------------------x----|
 *	  ^-next	^-limit	   ^-MAX_ASID
 *
 *  All asid manipulation & context loading/unloading is protected by the
 *  gru->gs_lock.
 */
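/*
 * Minimal sketch (illustration only, not part of the driver) of the
 * generation recheck described above. It uses the gru_mm_tracker fields
 * (mt_asid, mt_asid_gen) and the gru_state field (gs_asid_gen) exactly as
 * gru_load_mm_tracker() does below: a previously assigned asid may be reused
 * only if it is non-zero and was allocated in the current asid generation.
 */
static inline int gru_asid_still_valid(struct gru_mm_tracker *asids,
                                       struct gru_state *gru)
{
        return asids->mt_asid != 0 && asids->mt_asid_gen == gru->gs_asid_gen;
}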
/* Hit the asid limit. Start over */
static int gru_wrap_asid(struct gru_state *gru)
{
        gru_dbg(grudev, "gru %p\n", gru);
        gru->gs_asid_gen++;
        gru_flush_all_tlb(gru);
        return MIN_ASID;
}
/* Find the next chunk of unused asids */
static int gru_reset_asid_limit(struct gru_state *gru, int asid)
{
        int i, gid, inuse_asid, limit;

        gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);

        limit = MAX_ASID;
        if (asid >= limit)
                asid = gru_wrap_asid(gru);
        gid = gru->gs_gid;
again:
        for (i = 0; i < GRU_NUM_CCH; i++) {
                if (!gru->gs_gts[i])
                        continue;
                inuse_asid = gru->gs_gts[i]->ts_gms->ms_asids[gid].mt_asid;
                gru_dbg(grudev, "gru %p, inuse_asid 0x%x, ctxnum %d, gts %p\n",
                        gru, inuse_asid, i, gru->gs_gts[i]);
                if (inuse_asid == asid) {
                        asid += ASID_INC;
                        if (asid >= limit) {
                                /*
                                 * empty range: reset the range limit and
                                 * start over
                                 */
                                limit = MAX_ASID;
                                if (asid >= MAX_ASID)
                                        asid = gru_wrap_asid(gru);
                                goto again;
                        }
                }

                if ((inuse_asid > asid) && (inuse_asid < limit))
                        limit = inuse_asid;
        }
        gru->gs_asid_limit = limit;
        gru->gs_asid = asid;
        gru_dbg(grudev, "gru %p, new asid 0x%x, new_limit 0x%x\n", gru, asid,
                limit);
        return asid;
}
/* Assign a new ASID to a thread context. */
static int gru_assign_asid(struct gru_state *gru)
{
        int asid;

        spin_lock(&gru->gs_asid_lock);
        gru->gs_asid += ASID_INC;
        asid = gru->gs_asid;
        if (asid >= gru->gs_asid_limit)
                asid = gru_reset_asid_limit(gru, asid);
        spin_unlock(&gru->gs_asid_lock);

        gru_dbg(grudev, "gru %p, asid 0x%x\n", gru, asid);
        return asid;
}
/*
 * Clear n bits in a word. Return a word indicating the bits that were cleared.
 * Optionally, build an array of chars that contain the bit numbers allocated.
 */
static unsigned long reserve_resources(unsigned long *p, int n, int mmax,
                                       char *idx)
{
        unsigned long bits = 0;
        int i;

        while (n--) {
                i = find_first_bit(p, mmax);
                if (i == mmax)
                        BUG();
                __clear_bit(i, p);
                __set_bit(i, &bits);
                if (idx)
                        *idx++ = i;
        }
        return bits;
}

unsigned long gru_reserve_cb_resources(struct gru_state *gru, int cbr_au_count,
                                       char *cbmap)
{
        return reserve_resources(&gru->gs_cbr_map, cbr_au_count, GRU_CBR_AU,
                                 cbmap);
}

unsigned long gru_reserve_ds_resources(struct gru_state *gru, int dsr_au_count,
                                       char *dsmap)
{
        return reserve_resources(&gru->gs_dsr_map, dsr_au_count, GRU_DSR_AU,
                                 dsmap);
}
static void reserve_gru_resources(struct gru_state *gru,
                                  struct gru_thread_state *gts)
{
        gru->gs_active_contexts++;
        gts->ts_cbr_map =
            gru_reserve_cb_resources(gru, gts->ts_cbr_au_count,
                                     gts->ts_cbr_idx);
        gts->ts_dsr_map =
            gru_reserve_ds_resources(gru, gts->ts_dsr_au_count, NULL);
}
static void free_gru_resources(struct gru_state *gru,
                               struct gru_thread_state *gts)
{
        gru->gs_active_contexts--;
        gru->gs_cbr_map |= gts->ts_cbr_map;
        gru->gs_dsr_map |= gts->ts_dsr_map;
}
/*
 * Check if a GRU has sufficient free resources to satisfy an allocation
 * request. Note: GRU locks may or may not be held when this is called. If
 * not held, recheck after acquiring the appropriate locks.
 *
 * Returns 1 if sufficient resources, 0 if not
 */
static int check_gru_resources(struct gru_state *gru, int cbr_au_count,
                               int dsr_au_count, int max_active_contexts)
{
        return hweight64(gru->gs_cbr_map) >= cbr_au_count
                && hweight64(gru->gs_dsr_map) >= dsr_au_count
                && gru->gs_active_contexts < max_active_contexts;
}
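/*
 * Usage sketch (illustration only, not part of the driver) of the locking
 * note above, mirroring the pattern used by gru_assign_gru_context() below:
 * an unlocked check is only a hint, so the result must be confirmed again
 * after gru->gs_lock is taken, before resources are actually reserved.
 */
static inline int gru_try_reserve_sketch(struct gru_state *gru,
                                         struct gru_thread_state *gts)
{
        if (!check_gru_resources(gru, gts->ts_cbr_au_count,
                                 gts->ts_dsr_au_count, GRU_NUM_CCH))
                return 0;                       /* unlocked hint only */
        spin_lock(&gru->gs_lock);
        if (!check_gru_resources(gru, gts->ts_cbr_au_count,
                                 gts->ts_dsr_au_count, GRU_NUM_CCH)) {
                spin_unlock(&gru->gs_lock);     /* lost the race */
                return 0;
        }
        reserve_gru_resources(gru, gts);        /* still sufficient */
        spin_unlock(&gru->gs_lock);
        return 1;
}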
/*
 * TLB management requires tracking all GRU chiplets that have loaded a GSEG
 * using the mm.
 */
static int gru_load_mm_tracker(struct gru_state *gru, struct gru_mm_struct *gms,
                               int ctxnum)
{
        struct gru_mm_tracker *asids = &gms->ms_asids[gru->gs_gid];
        unsigned short ctxbitmap = (1 << ctxnum);
        int asid;

        spin_lock(&gms->ms_asid_lock);
        asid = asids->mt_asid;
        if (asid == 0 || asids->mt_asid_gen != gru->gs_asid_gen) {
                asid = gru_assign_asid(gru);
                asids->mt_asid = asid;
                asids->mt_asid_gen = gru->gs_asid_gen;
        }

        BUG_ON(asids->mt_ctxbitmap & ctxbitmap);
        asids->mt_ctxbitmap |= ctxbitmap;
        if (!test_bit(gru->gs_gid, gms->ms_asidmap))
                __set_bit(gru->gs_gid, gms->ms_asidmap);
        spin_unlock(&gms->ms_asid_lock);

        gru_dbg(grudev,
                "gru %x, gms %p, ctxnum %d, asid 0x%x, asidmap 0x%lx\n",
                gru->gs_gid, gms, ctxnum, asid, gms->ms_asidmap[0]);
        return asid;
}
static void gru_unload_mm_tracker(struct gru_state *gru,
                                  struct gru_mm_struct *gms, int ctxnum)
{
        struct gru_mm_tracker *asids;
        unsigned short ctxbitmap;

        asids = &gms->ms_asids[gru->gs_gid];
        ctxbitmap = (1 << ctxnum);
        spin_lock(&gms->ms_asid_lock);
        BUG_ON((asids->mt_ctxbitmap & ctxbitmap) != ctxbitmap);
        asids->mt_ctxbitmap ^= ctxbitmap;
        gru_dbg(grudev, "gru %x, gms %p, ctxnum %d, asidmap 0x%lx\n",
                gru->gs_gid, gms, ctxnum, gms->ms_asidmap[0]);
        spin_unlock(&gms->ms_asid_lock);
}
/*
 * Decrement the reference count on a GTS structure. Free the structure
 * if the reference count goes to zero.
 */
void gts_drop(struct gru_thread_state *gts)
{
        if (gts && atomic_dec_return(&gts->ts_refcnt) == 0) {
                gru_drop_mmu_notifier(gts->ts_gms);
                kfree(gts);
        }
}
/*
 * Locate the GTS structure for the current thread.
 */
static struct gru_thread_state *gru_find_current_gts_nolock(struct gru_vma_data
                                                            *vdata, int tsid)
{
        struct gru_thread_state *gts;

        list_for_each_entry(gts, &vdata->vd_head, ts_next)
                if (gts->ts_tsid == tsid)
                        return gts;
        return NULL;
}
/*
 * Allocate a thread state structure.
 */
static struct gru_thread_state *gru_alloc_gts(struct vm_area_struct *vma,
                                              struct gru_vma_data *vdata,
                                              int tsid)
{
        struct gru_thread_state *gts;
        int bytes;

        bytes = DSR_BYTES(vdata->vd_dsr_au_count) +
                CBR_BYTES(vdata->vd_cbr_au_count);
        bytes += sizeof(struct gru_thread_state);
        gts = kzalloc(bytes, GFP_KERNEL);
        if (!gts)
                return NULL;

        atomic_set(&gts->ts_refcnt, 1);
        mutex_init(&gts->ts_ctxlock);
        gts->ts_cbr_au_count = vdata->vd_cbr_au_count;
        gts->ts_dsr_au_count = vdata->vd_dsr_au_count;
        gts->ts_user_options = vdata->vd_user_options;
        gts->ts_tsid = tsid;
        gts->ts_ctxnum = NULLCTX;
        gts->ts_mm = current->mm;
        gts->ts_vma = vma;
        gts->ts_tlb_int_select = -1;
        gts->ts_gms = gru_register_mmu_notifier();
        if (!gts->ts_gms)
                goto err;

        gru_dbg(grudev, "alloc vdata %p, new gts %p\n", vdata, gts);
        return gts;

err:
        gts_drop(gts);
        return NULL;
}
/*
 * Allocate a vma private data structure.
 */
struct gru_vma_data *gru_alloc_vma_data(struct vm_area_struct *vma, int tsid)
{
        struct gru_vma_data *vdata = NULL;

        vdata = kmalloc(sizeof(*vdata), GFP_KERNEL);
        if (!vdata)
                return NULL;

        INIT_LIST_HEAD(&vdata->vd_head);
        spin_lock_init(&vdata->vd_lock);
        gru_dbg(grudev, "alloc vdata %p\n", vdata);
        return vdata;
}
/*
 * Find the thread state structure for the current thread.
 */
struct gru_thread_state *gru_find_thread_state(struct vm_area_struct *vma,
                                               int tsid)
{
        struct gru_vma_data *vdata = vma->vm_private_data;
        struct gru_thread_state *gts;

        spin_lock(&vdata->vd_lock);
        gts = gru_find_current_gts_nolock(vdata, tsid);
        spin_unlock(&vdata->vd_lock);
        gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
        return gts;
}
/*
 * Allocate a new thread state for a GSEG. Note that races may allow
 * another thread to race to create a gts.
 */
struct gru_thread_state *gru_alloc_thread_state(struct vm_area_struct *vma,
                                                int tsid)
{
        struct gru_vma_data *vdata = vma->vm_private_data;
        struct gru_thread_state *gts, *ngts;

        gts = gru_alloc_gts(vma, vdata, tsid);
        if (!gts)
                return NULL;

        spin_lock(&vdata->vd_lock);
        ngts = gru_find_current_gts_nolock(vdata, tsid);
        if (ngts) {
                gts_drop(gts);
                gts = ngts;
                STAT(gts_double_allocate);
        } else {
                list_add(&gts->ts_next, &vdata->vd_head);
        }
        spin_unlock(&vdata->vd_lock);
        gru_dbg(grudev, "vma %p, gts %p\n", vma, gts);
        return gts;
}
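/*
 * Usage sketch (illustration only, not part of the driver): a typical caller
 * resolving a GSEG reference first looks up an existing GTS and only falls
 * back to allocation, relying on gru_alloc_thread_state() above to resolve
 * the creation race it documents.
 */
static inline struct gru_thread_state *gru_get_gts_sketch(
                                struct vm_area_struct *vma, int tsid)
{
        struct gru_thread_state *gts;

        gts = gru_find_thread_state(vma, tsid);
        if (!gts)
                gts = gru_alloc_thread_state(vma, tsid);
        return gts;
}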
/*
 * Free the GRU context assigned to the thread state.
 */
static void gru_free_gru_context(struct gru_thread_state *gts)
{
        struct gru_state *gru;

        gru = gts->ts_gru;
        gru_dbg(grudev, "gts %p, gru %p\n", gts, gru);

        spin_lock(&gru->gs_lock);
        gru->gs_gts[gts->ts_ctxnum] = NULL;
        free_gru_resources(gru, gts);
        BUG_ON(test_bit(gts->ts_ctxnum, &gru->gs_context_map) == 0);
        __clear_bit(gts->ts_ctxnum, &gru->gs_context_map);
        gts->ts_ctxnum = NULLCTX;
        gts->ts_gru = NULL;
        spin_unlock(&gru->gs_lock);

        gts_drop(gts);
}
/*
 * Prefetching cachelines helps hardware performance.
 * (Strictly a performance enhancement. Not functionally required).
 */
static void prefetch_data(void *p, int num, int stride)
{
        while (num-- > 0) {
                prefetch(p);
                p += stride;
        }
}

static inline long gru_copy_handle(void *d, void *s)
{
        memcpy(d, s, GRU_HANDLE_BYTES);
        return GRU_HANDLE_BYTES;
}

static void gru_prefetch_context(void *gseg, void *cb, void *cbe, unsigned long cbrmap,
                                 unsigned long length)
{
        int i, scr;

        prefetch_data(gseg + GRU_DS_BASE, length / GRU_CACHE_LINE_BYTES,
                      GRU_CACHE_LINE_BYTES);

        for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
                prefetch_data(cb, 1, GRU_CACHE_LINE_BYTES);
                prefetch_data(cbe + i * GRU_HANDLE_STRIDE, 1,
                              GRU_CACHE_LINE_BYTES);
                cb += GRU_HANDLE_STRIDE;
        }
}
static void gru_load_context_data(void *save, void *grubase, int ctxnum,
                                  unsigned long cbrmap, unsigned long dsrmap)
{
        void *gseg, *cb, *cbe;
        unsigned long length;
        int i, scr;

        gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
        cb = gseg + GRU_CB_BASE;
        cbe = grubase + GRU_CBE_BASE;
        length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
        gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

        for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
                save += gru_copy_handle(cb, save);
                save += gru_copy_handle(cbe + i * GRU_HANDLE_STRIDE, save);
                cb += GRU_HANDLE_STRIDE;
        }

        memcpy(gseg + GRU_DS_BASE, save, length);
}
static void gru_unload_context_data(void *save, void *grubase, int ctxnum,
                                    unsigned long cbrmap, unsigned long dsrmap)
{
        void *gseg, *cb, *cbe;
        unsigned long length;
        int i, scr;

        gseg = grubase + ctxnum * GRU_GSEG_STRIDE;
        cb = gseg + GRU_CB_BASE;
        cbe = grubase + GRU_CBE_BASE;
        length = hweight64(dsrmap) * GRU_DSR_AU_BYTES;
        gru_prefetch_context(gseg, cb, cbe, cbrmap, length);

        for_each_cbr_in_allocation_map(i, &cbrmap, scr) {
                save += gru_copy_handle(save, cb);
                save += gru_copy_handle(save, cbe + i * GRU_HANDLE_STRIDE);
                cb += GRU_HANDLE_STRIDE;
        }

        memcpy(save, gseg + GRU_DS_BASE, length);
}
void gru_unload_context(struct gru_thread_state *gts, int savestate)
{
        struct gru_state *gru = gts->ts_gru;
        struct gru_context_configuration_handle *cch;
        int ctxnum = gts->ts_ctxnum;

        zap_vma_ptes(gts->ts_vma, UGRUADDR(gts), GRU_GSEG_PAGESIZE);
        cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

        lock_cch_handle(cch);
        if (cch_interrupt_sync(cch))
                BUG();
        gru_dbg(grudev, "gts %p\n", gts);

        gru_unload_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
        if (savestate)
                gru_unload_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr,
                                        ctxnum, gts->ts_cbr_map,
                                        gts->ts_dsr_map);

        if (cch_deallocate(cch))
                BUG();
        gts->ts_force_unload = 0;       /* ts_force_unload locked by CCH lock */
        unlock_cch_handle(cch);

        gru_free_gru_context(gts);
        STAT(unload_context);
}
/*
 * Load a GRU context by copying it from the thread data structure in memory
 * into the GRU.
 */
static void gru_load_context(struct gru_thread_state *gts)
{
        struct gru_state *gru = gts->ts_gru;
        struct gru_context_configuration_handle *cch;
        int err, asid, ctxnum = gts->ts_ctxnum;

        gru_dbg(grudev, "gts %p\n", gts);
        cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

        lock_cch_handle(cch);
        asid = gru_load_mm_tracker(gru, gts->ts_gms, gts->ts_ctxnum);
        cch->tfm_fault_bit_enable =
            (gts->ts_user_options == GRU_OPT_MISS_FMM_POLL
             || gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
        cch->tlb_int_enable = (gts->ts_user_options == GRU_OPT_MISS_FMM_INTR);
        if (cch->tlb_int_enable) {
                gts->ts_tlb_int_select = gru_cpu_fault_map_id();
                cch->tlb_int_select = gts->ts_tlb_int_select;
        }
        cch->tfm_done_bit_enable = 0;
        err = cch_allocate(cch, asid, gts->ts_cbr_map, gts->ts_dsr_map);
        if (err) {
                gru_dbg(grudev,
                        "err %d: cch %p, gts %p, cbr 0x%lx, dsr 0x%lx\n",
                        err, cch, gts, gts->ts_cbr_map, gts->ts_dsr_map);
                BUG();
        }

        gru_load_context_data(gts->ts_gdata, gru->gs_gru_base_vaddr, ctxnum,
                              gts->ts_cbr_map, gts->ts_dsr_map);

        if (cch_start(cch))
                BUG();
        unlock_cch_handle(cch);
}
/*
 * Update fields in an active CCH:
 *	- retarget interrupts on local blade
 *	- force a delayed context unload by clearing the CCH asids. This
 *	  forces TLB misses for new GRU instructions. The context is unloaded
 *	  when the next TLB miss occurs.
 */
static int gru_update_cch(struct gru_thread_state *gts, int int_select)
{
        struct gru_context_configuration_handle *cch;
        struct gru_state *gru = gts->ts_gru;
        int i, ctxnum = gts->ts_ctxnum, ret = 0;

        cch = get_cch(gru->gs_gru_base_vaddr, ctxnum);

        lock_cch_handle(cch);
        if (cch->state == CCHSTATE_ACTIVE) {
                if (gru->gs_gts[gts->ts_ctxnum] != gts)
                        goto exit;
                if (cch_interrupt(cch))
                        BUG();
                if (int_select >= 0) {
                        gts->ts_tlb_int_select = int_select;
                        cch->tlb_int_select = int_select;
                } else {
                        for (i = 0; i < 8; i++)
                                cch->asid[i] = 0;
                        cch->tfm_fault_bit_enable = 0;
                        cch->tlb_int_enable = 0;
                        gts->ts_force_unload = 1;
                }
                if (cch_start(cch))
                        BUG();
                ret = 1;
        }
exit:
        unlock_cch_handle(cch);
        return ret;
}
/*
 * Update CCH tlb interrupt select. Required when all of the following are true:
 *	- task's GRU context is loaded into a GRU
 *	- task is using interrupt notification for TLB faults
 *	- task has migrated to a different cpu on the same blade where
 *	  it was previously running.
 */
static int gru_retarget_intr(struct gru_thread_state *gts)
{
        if (gts->ts_tlb_int_select < 0
            || gts->ts_tlb_int_select == gru_cpu_fault_map_id())
                return 0;

        gru_dbg(grudev, "retarget from %d to %d\n", gts->ts_tlb_int_select,
                gru_cpu_fault_map_id());
        return gru_update_cch(gts, gru_cpu_fault_map_id());
}
/*
 * Insufficient GRU resources available on the local blade. Steal a context from
 * a process. This is a hack until a _real_ resource scheduler is written....
 */
#define next_ctxnum(n)	((n) < GRU_NUM_CCH - 2 ? (n) + 1 : 0)
#define next_gru(b, g)	(((g) < &(b)->bs_grus[GRU_CHIPLETS_PER_BLADE - 1]) ? \
				 ((g)+1) : &(b)->bs_grus[0])

static void gru_steal_context(struct gru_thread_state *gts)
{
        struct gru_blade_state *blade;
        struct gru_state *gru, *gru0;
        struct gru_thread_state *ngts = NULL;
        int ctxnum, ctxnum0, flag = 0, cbr, dsr;

        cbr = gts->ts_cbr_au_count;
        dsr = gts->ts_dsr_au_count;

        blade = gru_base[uv_numa_blade_id()];
        spin_lock(&blade->bs_lock);

        ctxnum = next_ctxnum(blade->bs_lru_ctxnum);
        gru = blade->bs_lru_gru;
        if (ctxnum == 0)
                gru = next_gru(blade, gru);
        ctxnum0 = ctxnum;
        gru0 = gru;
        while (1) {
                if (check_gru_resources(gru, cbr, dsr, GRU_NUM_CCH))
                        break;
                spin_lock(&gru->gs_lock);
                for (; ctxnum < GRU_NUM_CCH; ctxnum++) {
                        if (flag && gru == gru0 && ctxnum == ctxnum0)
                                break;
                        ngts = gru->gs_gts[ctxnum];
                        /*
                         * We are grabbing locks out of order, so trylock is
                         * needed. GTSs are usually not locked, so the odds of
                         * success are high. If trylock fails, try to steal a
                         * different GSEG.
                         */
                        if (ngts && mutex_trylock(&ngts->ts_ctxlock))
                                break;
                        ngts = NULL;
                        flag = 1;
                }
                spin_unlock(&gru->gs_lock);
                if (ngts || (flag && gru == gru0 && ctxnum == ctxnum0))
                        break;
                ctxnum = 0;
                gru = next_gru(blade, gru);
        }
        blade->bs_lru_gru = gru;
        blade->bs_lru_ctxnum = ctxnum;
        spin_unlock(&blade->bs_lock);

        if (ngts) {
                ngts->ts_steal_jiffies = jiffies;
                gru_unload_context(ngts, 1);
                mutex_unlock(&ngts->ts_ctxlock);
        } else {
                STAT(steal_context_failed);
        }
        gru_dbg(grudev,
                "stole gru %x, ctxnum %d from gts %p. Need cb %d, ds %d;"
                " avail cb %ld, ds %ld\n",
                gru->gs_gid, ctxnum, ngts, cbr, dsr, hweight64(gru->gs_cbr_map),
                hweight64(gru->gs_dsr_map));
}
/*
 * Scan the GRUs on the local blade & assign a GRU context.
 */
static struct gru_state *gru_assign_gru_context(struct gru_thread_state *gts)
{
        struct gru_state *gru, *grux;
        int i, max_active_contexts;

again:
        gru = NULL;
        max_active_contexts = GRU_NUM_CCH;
        for_each_gru_on_blade(grux, uv_numa_blade_id(), i) {
                if (check_gru_resources(grux, gts->ts_cbr_au_count,
                                        gts->ts_dsr_au_count,
                                        max_active_contexts)) {
                        gru = grux;
                        max_active_contexts = grux->gs_active_contexts;
                        if (max_active_contexts == 0)
                                break;
                }
        }

        if (gru) {
                spin_lock(&gru->gs_lock);
                if (!check_gru_resources(gru, gts->ts_cbr_au_count,
                                         gts->ts_dsr_au_count, GRU_NUM_CCH)) {
                        spin_unlock(&gru->gs_lock);
                        goto again;
                }
                reserve_gru_resources(gru, gts);
                gts->ts_gru = gru;
                gts->ts_ctxnum =
                    find_first_zero_bit(&gru->gs_context_map, GRU_NUM_CCH);
                BUG_ON(gts->ts_ctxnum == GRU_NUM_CCH);
                atomic_inc(&gts->ts_refcnt);
                gru->gs_gts[gts->ts_ctxnum] = gts;
                __set_bit(gts->ts_ctxnum, &gru->gs_context_map);
                spin_unlock(&gru->gs_lock);

                STAT(assign_context);
                gru_dbg(grudev,
                        "gseg %p, gts %p, gru %x, ctx %d, cbr %d, dsr %d\n",
                        gseg_virtual_address(gts->ts_gru, gts->ts_ctxnum), gts,
                        gts->ts_gru->gs_gid, gts->ts_ctxnum,
                        gts->ts_cbr_au_count, gts->ts_dsr_au_count);
        } else {
                gru_dbg(grudev, "failed to allocate a GTS %s\n", "");
                STAT(assign_context_failed);
        }

        return gru;
}
/*
 * Map the user's GRU segment
 *
 *	Note: gru segments are always mmapped on GRU_GSEG_PAGESIZE boundaries.
 */
int gru_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct gru_thread_state *gts;
        unsigned long paddr, vaddr;

        vaddr = (unsigned long)vmf->virtual_address;
        gru_dbg(grudev, "vma %p, vaddr 0x%lx (0x%lx)\n",
                vma, vaddr, GSEG_BASE(vaddr));

        /* The following check ensures vaddr is a valid address in the VMA */
        gts = gru_find_thread_state(vma, TSID(vaddr, vma));
        if (!gts)
                return VM_FAULT_SIGBUS;

again:
        mutex_lock(&gts->ts_ctxlock);
        if (gts->ts_gru) {
                if (gts->ts_gru->gs_blade_id != uv_numa_blade_id()) {
                        STAT(migrated_nopfn_unload);
                        gru_unload_context(gts, 1);
                } else {
                        if (gru_retarget_intr(gts))
                                STAT(migrated_nopfn_retarget);
                }
        }

        if (!gts->ts_gru) {
                if (!gru_assign_gru_context(gts)) {
                        mutex_unlock(&gts->ts_ctxlock);
                        schedule_timeout(GRU_ASSIGN_DELAY);  /* true hack ZZZ */
                        if (gts->ts_steal_jiffies + GRU_STEAL_DELAY < jiffies)
                                gru_steal_context(gts);
                        goto again;
                }
                gru_load_context(gts);
                paddr = gseg_physical_address(gts->ts_gru, gts->ts_ctxnum);
                remap_pfn_range(vma, vaddr & ~(GRU_GSEG_PAGESIZE - 1),
                                paddr >> PAGE_SHIFT, GRU_GSEG_PAGESIZE,
                                vma->vm_page_prot);
        }

        mutex_unlock(&gts->ts_ctxlock);

        return VM_FAULT_NOPAGE;
}
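/*
 * Hookup sketch (illustration only, not part of this file): gru_fault() is
 * intended to be installed as the .fault handler of the vm_operations_struct
 * used for the GRU mmap region, typically in the driver's file/mmap code.
 * The variable name below is illustrative.
 */
static const struct vm_operations_struct gru_vm_ops_sketch = {
        .fault          = gru_fault,
};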