kernel: remove unused utsname_set_machine()
[unleashed.git] / usr / src / uts / sun4u / ngdr / io / dr_mem.c
blobccb70b83cb99dbd797a355fbc48b13eeb8f523dd
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * DR memory support routines.
 */
31 #include <sys/note.h>
32 #include <sys/debug.h>
33 #include <sys/types.h>
34 #include <sys/errno.h>
35 #include <sys/param.h>
36 #include <sys/dditypes.h>
37 #include <sys/kmem.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/sunndi.h>
42 #include <sys/ddi_impldefs.h>
43 #include <sys/ndi_impldefs.h>
44 #include <sys/sysmacros.h>
45 #include <sys/machsystm.h>
46 #include <sys/spitregs.h>
47 #include <sys/cpuvar.h>
48 #include <sys/promif.h>
49 #include <vm/seg_kmem.h>
50 #include <sys/lgrp.h>
51 #include <sys/platform_module.h>
53 #include <vm/page.h>
55 #include <sys/dr.h>
56 #include <sys/dr_util.h>
58 extern struct memlist *phys_install;
60 /* TODO: push this reference below drmach line */
61 extern int kcage_on;
63 /* for the DR*INTERNAL_ERROR macros. see sys/dr.h. */
64 static char *dr_ie_fmt = "dr_mem.c %d";
66 static int dr_post_detach_mem_unit(dr_mem_unit_t *mp);
67 static int dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *mlist);
68 static int dr_select_mem_target(dr_handle_t *hp, dr_mem_unit_t *mp,
69 struct memlist *ml);
70 static void dr_init_mem_unit_data(dr_mem_unit_t *mp);
72 static int memlist_canfit(struct memlist *s_mlist,
73 struct memlist *t_mlist);
76 * dr_mem_unit_t.sbm_flags
78 #define DR_MFLAG_RESERVED 0x01 /* mem unit reserved for delete */
79 #define DR_MFLAG_SOURCE 0x02 /* source brd of copy/rename op */
80 #define DR_MFLAG_TARGET 0x04 /* target brd of copy/rename op */
81 #define DR_MFLAG_MEMUPSIZE 0x08 /* move from big to small board */
82 #define DR_MFLAG_MEMDOWNSIZE 0x10 /* move from small to big board */
83 #define DR_MFLAG_MEMRESIZE 0x18 /* move to different size board */
84 #define DR_MFLAG_RELOWNER 0x20 /* memory release (delete) owner */
85 #define DR_MFLAG_RELDONE 0x40 /* memory release (delete) done */
87 /* helper macros */
88 #define _ptob64(p) ((uint64_t)(p) << PAGESHIFT)
89 #define _b64top(b) ((pgcnt_t)((b) >> PAGESHIFT))
91 static struct memlist *
92 dr_get_memlist(dr_mem_unit_t *mp)
94 struct memlist *mlist = NULL;
95 sbd_error_t *err;
96 static fn_t f = "dr_get_memlist";
98 PR_MEM("%s for %s...\n", f, mp->sbm_cm.sbdev_path);
101 * Return cached memlist, if present.
102 * This memlist will be present following an
103 * unconfigure (a.k.a: detach) of this memunit.
104 * It should only be used in the case were a configure
105 * is bringing this memunit back in without going
106 * through the disconnect and connect states.
108 if (mp->sbm_mlist) {
109 PR_MEM("%s: found cached memlist\n", f);
111 mlist = memlist_dup(mp->sbm_mlist);
112 } else {
113 uint64_t basepa = _ptob64(mp->sbm_basepfn);
115 /* attempt to construct a memlist using phys_install */
117 /* round down to slice base address */
118 basepa &= ~(mp->sbm_slice_size - 1);
120 /* get a copy of phys_install to edit */
121 memlist_read_lock();
122 mlist = memlist_dup(phys_install);
123 memlist_read_unlock();
125 /* trim lower irrelevant span */
126 if (mlist)
127 mlist = memlist_del_span(mlist, 0ull, basepa);
129 /* trim upper irrelevant span */
130 if (mlist) {
131 uint64_t endpa;
133 basepa += mp->sbm_slice_size;
134 endpa = _ptob64(physmax + 1);
135 if (endpa > basepa)
136 mlist = memlist_del_span(
137 mlist,
138 basepa,
139 endpa - basepa);
142 if (mlist) {
143 /* successfully built a memlist */
144 PR_MEM("%s: derived memlist from phys_install\n", f);
147 /* if no mlist yet, try platform layer */
148 if (!mlist) {
149 err = drmach_mem_get_memlist(
150 mp->sbm_cm.sbdev_id, &mlist);
151 if (err) {
152 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
153 mlist = NULL; /* paranoia */
158 PR_MEM("%s: memlist for %s\n", f, mp->sbm_cm.sbdev_path);
159 PR_MEMLIST_DUMP(mlist);
161 return (mlist);
164 typedef struct {
165 kcondvar_t cond;
166 kmutex_t lock;
167 int error;
168 int done;
169 } dr_release_mem_sync_t;
172 * Memory has been logically removed by the time this routine is called.
174 static void
175 dr_mem_del_done(void *arg, int error)
177 dr_release_mem_sync_t *ds = arg;
179 mutex_enter(&ds->lock);
180 ds->error = error;
181 ds->done = 1;
182 cv_signal(&ds->cond);
183 mutex_exit(&ds->lock);
187 * When we reach here the memory being drained should have
188 * already been reserved in dr_pre_release_mem().
189 * Our only task here is to kick off the "drain" and wait
190 * for it to finish.
192 void
193 dr_release_mem(dr_common_unit_t *cp)
195 dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
196 int err;
197 dr_release_mem_sync_t rms;
198 static fn_t f = "dr_release_mem";
200 /* check that this memory unit has been reserved */
201 if (!(mp->sbm_flags & DR_MFLAG_RELOWNER)) {
202 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
203 return;
206 bzero((void *) &rms, sizeof (rms));
208 mutex_init(&rms.lock, NULL, MUTEX_DRIVER, NULL);
209 cv_init(&rms.cond, NULL, CV_DRIVER, NULL);
211 mutex_enter(&rms.lock);
212 err = kphysm_del_start(mp->sbm_memhandle, dr_mem_del_done,
213 (void *) &rms);
214 if (err == KPHYSM_OK) {
215 /* wait for completion or interrupt */
216 while (!rms.done) {
217 if (cv_wait_sig(&rms.cond, &rms.lock) == 0) {
218 /* then there is a pending UNIX signal */
219 (void) kphysm_del_cancel(mp->sbm_memhandle);
221 /* wait for completion */
222 while (!rms.done)
223 cv_wait(&rms.cond, &rms.lock);
226 /* get the result of the memory delete operation */
227 err = rms.error;
229 mutex_exit(&rms.lock);
231 cv_destroy(&rms.cond);
232 mutex_destroy(&rms.lock);
234 if (err != KPHYSM_OK) {
235 int e_code;
237 switch (err) {
238 case KPHYSM_ENOWORK:
239 e_code = ESBD_NOERROR;
240 break;
242 case KPHYSM_EHANDLE:
243 case KPHYSM_ESEQUENCE:
244 e_code = ESBD_INTERNAL;
245 break;
247 case KPHYSM_ENOTVIABLE:
248 e_code = ESBD_MEM_NOTVIABLE;
249 break;
251 case KPHYSM_EREFUSED:
252 e_code = ESBD_MEM_REFUSED;
253 break;
255 case KPHYSM_ENONRELOC:
256 e_code = ESBD_MEM_NONRELOC;
257 break;
259 case KPHYSM_ECANCELLED:
260 e_code = ESBD_MEM_CANCELLED;
261 break;
263 case KPHYSM_ERESOURCE:
264 e_code = ESBD_MEMFAIL;
265 break;
267 default:
268 cmn_err(CE_WARN,
269 "%s: unexpected kphysm error code %d,"
270 " id 0x%p",
271 f, err, mp->sbm_cm.sbdev_id);
273 e_code = ESBD_IO;
274 break;
277 if (e_code != ESBD_NOERROR) {
278 dr_dev_err(CE_IGNORE, &mp->sbm_cm, e_code);
283 void
284 dr_attach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
286 _NOTE(ARGUNUSED(hp))
288 dr_mem_unit_t *mp = (dr_mem_unit_t *)cp;
289 struct memlist *ml, *mc;
290 sbd_error_t *err;
291 static fn_t f = "dr_attach_mem";
293 PR_MEM("%s...\n", f);
295 dr_lock_status(hp->h_bd);
296 err = drmach_configure(cp->sbdev_id, 0);
297 dr_unlock_status(hp->h_bd);
298 if (err) {
299 DRERR_SET_C(&cp->sbdev_error, &err);
300 return;
303 ml = dr_get_memlist(mp);
304 for (mc = ml; mc; mc = mc->ml_next) {
305 int rv;
306 sbd_error_t *err;
308 rv = kphysm_add_memory_dynamic(
309 (pfn_t)(mc->ml_address >> PAGESHIFT),
310 (pgcnt_t)(mc->ml_size >> PAGESHIFT));
311 if (rv != KPHYSM_OK) {
313 * translate kphysm error and
314 * store in devlist error
316 switch (rv) {
317 case KPHYSM_ERESOURCE:
318 rv = ESBD_NOMEM;
319 break;
321 case KPHYSM_EFAULT:
322 rv = ESBD_FAULT;
323 break;
325 default:
326 rv = ESBD_INTERNAL;
327 break;
330 if (rv == ESBD_INTERNAL) {
331 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
332 } else
333 dr_dev_err(CE_WARN, &mp->sbm_cm, rv);
334 break;
337 err = drmach_mem_add_span(
338 mp->sbm_cm.sbdev_id, mc->ml_address, mc->ml_size);
339 if (err) {
340 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
341 break;
345 memlist_delete(ml);
347 /* back out if configure failed */
348 if (mp->sbm_cm.sbdev_error != NULL) {
349 dr_lock_status(hp->h_bd);
350 err = drmach_unconfigure(cp->sbdev_id,
351 DEVI_BRANCH_DESTROY);
352 if (err)
353 sbd_err_clear(&err);
354 dr_unlock_status(hp->h_bd);
358 #define DR_SCRUB_VALUE 0x0d0e0a0d0b0e0e0fULL
360 static void
361 dr_mem_ecache_scrub(dr_mem_unit_t *mp, struct memlist *mlist)
363 #ifdef DEBUG
364 clock_t stime = ddi_get_lbolt();
365 #endif /* DEBUG */
367 struct memlist *ml;
368 uint64_t scrub_value = DR_SCRUB_VALUE;
369 processorid_t cpuid;
370 static fn_t f = "dr_mem_ecache_scrub";
372 cpuid = drmach_mem_cpu_affinity(mp->sbm_cm.sbdev_id);
373 affinity_set(cpuid);
375 PR_MEM("%s: using proc %d, memlist...\n", f,
376 (cpuid == CPU_CURRENT) ? CPU->cpu_id : cpuid);
377 PR_MEMLIST_DUMP(mlist);
379 for (ml = mlist; ml; ml = ml->ml_next) {
380 uint64_t dst_pa;
381 uint64_t nbytes;
383 /* calculate the destination physical address */
384 dst_pa = ml->ml_address;
385 if (ml->ml_address & PAGEOFFSET)
386 cmn_err(CE_WARN,
387 "%s: address (0x%lx) not on "
388 "page boundary", f, ml->ml_address);
390 nbytes = ml->ml_size;
391 if (ml->ml_size & PAGEOFFSET)
392 cmn_err(CE_WARN,
393 "%s: size (0x%lx) not on "
394 "page boundary", f, ml->ml_size);
396 /*LINTED*/
397 while (nbytes > 0) {
398 /* write 64 bits to dst_pa */
399 stdphys(dst_pa, scrub_value);
401 /* increment/decrement by cacheline sizes */
402 dst_pa += DRMACH_COHERENCY_UNIT;
403 nbytes -= DRMACH_COHERENCY_UNIT;
408 * flush this cpu's ecache and take care to ensure
409 * that all of it's bus transactions have retired.
411 drmach_cpu_flush_ecache_sync();
413 affinity_clear();
415 #ifdef DEBUG
416 stime = ddi_get_lbolt() - stime;
417 PR_MEM("%s: scrub ticks = %ld (%ld secs)\n", f, stime, stime / hz);
418 #endif /* DEBUG */
421 static int
422 dr_move_memory(dr_handle_t *hp, dr_mem_unit_t *s_mp, dr_mem_unit_t *t_mp)
424 time_t copytime;
425 drmachid_t cr_id;
426 dr_sr_handle_t *srhp;
427 struct memlist *c_ml, *d_ml;
428 sbd_error_t *err;
429 static fn_t f = "dr_move_memory";
431 PR_MEM("%s: (INLINE) moving memory from %s to %s\n",
433 s_mp->sbm_cm.sbdev_path,
434 t_mp->sbm_cm.sbdev_path);
436 ASSERT(s_mp->sbm_flags & DR_MFLAG_SOURCE);
437 ASSERT(s_mp->sbm_peer == t_mp);
438 ASSERT(s_mp->sbm_mlist);
440 ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
441 ASSERT(t_mp->sbm_peer == s_mp);
444 * create a memlist of spans to copy by removing
445 * the spans that have been deleted, if any, from
446 * the full source board memlist. s_mp->sbm_del_mlist
447 * will be NULL if there were no spans deleted from
448 * the source board.
450 c_ml = memlist_dup(s_mp->sbm_mlist);
451 d_ml = s_mp->sbm_del_mlist;
452 while (d_ml != NULL) {
453 c_ml = memlist_del_span(c_ml, d_ml->ml_address, d_ml->ml_size);
454 d_ml = d_ml->ml_next;
457 affinity_set(drmach_mem_cpu_affinity(t_mp->sbm_cm.sbdev_id));
459 err = drmach_copy_rename_init(
460 t_mp->sbm_cm.sbdev_id, _ptob64(t_mp->sbm_slice_offset),
461 s_mp->sbm_cm.sbdev_id, c_ml, &cr_id);
462 if (err) {
463 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
464 affinity_clear();
465 return (-1);
468 srhp = dr_get_sr_handle(hp);
469 ASSERT(srhp);
471 copytime = ddi_get_lbolt();
473 /* Quiesce the OS. */
474 if (dr_suspend(srhp)) {
475 cmn_err(CE_WARN, "%s: failed to quiesce OS"
476 " for copy-rename", f);
478 dr_release_sr_handle(srhp);
479 err = drmach_copy_rename_fini(cr_id);
480 if (err) {
482 * no error is expected since the program has
483 * not yet run.
486 /* catch this in debug kernels */
487 ASSERT(0);
489 sbd_err_clear(&err);
492 /* suspend error reached via hp */
493 s_mp->sbm_cm.sbdev_error = hp->h_err;
494 hp->h_err = NULL;
496 affinity_clear();
497 return (-1);
501 * Rename memory for lgroup.
502 * Source and target board numbers are packaged in arg.
505 dr_board_t *t_bp, *s_bp;
507 s_bp = s_mp->sbm_cm.sbdev_bp;
508 t_bp = t_mp->sbm_cm.sbdev_bp;
510 lgrp_plat_config(LGRP_CONFIG_MEM_RENAME,
511 (uintptr_t)(s_bp->b_num | (t_bp->b_num << 16)));
514 drmach_copy_rename(cr_id);
516 /* Resume the OS. */
517 dr_resume(srhp);
519 copytime = ddi_get_lbolt() - copytime;
521 dr_release_sr_handle(srhp);
522 err = drmach_copy_rename_fini(cr_id);
523 if (err)
524 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
526 affinity_clear();
528 PR_MEM("%s: copy-rename elapsed time = %ld ticks (%ld secs)\n",
529 f, copytime, copytime / hz);
531 /* return -1 if dr_suspend or copy/rename recorded an error */
532 return (err == NULL ? 0 : -1);
536 * If detaching node contains memory that is "non-permanent"
537 * then the memory adr's are simply cleared. If the memory
538 * is non-relocatable, then do a copy-rename.
540 void
541 dr_detach_mem(dr_handle_t *hp, dr_common_unit_t *cp)
543 int rv = 0;
544 dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp;
545 dr_mem_unit_t *t_mp;
546 dr_state_t state;
547 static fn_t f = "dr_detach_mem";
549 PR_MEM("%s...\n", f);
551 /* lookup target mem unit and target board structure, if any */
552 if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
553 t_mp = s_mp->sbm_peer;
554 ASSERT(t_mp != NULL);
555 ASSERT(t_mp->sbm_peer == s_mp);
556 } else {
557 t_mp = NULL;
560 /* verify mem unit's state is UNREFERENCED */
561 state = s_mp->sbm_cm.sbdev_state;
562 if (state != DR_STATE_UNREFERENCED) {
563 dr_dev_err(CE_IGNORE, &s_mp->sbm_cm, ESBD_STATE);
564 return;
567 /* verify target mem unit's state is UNREFERENCED, if any */
568 if (t_mp != NULL) {
569 state = t_mp->sbm_cm.sbdev_state;
570 if (state != DR_STATE_UNREFERENCED) {
571 dr_dev_err(CE_IGNORE, &t_mp->sbm_cm, ESBD_STATE);
572 return;
577 * Scrub deleted memory. This will cause all cachelines
578 * referencing the memory to only be in the local cpu's
579 * ecache.
581 if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
582 /* no del mlist for src<=dst mem size copy/rename */
583 if (s_mp->sbm_del_mlist)
584 dr_mem_ecache_scrub(s_mp, s_mp->sbm_del_mlist);
586 if (t_mp != NULL && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
587 ASSERT(t_mp->sbm_del_mlist);
588 dr_mem_ecache_scrub(t_mp, t_mp->sbm_del_mlist);
592 * If there is no target board (no copy/rename was needed), then
593 * we're done!
595 if (t_mp == NULL) {
596 sbd_error_t *err;
598 * Reprogram interconnect hardware and disable
599 * memory controllers for memory node that's going away.
602 err = drmach_mem_disable(s_mp->sbm_cm.sbdev_id);
603 if (err) {
604 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
605 rv = -1;
607 } else {
608 rv = dr_move_memory(hp, s_mp, t_mp);
609 PR_MEM("%s: %s memory COPY-RENAME (board %d -> %d)\n",
611 rv ? "FAILED" : "COMPLETED",
612 s_mp->sbm_cm.sbdev_bp->b_num,
613 t_mp->sbm_cm.sbdev_bp->b_num);
615 if (rv != 0)
616 (void) dr_cancel_mem(s_mp);
619 if (rv == 0) {
620 sbd_error_t *err;
622 dr_lock_status(hp->h_bd);
623 err = drmach_unconfigure(s_mp->sbm_cm.sbdev_id,
624 DEVI_BRANCH_DESTROY);
625 dr_unlock_status(hp->h_bd);
626 if (err)
627 sbd_err_clear(&err);
631 #ifndef _STARFIRE
633 * XXX workaround for certain lab configurations (see also starcat drmach.c)
634 * Temporary code to get around observed incorrect results from
635 * kphysm_del_span_query when the queried span contains address spans
636 * not occupied by memory in between spans that do have memory.
637 * This routine acts as a wrapper to kphysm_del_span_query. It builds
638 * a memlist from phys_install of spans that exist between base and
639 * base + npages, inclusively. Kphysm_del_span_query is called for each
640 * node in the memlist with the results accumulated in *mp.
642 static int
643 dr_del_span_query(pfn_t base, pgcnt_t npages, memquery_t *mp)
645 uint64_t pa = _ptob64(base);
646 uint64_t sm = ~ (137438953472ull - 1);
647 uint64_t sa = pa & sm;
648 struct memlist *mlist, *ml;
649 int rv;
651 npages = npages; /* silence lint */
652 memlist_read_lock();
653 mlist = memlist_dup(phys_install);
654 memlist_read_unlock();
656 again:
657 for (ml = mlist; ml; ml = ml->ml_next) {
658 if ((ml->ml_address & sm) != sa) {
659 mlist = memlist_del_span(mlist,
660 ml->ml_address, ml->ml_size);
661 goto again;
665 mp->phys_pages = 0;
666 mp->managed = 0;
667 mp->nonrelocatable = 0;
668 mp->first_nonrelocatable = (pfn_t)-1; /* XXX */
669 mp->last_nonrelocatable = 0;
671 for (ml = mlist; ml; ml = ml->ml_next) {
672 memquery_t mq;
674 rv = kphysm_del_span_query(
675 _b64top(ml->ml_address), _b64top(ml->ml_size), &mq);
676 if (rv)
677 break;
679 mp->phys_pages += mq.phys_pages;
680 mp->managed += mq.managed;
681 mp->nonrelocatable += mq.nonrelocatable;
683 if (mq.nonrelocatable != 0) {
684 if (mq.first_nonrelocatable < mp->first_nonrelocatable)
685 mp->first_nonrelocatable =
686 mq.first_nonrelocatable;
687 if (mq.last_nonrelocatable > mp->last_nonrelocatable)
688 mp->last_nonrelocatable =
689 mq.last_nonrelocatable;
693 if (mp->nonrelocatable == 0)
694 mp->first_nonrelocatable = 0; /* XXX */
696 memlist_delete(mlist);
697 return (rv);
700 #define kphysm_del_span_query dr_del_span_query
701 #endif /* _STARFIRE */
704 * NOTE: This routine is only partially smart about multiple
705 * mem-units. Need to make mem-status structure smart
706 * about them also.
709 dr_mem_status(dr_handle_t *hp, dr_devset_t devset, sbd_dev_stat_t *dsp)
711 int m, mix;
712 memdelstat_t mdst;
713 memquery_t mq;
714 dr_board_t *bp;
715 dr_mem_unit_t *mp;
716 sbd_mem_stat_t *msp;
717 static fn_t f = "dr_mem_status";
719 bp = hp->h_bd;
720 devset &= DR_DEVS_PRESENT(bp);
722 for (m = mix = 0; m < MAX_MEM_UNITS_PER_BOARD; m++) {
723 int rv;
724 sbd_error_t *err;
725 drmach_status_t pstat;
726 dr_mem_unit_t *p_mp;
728 if (DEVSET_IN_SET(devset, SBD_COMP_MEM, m) == 0)
729 continue;
731 mp = dr_get_mem_unit(bp, m);
733 if (mp->sbm_cm.sbdev_state == DR_STATE_EMPTY) {
734 /* present, but not fully initialized */
735 continue;
738 if (mp->sbm_cm.sbdev_id == (drmachid_t)0)
739 continue;
741 /* fetch platform status */
742 err = drmach_status(mp->sbm_cm.sbdev_id, &pstat);
743 if (err) {
744 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
745 continue;
748 msp = &dsp->d_mem;
749 bzero((caddr_t)msp, sizeof (*msp));
751 (void) strncpy(msp->ms_cm.c_id.c_name, pstat.type,
752 sizeof (msp->ms_cm.c_id.c_name));
753 msp->ms_cm.c_id.c_type = mp->sbm_cm.sbdev_type;
754 msp->ms_cm.c_id.c_unit = SBD_NULL_UNIT;
755 msp->ms_cm.c_cond = mp->sbm_cm.sbdev_cond;
756 msp->ms_cm.c_busy = mp->sbm_cm.sbdev_busy | pstat.busy;
757 msp->ms_cm.c_time = mp->sbm_cm.sbdev_time;
758 msp->ms_cm.c_ostate = mp->sbm_cm.sbdev_ostate;
760 msp->ms_totpages = mp->sbm_npages;
761 msp->ms_basepfn = mp->sbm_basepfn;
762 msp->ms_pageslost = mp->sbm_pageslost;
763 msp->ms_cage_enabled = kcage_on;
765 if (mp->sbm_flags & DR_MFLAG_RESERVED)
766 p_mp = mp->sbm_peer;
767 else
768 p_mp = NULL;
770 if (p_mp == NULL) {
771 msp->ms_peer_is_target = 0;
772 msp->ms_peer_ap_id[0] = '\0';
773 } else if (p_mp->sbm_flags & DR_MFLAG_RESERVED) {
774 char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
775 char *minor;
778 * b_dip doesn't have to be held for ddi_pathname()
779 * because the board struct (dr_board_t) will be
780 * destroyed before b_dip detaches.
782 (void) ddi_pathname(bp->b_dip, path);
783 minor = strchr(p_mp->sbm_cm.sbdev_path, ':');
785 (void) snprintf(msp->ms_peer_ap_id,
786 sizeof (msp->ms_peer_ap_id), "%s%s",
787 path, (minor == NULL) ? "" : minor);
789 kmem_free(path, MAXPATHLEN);
791 if (p_mp->sbm_flags & DR_MFLAG_TARGET)
792 msp->ms_peer_is_target = 1;
795 if (mp->sbm_flags & DR_MFLAG_RELOWNER)
796 rv = kphysm_del_status(mp->sbm_memhandle, &mdst);
797 else
798 rv = KPHYSM_EHANDLE; /* force 'if' to fail */
800 if (rv == KPHYSM_OK) {
802 * Any pages above managed is "free",
803 * i.e. it's collected.
805 msp->ms_detpages += (uint_t)(mdst.collected +
806 mdst.phys_pages - mdst.managed);
807 } else {
809 * If we're UNREFERENCED or UNCONFIGURED,
810 * then the number of detached pages is
811 * however many pages are on the board.
812 * I.e. detached = not in use by OS.
814 switch (msp->ms_cm.c_ostate) {
816 * changed to use cfgadm states
818 * was:
819 * case DR_STATE_UNREFERENCED:
820 * case DR_STATE_UNCONFIGURED:
822 case SBD_STAT_UNCONFIGURED:
823 msp->ms_detpages = msp->ms_totpages;
824 break;
826 default:
827 break;
832 * kphysm_del_span_query can report non-reloc pages = total
833 * pages for memory that is not yet configured
835 if (mp->sbm_cm.sbdev_state != DR_STATE_UNCONFIGURED) {
837 rv = kphysm_del_span_query(mp->sbm_basepfn,
838 mp->sbm_npages, &mq);
840 if (rv == KPHYSM_OK) {
841 msp->ms_managed_pages = mq.managed;
842 msp->ms_noreloc_pages = mq.nonrelocatable;
843 msp->ms_noreloc_first =
844 mq.first_nonrelocatable;
845 msp->ms_noreloc_last =
846 mq.last_nonrelocatable;
847 msp->ms_cm.c_sflags = 0;
848 if (mq.nonrelocatable) {
849 SBD_SET_SUSPEND(SBD_CMD_UNCONFIGURE,
850 msp->ms_cm.c_sflags);
852 } else {
853 PR_MEM("%s: kphysm_del_span_query() = %d\n",
854 f, rv);
859 * Check source unit state during copy-rename
861 if ((mp->sbm_flags & DR_MFLAG_SOURCE) &&
862 (mp->sbm_cm.sbdev_state == DR_STATE_UNREFERENCED ||
863 mp->sbm_cm.sbdev_state == DR_STATE_RELEASE))
864 msp->ms_cm.c_ostate = SBD_STAT_CONFIGURED;
866 mix++;
867 dsp++;
870 return (mix);
874 dr_pre_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
876 _NOTE(ARGUNUSED(hp))
878 int err_flag = 0;
879 int d;
880 sbd_error_t *err;
881 static fn_t f = "dr_pre_attach_mem";
883 PR_MEM("%s...\n", f);
885 for (d = 0; d < devnum; d++) {
886 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
887 dr_state_t state;
889 cmn_err(CE_CONT, "OS configure %s", mp->sbm_cm.sbdev_path);
891 state = mp->sbm_cm.sbdev_state;
892 switch (state) {
893 case DR_STATE_UNCONFIGURED:
894 PR_MEM("%s: recovering from UNCONFIG for %s\n",
896 mp->sbm_cm.sbdev_path);
898 /* use memlist cached by dr_post_detach_mem_unit */
899 ASSERT(mp->sbm_mlist != NULL);
900 PR_MEM("%s: re-configuring cached memlist for %s:\n",
901 f, mp->sbm_cm.sbdev_path);
902 PR_MEMLIST_DUMP(mp->sbm_mlist);
904 /* kphysm del handle should be have been freed */
905 ASSERT((mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
907 /*FALLTHROUGH*/
909 case DR_STATE_CONNECTED:
910 PR_MEM("%s: reprogramming mem hardware on %s\n",
911 f, mp->sbm_cm.sbdev_bp->b_path);
913 PR_MEM("%s: enabling %s\n",
914 f, mp->sbm_cm.sbdev_path);
916 err = drmach_mem_enable(mp->sbm_cm.sbdev_id);
917 if (err) {
918 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
919 err_flag = 1;
921 break;
923 default:
924 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_STATE);
925 err_flag = 1;
926 break;
929 /* exit for loop if error encountered */
930 if (err_flag)
931 break;
934 return (err_flag ? -1 : 0);
938 dr_post_attach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
940 _NOTE(ARGUNUSED(hp))
942 int d;
943 static fn_t f = "dr_post_attach_mem";
945 PR_MEM("%s...\n", f);
947 for (d = 0; d < devnum; d++) {
948 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
949 struct memlist *mlist, *ml;
951 mlist = dr_get_memlist(mp);
952 if (mlist == NULL) {
953 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_MEMFAIL);
954 continue;
958 * Verify the memory really did successfully attach
959 * by checking for its existence in phys_install.
961 memlist_read_lock();
962 if (memlist_intersect(phys_install, mlist) == 0) {
963 memlist_read_unlock();
965 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
967 PR_MEM("%s: %s memlist not in phys_install",
968 f, mp->sbm_cm.sbdev_path);
970 memlist_delete(mlist);
971 continue;
973 memlist_read_unlock();
975 for (ml = mlist; ml != NULL; ml = ml->ml_next) {
976 sbd_error_t *err;
978 err = drmach_mem_add_span(
979 mp->sbm_cm.sbdev_id,
980 ml->ml_address,
981 ml->ml_size);
982 if (err)
983 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
986 memlist_delete(mlist);
989 * Destroy cached memlist, if any.
990 * There will be a cached memlist in sbm_mlist if
991 * this board is being configured directly after
992 * an unconfigure.
993 * To support this transition, dr_post_detach_mem
994 * left a copy of the last known memlist in sbm_mlist.
995 * This memlist could differ from any derived from
996 * hardware if while this memunit was last configured
997 * the system detected and deleted bad pages from
998 * phys_install. The location of those bad pages
999 * will be reflected in the cached memlist.
1001 if (mp->sbm_mlist) {
1002 memlist_delete(mp->sbm_mlist);
1003 mp->sbm_mlist = NULL;
1007 * TODO: why is this call to dr_init_mem_unit_data here?
1008 * this has been done at discovery or connect time, so this is
1009 * probably redundant and unnecessary.
1011 dr_init_mem_unit_data(mp);
1014 return (0);
1018 dr_pre_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1020 _NOTE(ARGUNUSED(hp))
1022 int d;
1024 for (d = 0; d < devnum; d++) {
1025 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1027 cmn_err(CE_CONT, "OS unconfigure %s", mp->sbm_cm.sbdev_path);
1030 return (0);
1035 dr_post_detach_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1037 _NOTE(ARGUNUSED(hp))
1039 int d, rv;
1040 static fn_t f = "dr_post_detach_mem";
1042 PR_MEM("%s...\n", f);
1044 rv = 0;
1045 for (d = 0; d < devnum; d++) {
1046 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1048 ASSERT(mp->sbm_cm.sbdev_bp == hp->h_bd);
1050 if (dr_post_detach_mem_unit(mp))
1051 rv = -1;
1054 return (rv);
1057 static void
1058 dr_add_memory_spans(dr_mem_unit_t *mp, struct memlist *ml)
1060 static fn_t f = "dr_add_memory_spans";
1062 PR_MEM("%s...", f);
1063 PR_MEMLIST_DUMP(ml);
1065 #ifdef DEBUG
1066 memlist_read_lock();
1067 if (memlist_intersect(phys_install, ml)) {
1068 PR_MEM("%s:WARNING: memlist intersects with phys_install\n", f);
1070 memlist_read_unlock();
1071 #endif
1073 for (; ml; ml = ml->ml_next) {
1074 pfn_t base;
1075 pgcnt_t npgs;
1076 int rv;
1077 sbd_error_t *err;
1079 base = _b64top(ml->ml_address);
1080 npgs = _b64top(ml->ml_size);
1082 rv = kphysm_add_memory_dynamic(base, npgs);
1084 err = drmach_mem_add_span(
1085 mp->sbm_cm.sbdev_id,
1086 ml->ml_address,
1087 ml->ml_size);
1089 if (err)
1090 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1092 if (rv != KPHYSM_OK) {
1093 cmn_err(CE_WARN, "%s:"
1094 " unexpected kphysm_add_memory_dynamic"
1095 " return value %d;"
1096 " basepfn=0x%lx, npages=%ld\n",
1097 f, rv, base, npgs);
1099 continue;
1104 static int
1105 dr_post_detach_mem_unit(dr_mem_unit_t *s_mp)
1107 uint64_t sz = s_mp->sbm_slice_size;
1108 uint64_t sm = sz - 1;
1109 /* old and new below refer to PAs before and after copy-rename */
1110 uint64_t s_old_basepa, s_new_basepa;
1111 uint64_t t_old_basepa, t_new_basepa;
1112 uint64_t t_new_smallsize = 0;
1113 dr_mem_unit_t *t_mp, *x_mp;
1114 struct memlist *ml;
1115 int rv;
1116 sbd_error_t *err;
1117 static fn_t f = "dr_post_detach_mem_unit";
1119 PR_MEM("%s...\n", f);
1121 /* s_mp->sbm_del_mlist could be NULL, meaning no deleted spans */
1122 PR_MEM("%s: %s: deleted memlist (EMPTY maybe okay):\n",
1123 f, s_mp->sbm_cm.sbdev_path);
1124 PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1126 /* sanity check */
1127 ASSERT(s_mp->sbm_del_mlist == NULL ||
1128 (s_mp->sbm_flags & DR_MFLAG_RELDONE) != 0);
1130 if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1131 t_mp = s_mp->sbm_peer;
1132 ASSERT(t_mp != NULL);
1133 ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1134 ASSERT(t_mp->sbm_peer == s_mp);
1136 ASSERT(t_mp->sbm_flags & DR_MFLAG_RELDONE);
1137 ASSERT(t_mp->sbm_del_mlist);
1139 PR_MEM("%s: target %s: deleted memlist:\n",
1140 f, t_mp->sbm_cm.sbdev_path);
1141 PR_MEMLIST_DUMP(t_mp->sbm_del_mlist);
1142 } else {
1143 /* this is no target unit */
1144 t_mp = NULL;
1148 * Verify the memory really did successfully detach
1149 * by checking for its non-existence in phys_install.
1151 rv = 0;
1152 memlist_read_lock();
1153 if (s_mp->sbm_flags & DR_MFLAG_RELDONE) {
1154 x_mp = s_mp;
1155 rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1157 if (rv == 0 && t_mp && (t_mp->sbm_flags & DR_MFLAG_RELDONE)) {
1158 x_mp = t_mp;
1159 rv = memlist_intersect(phys_install, x_mp->sbm_del_mlist);
1161 memlist_read_unlock();
1163 if (rv) {
1164 /* error: memlist still in phys_install */
1165 DR_DEV_INTERNAL_ERROR(&x_mp->sbm_cm);
1169 * clean mem unit state and bail out if an error has been recorded.
1171 rv = 0;
1172 if (s_mp->sbm_cm.sbdev_error) {
1173 PR_MEM("%s: %s flags=%x", f,
1174 s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1175 DR_DEV_CLR_UNREFERENCED(&s_mp->sbm_cm);
1176 DR_DEV_CLR_RELEASED(&s_mp->sbm_cm);
1177 dr_device_transition(&s_mp->sbm_cm, DR_STATE_CONFIGURED);
1178 rv = -1;
1180 if (t_mp != NULL && t_mp->sbm_cm.sbdev_error != NULL) {
1181 PR_MEM("%s: %s flags=%x", f,
1182 s_mp->sbm_cm.sbdev_path, s_mp->sbm_flags);
1183 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1184 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1185 dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1186 rv = -1;
1188 if (rv)
1189 goto cleanup;
1191 s_old_basepa = _ptob64(s_mp->sbm_basepfn);
1192 err = drmach_mem_get_base_physaddr(s_mp->sbm_cm.sbdev_id,
1193 &s_new_basepa);
1194 ASSERT(err == NULL);
1196 PR_MEM("%s:s_old_basepa: 0x%lx\n", f, s_old_basepa);
1197 PR_MEM("%s:s_new_basepa: 0x%lx\n", f, s_new_basepa);
1199 if (t_mp != NULL) {
1200 struct memlist *s_copy_mlist;
1202 t_old_basepa = _ptob64(t_mp->sbm_basepfn);
1203 err = drmach_mem_get_base_physaddr(t_mp->sbm_cm.sbdev_id,
1204 &t_new_basepa);
1205 ASSERT(err == NULL);
1207 PR_MEM("%s:t_old_basepa: 0x%lx\n", f, t_old_basepa);
1208 PR_MEM("%s:t_new_basepa: 0x%lx\n", f, t_new_basepa);
1211 * Construct copy list with original source addresses.
1212 * Used to add back excess target mem.
1214 s_copy_mlist = memlist_dup(s_mp->sbm_mlist);
1215 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1216 s_copy_mlist = memlist_del_span(s_copy_mlist,
1217 ml->ml_address, ml->ml_size);
1220 PR_MEM("%s: source copy list:\n:", f);
1221 PR_MEMLIST_DUMP(s_copy_mlist);
1224 * We had to swap mem-units, so update
1225 * memlists accordingly with new base
1226 * addresses.
1228 for (ml = t_mp->sbm_mlist; ml; ml = ml->ml_next) {
1229 ml->ml_address -= t_old_basepa;
1230 ml->ml_address += t_new_basepa;
1234 * There is no need to explicitly rename the target delete
1235 * memlist, because sbm_del_mlist and sbm_mlist always
1236 * point to the same memlist for a copy/rename operation.
1238 ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1240 PR_MEM("%s: renamed target memlist and delete memlist:\n", f);
1241 PR_MEMLIST_DUMP(t_mp->sbm_mlist);
1243 for (ml = s_mp->sbm_mlist; ml; ml = ml->ml_next) {
1244 ml->ml_address -= s_old_basepa;
1245 ml->ml_address += s_new_basepa;
1248 PR_MEM("%s: renamed source memlist:\n", f);
1249 PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1252 * Keep track of dynamically added segments
1253 * since they cannot be split if we need to delete
1254 * excess source memory later for this board.
1256 if (t_mp->sbm_dyn_segs)
1257 memlist_delete(t_mp->sbm_dyn_segs);
1258 t_mp->sbm_dyn_segs = s_mp->sbm_dyn_segs;
1259 s_mp->sbm_dyn_segs = NULL;
1262 * If the target memory range with the new target base PA
1263 * extends beyond the usable slice, prevent any "target excess"
1264 * from being added back after this copy/rename and
1265 * calculate the new smaller size of the target board
1266 * to be set as part of target cleanup. The base + npages
1267 * must only include the range of memory up to the end of
1268 * this slice. This will only be used after a category 4
1269 * large-to-small target type copy/rename - see comments
1270 * in dr_select_mem_target.
1272 if (((t_new_basepa & sm) + _ptob64(t_mp->sbm_npages)) > sz) {
1273 t_new_smallsize = sz - (t_new_basepa & sm);
1276 if (s_mp->sbm_flags & DR_MFLAG_MEMRESIZE &&
1277 t_new_smallsize == 0) {
1278 struct memlist *t_excess_mlist;
1281 * Add back excess target memory.
1282 * Subtract out the portion of the target memory
1283 * node that was taken over by the source memory
1284 * node.
1286 t_excess_mlist = memlist_dup(t_mp->sbm_mlist);
1287 for (ml = s_copy_mlist; ml; ml = ml->ml_next) {
1288 t_excess_mlist =
1289 memlist_del_span(t_excess_mlist,
1290 ml->ml_address, ml->ml_size);
1294 * Update dynamically added segs
1296 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1297 t_mp->sbm_dyn_segs =
1298 memlist_del_span(t_mp->sbm_dyn_segs,
1299 ml->ml_address, ml->ml_size);
1301 for (ml = t_excess_mlist; ml; ml = ml->ml_next) {
1302 t_mp->sbm_dyn_segs =
1303 memlist_cat_span(t_mp->sbm_dyn_segs,
1304 ml->ml_address, ml->ml_size);
1306 PR_MEM("%s: %s: updated dynamic seg list:\n",
1307 f, t_mp->sbm_cm.sbdev_path);
1308 PR_MEMLIST_DUMP(t_mp->sbm_dyn_segs);
1310 PR_MEM("%s: adding back remaining portion"
1311 " of %s, memlist:\n",
1312 f, t_mp->sbm_cm.sbdev_path);
1313 PR_MEMLIST_DUMP(t_excess_mlist);
1315 dr_add_memory_spans(s_mp, t_excess_mlist);
1316 memlist_delete(t_excess_mlist);
1318 memlist_delete(s_copy_mlist);
1320 #ifdef DEBUG
1322 * Renaming s_mp->sbm_del_mlist is not necessary. This
1323 * list is not used beyond this point, and in fact, is
1324 * disposed of at the end of this function.
1326 for (ml = s_mp->sbm_del_mlist; ml; ml = ml->ml_next) {
1327 ml->ml_address -= s_old_basepa;
1328 ml->ml_address += s_new_basepa;
1331 PR_MEM("%s: renamed source delete memlist", f);
1332 PR_MEMLIST_DUMP(s_mp->sbm_del_mlist);
1333 #endif
1337 if (t_mp != NULL) {
1338 /* delete target's entire address space */
1339 err = drmach_mem_del_span(t_mp->sbm_cm.sbdev_id,
1340 t_old_basepa & ~ sm, sz);
1341 if (err)
1342 DRERR_SET_C(&t_mp->sbm_cm.sbdev_error, &err);
1343 ASSERT(err == NULL);
1346 * After the copy/rename, the original address space
1347 * for the source board (which is now located on the
1348 * target board) may now have some excess to be deleted.
1349 * The amount is calculated by masking the slice
1350 * info and keeping the slice offset from t_new_basepa.
1352 err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1353 s_old_basepa & ~ sm, t_new_basepa & sm);
1354 if (err)
1355 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1356 ASSERT(err == NULL);
1358 } else {
1359 /* delete board's entire address space */
1360 err = drmach_mem_del_span(s_mp->sbm_cm.sbdev_id,
1361 s_old_basepa & ~ sm, sz);
1362 if (err)
1363 DRERR_SET_C(&s_mp->sbm_cm.sbdev_error, &err);
1364 ASSERT(err == NULL);
1367 cleanup:
1368 /* clean up target mem unit */
1369 if (t_mp != NULL) {
1370 memlist_delete(t_mp->sbm_del_mlist);
1371 /* no need to delete sbm_mlist, it shares sbm_del_mlist */
1373 t_mp->sbm_del_mlist = NULL;
1374 t_mp->sbm_mlist = NULL;
1375 t_mp->sbm_peer = NULL;
1376 t_mp->sbm_flags = 0;
1377 t_mp->sbm_cm.sbdev_busy = 0;
1378 dr_init_mem_unit_data(t_mp);
1380 /* reduce target size if new PAs go past end of usable slice */
1381 if (t_new_smallsize > 0) {
1382 t_mp->sbm_npages = _b64top(t_new_smallsize);
1383 PR_MEM("%s: target new size 0x%lx bytes\n",
1384 f, t_new_smallsize);
1387 if (t_mp != NULL && t_mp->sbm_cm.sbdev_error == NULL) {
1389 * now that copy/rename has completed, undo this
1390 * work that was done in dr_release_mem_done.
1392 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1393 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1394 dr_device_transition(&t_mp->sbm_cm, DR_STATE_CONFIGURED);
1398 * clean up (source) board's mem unit structure.
1399 * NOTE: sbm_mlist is retained if no error has been record (in other
1400 * words, when s_mp->sbm_cm.sbdev_error is NULL). This memlist is
1401 * referred to elsewhere as the cached memlist. The cached memlist
1402 * is used to re-attach (configure back in) this memunit from the
1403 * unconfigured state. The memlist is retained because it may
1404 * represent bad pages that were detected while the memory was
1405 * configured into the OS. The OS deletes bad pages from phys_install.
1406 * Those deletes, if any, will be represented in the cached mlist.
1408 if (s_mp->sbm_del_mlist && s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1409 memlist_delete(s_mp->sbm_del_mlist);
1411 if (s_mp->sbm_cm.sbdev_error && s_mp->sbm_mlist) {
1412 memlist_delete(s_mp->sbm_mlist);
1413 s_mp->sbm_mlist = NULL;
1416 if (s_mp->sbm_dyn_segs != NULL && s_mp->sbm_cm.sbdev_error == 0) {
1417 memlist_delete(s_mp->sbm_dyn_segs);
1418 s_mp->sbm_dyn_segs = NULL;
1421 s_mp->sbm_del_mlist = NULL;
1422 s_mp->sbm_peer = NULL;
1423 s_mp->sbm_flags = 0;
1424 s_mp->sbm_cm.sbdev_busy = 0;
1425 dr_init_mem_unit_data(s_mp);
1427 PR_MEM("%s: cached memlist for %s:", f, s_mp->sbm_cm.sbdev_path);
1428 PR_MEMLIST_DUMP(s_mp->sbm_mlist);
1430 return (0);
1434 * Successful return from this function will have the memory
1435 * handle in bp->b_dev[..mem-unit...].sbm_memhandle allocated
1436 * and waiting. This routine's job is to select the memory that
1437 * actually has to be released (detached) which may not necessarily
1438 * be the same memory node that came in in devlist[],
1439 * i.e. a copy-rename is needed.
1442 dr_pre_release_mem(dr_handle_t *hp, dr_common_unit_t **devlist, int devnum)
1444 int d;
1445 int err_flag = 0;
1446 static fn_t f = "dr_pre_release_mem";
1448 PR_MEM("%s...\n", f);
1450 for (d = 0; d < devnum; d++) {
1451 dr_mem_unit_t *mp = (dr_mem_unit_t *)devlist[d];
1452 int rv;
1453 memquery_t mq;
1454 struct memlist *ml;
1456 if (mp->sbm_cm.sbdev_error) {
1457 err_flag = 1;
1458 continue;
1459 } else if (!kcage_on) {
1460 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_KCAGE_OFF);
1461 err_flag = 1;
1462 continue;
1465 if (mp->sbm_flags & DR_MFLAG_RESERVED) {
1467 * Board is currently involved in a delete
1468 * memory operation. Can't detach this guy until
1469 * that operation completes.
1471 dr_dev_err(CE_WARN, &mp->sbm_cm, ESBD_INVAL);
1472 err_flag = 1;
1473 break;
1477 * Check whether the detaching memory requires a
1478 * copy-rename.
1480 ASSERT(mp->sbm_npages != 0);
1481 rv = kphysm_del_span_query(mp->sbm_basepfn, mp->sbm_npages,
1482 &mq);
1483 if (rv != KPHYSM_OK) {
1484 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1485 err_flag = 1;
1486 break;
1489 if (mq.nonrelocatable != 0) {
1490 if (!(dr_cmd_flags(hp) &
1491 (SBD_FLAG_FORCE | SBD_FLAG_QUIESCE_OKAY))) {
1492 /* caller wasn't prompted for a suspend */
1493 dr_dev_err(CE_WARN, &mp->sbm_cm,
1494 ESBD_QUIESCE_REQD);
1495 err_flag = 1;
1496 break;
1500 /* flags should be clean at this time */
1501 ASSERT(mp->sbm_flags == 0);
1503 ASSERT(mp->sbm_mlist == NULL); /* should be null */
1504 ASSERT(mp->sbm_del_mlist == NULL); /* should be null */
1505 if (mp->sbm_mlist != NULL) {
1506 memlist_delete(mp->sbm_mlist);
1507 mp->sbm_mlist = NULL;
1510 ml = dr_get_memlist(mp);
1511 if (ml == NULL) {
1512 err_flag = 1;
1513 PR_MEM("%s: no memlist found for %s\n",
1514 f, mp->sbm_cm.sbdev_path);
1515 continue;
1518 /* allocate a kphysm handle */
1519 rv = kphysm_del_gethandle(&mp->sbm_memhandle);
1520 if (rv != KPHYSM_OK) {
1521 memlist_delete(ml);
1523 DR_DEV_INTERNAL_ERROR(&mp->sbm_cm);
1524 err_flag = 1;
1525 break;
1527 mp->sbm_flags |= DR_MFLAG_RELOWNER;
1529 if ((mq.nonrelocatable != 0) ||
1530 dr_reserve_mem_spans(&mp->sbm_memhandle, ml)) {
1532 * Either the detaching memory node contains
1533 * non-reloc memory or we failed to reserve the
1534 * detaching memory node (which did _not_ have
1535 * any non-reloc memory, i.e. some non-reloc mem
1536 * got onboard).
1539 if (dr_select_mem_target(hp, mp, ml)) {
1540 int rv;
1543 * We had no luck locating a target
1544 * memory node to be the recipient of
1545 * the non-reloc memory on the node
1546 * we're trying to detach.
1547 * Clean up be disposing the mem handle
1548 * and the mem list.
1550 rv = kphysm_del_release(mp->sbm_memhandle);
1551 if (rv != KPHYSM_OK) {
1553 * can do nothing but complain
1554 * and hope helpful for debug
1556 cmn_err(CE_WARN, "%s: unexpected"
1557 " kphysm_del_release return"
1558 " value %d",
1559 f, rv);
1561 mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1563 memlist_delete(ml);
1565 /* make sure sbm_flags is clean */
1566 ASSERT(mp->sbm_flags == 0);
1568 dr_dev_err(CE_WARN, &mp->sbm_cm,
1569 ESBD_NO_TARGET);
1571 err_flag = 1;
1572 break;
1576 * ml is not memlist_delete'd here because
1577 * it has been assigned to mp->sbm_mlist
1578 * by dr_select_mem_target.
1580 } else {
1581 /* no target needed to detach this board */
1582 mp->sbm_flags |= DR_MFLAG_RESERVED;
1583 mp->sbm_peer = NULL;
1584 mp->sbm_del_mlist = ml;
1585 mp->sbm_mlist = ml;
1586 mp->sbm_cm.sbdev_busy = 1;
1588 #ifdef DEBUG
1589 ASSERT(mp->sbm_mlist != NULL);
1591 if (mp->sbm_flags & DR_MFLAG_SOURCE) {
1592 PR_MEM("%s: release of %s requires copy/rename;"
1593 " selected target board %s\n",
1595 mp->sbm_cm.sbdev_path,
1596 mp->sbm_peer->sbm_cm.sbdev_path);
1597 } else {
1598 PR_MEM("%s: copy/rename not required to release %s\n",
1599 f, mp->sbm_cm.sbdev_path);
1602 ASSERT(mp->sbm_flags & DR_MFLAG_RELOWNER);
1603 ASSERT(mp->sbm_flags & DR_MFLAG_RESERVED);
1604 #endif
1607 return (err_flag ? -1 : 0);
1610 void
1611 dr_release_mem_done(dr_common_unit_t *cp)
1613 dr_mem_unit_t *s_mp = (dr_mem_unit_t *)cp;
1614 dr_mem_unit_t *t_mp, *mp;
1615 int rv;
1616 static fn_t f = "dr_release_mem_done";
1619 * This unit will be flagged with DR_MFLAG_SOURCE, if it
1620 * has a target unit.
1622 if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1623 t_mp = s_mp->sbm_peer;
1624 ASSERT(t_mp != NULL);
1625 ASSERT(t_mp->sbm_peer == s_mp);
1626 ASSERT(t_mp->sbm_flags & DR_MFLAG_TARGET);
1627 ASSERT(t_mp->sbm_flags & DR_MFLAG_RESERVED);
1628 } else {
1629 /* this is no target unit */
1630 t_mp = NULL;
1633 /* free delete handle */
1634 ASSERT(s_mp->sbm_flags & DR_MFLAG_RELOWNER);
1635 ASSERT(s_mp->sbm_flags & DR_MFLAG_RESERVED);
1636 rv = kphysm_del_release(s_mp->sbm_memhandle);
1637 if (rv != KPHYSM_OK) {
1639 * can do nothing but complain
1640 * and hope helpful for debug
1642 cmn_err(CE_WARN, "%s: unexpected kphysm_del_release"
1643 " return value %d", f, rv);
1645 s_mp->sbm_flags &= ~DR_MFLAG_RELOWNER;
1648 * If an error was encountered during release, clean up
1649 * the source (and target, if present) unit data.
1651 /* XXX Can we know that sbdev_error was encountered during release? */
1652 if (s_mp->sbm_cm.sbdev_error != NULL) {
1653 PR_MEM("%s: %s: error %d noted\n",
1655 s_mp->sbm_cm.sbdev_path,
1656 s_mp->sbm_cm.sbdev_error->e_code);
1658 if (t_mp != NULL) {
1659 ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1660 t_mp->sbm_del_mlist = NULL;
1662 if (t_mp->sbm_mlist != NULL) {
1663 memlist_delete(t_mp->sbm_mlist);
1664 t_mp->sbm_mlist = NULL;
1667 t_mp->sbm_peer = NULL;
1668 t_mp->sbm_flags = 0;
1669 t_mp->sbm_cm.sbdev_busy = 0;
1672 if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1673 memlist_delete(s_mp->sbm_del_mlist);
1674 s_mp->sbm_del_mlist = NULL;
1676 if (s_mp->sbm_mlist != NULL) {
1677 memlist_delete(s_mp->sbm_mlist);
1678 s_mp->sbm_mlist = NULL;
1681 s_mp->sbm_peer = NULL;
1682 s_mp->sbm_flags = 0;
1683 s_mp->sbm_cm.sbdev_busy = 0;
1685 /* bail out */
1686 return;
1689 DR_DEV_SET_RELEASED(&s_mp->sbm_cm);
1690 dr_device_transition(&s_mp->sbm_cm, DR_STATE_RELEASE);
1692 if (t_mp != NULL) {
1694 * the kphysm delete operation that drained the source
1695 * board also drained this target board. Since the source
1696 * board drain is now known to have succeeded, we know this
1697 * target board is drained too.
1699 * because DR_DEV_SET_RELEASED and dr_device_transition
1700 * is done here, the dr_release_dev_done should not
1701 * fail.
1703 DR_DEV_SET_RELEASED(&t_mp->sbm_cm);
1704 dr_device_transition(&t_mp->sbm_cm, DR_STATE_RELEASE);
1707 * NOTE: do not transition target's board state,
1708 * even if the mem-unit was the last configure
1709 * unit of the board. When copy/rename completes
1710 * this mem-unit will transitioned back to
1711 * the configured state. In the meantime, the
1712 * board's must remain as is.
1716 /* if board(s) had deleted memory, verify it is gone */
1717 rv = 0;
1718 memlist_read_lock();
1719 if (s_mp->sbm_del_mlist != NULL) {
1720 mp = s_mp;
1721 rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1723 if (rv == 0 && t_mp && t_mp->sbm_del_mlist != NULL) {
1724 mp = t_mp;
1725 rv = memlist_intersect(phys_install, mp->sbm_del_mlist);
1727 memlist_read_unlock();
1728 if (rv) {
1729 cmn_err(CE_WARN, "%s: %smem-unit (%d.%d): "
1730 "deleted memory still found in phys_install",
1732 (mp == t_mp ? "target " : ""),
1733 mp->sbm_cm.sbdev_bp->b_num,
1734 mp->sbm_cm.sbdev_unum);
1736 DR_DEV_INTERNAL_ERROR(&s_mp->sbm_cm);
1737 return;
1740 s_mp->sbm_flags |= DR_MFLAG_RELDONE;
1741 if (t_mp != NULL)
1742 t_mp->sbm_flags |= DR_MFLAG_RELDONE;
1744 /* this should not fail */
1745 if (dr_release_dev_done(&s_mp->sbm_cm) != 0) {
1746 /* catch this in debug kernels */
1747 ASSERT(0);
1748 return;
1751 PR_MEM("%s: marking %s release DONE\n",
1752 f, s_mp->sbm_cm.sbdev_path);
1754 s_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1756 if (t_mp != NULL) {
1757 /* should not fail */
1758 rv = dr_release_dev_done(&t_mp->sbm_cm);
1759 if (rv != 0) {
1760 /* catch this in debug kernels */
1761 ASSERT(0);
1762 return;
1765 PR_MEM("%s: marking %s release DONE\n",
1766 f, t_mp->sbm_cm.sbdev_path);
1768 t_mp->sbm_cm.sbdev_ostate = SBD_STAT_UNCONFIGURED;
1772 /*ARGSUSED*/
1774 dr_disconnect_mem(dr_mem_unit_t *mp)
1776 static fn_t f = "dr_disconnect_mem";
1777 update_membounds_t umb;
1779 #ifdef DEBUG
1780 int state = mp->sbm_cm.sbdev_state;
1781 ASSERT(state == DR_STATE_CONNECTED || state == DR_STATE_UNCONFIGURED);
1782 #endif
1784 PR_MEM("%s...\n", f);
1786 if (mp->sbm_del_mlist && mp->sbm_del_mlist != mp->sbm_mlist)
1787 memlist_delete(mp->sbm_del_mlist);
1788 mp->sbm_del_mlist = NULL;
1790 if (mp->sbm_mlist) {
1791 memlist_delete(mp->sbm_mlist);
1792 mp->sbm_mlist = NULL;
1796 * Remove memory from lgroup
1797 * For now, only board info is required.
1799 umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
1800 umb.u_base = (uint64_t)-1;
1801 umb.u_len = (uint64_t)-1;
1803 lgrp_plat_config(LGRP_CONFIG_MEM_DEL, (uintptr_t)&umb);
1805 return (0);
1809 dr_cancel_mem(dr_mem_unit_t *s_mp)
1811 dr_mem_unit_t *t_mp;
1812 dr_state_t state;
1813 static fn_t f = "dr_cancel_mem";
1815 state = s_mp->sbm_cm.sbdev_state;
1817 if (s_mp->sbm_flags & DR_MFLAG_TARGET) {
1818 /* must cancel source board, not target board */
1819 /* TODO: set error */
1820 return (-1);
1821 } else if (s_mp->sbm_flags & DR_MFLAG_SOURCE) {
1822 t_mp = s_mp->sbm_peer;
1823 ASSERT(t_mp != NULL);
1824 ASSERT(t_mp->sbm_peer == s_mp);
1826 /* must always match the source board's state */
1827 /* TODO: is this assertion correct? */
1828 ASSERT(t_mp->sbm_cm.sbdev_state == state);
1829 } else {
1830 /* this is no target unit */
1831 t_mp = NULL;
1834 switch (state) {
1835 case DR_STATE_UNREFERENCED: /* state set by dr_release_dev_done */
1836 ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1838 if (t_mp != NULL && t_mp->sbm_del_mlist != NULL) {
1839 PR_MEM("%s: undoing target %s memory delete\n",
1840 f, t_mp->sbm_cm.sbdev_path);
1841 dr_add_memory_spans(t_mp, t_mp->sbm_del_mlist);
1843 DR_DEV_CLR_UNREFERENCED(&t_mp->sbm_cm);
1846 if (s_mp->sbm_del_mlist != NULL) {
1847 PR_MEM("%s: undoing %s memory delete\n",
1848 f, s_mp->sbm_cm.sbdev_path);
1850 dr_add_memory_spans(s_mp, s_mp->sbm_del_mlist);
1853 /*FALLTHROUGH*/
1855 /* TODO: should no longer be possible to see the release state here */
1856 case DR_STATE_RELEASE: /* state set by dr_release_mem_done */
1858 ASSERT((s_mp->sbm_flags & DR_MFLAG_RELOWNER) == 0);
1860 if (t_mp != NULL) {
1861 ASSERT(t_mp->sbm_del_mlist == t_mp->sbm_mlist);
1862 t_mp->sbm_del_mlist = NULL;
1864 if (t_mp->sbm_mlist != NULL) {
1865 memlist_delete(t_mp->sbm_mlist);
1866 t_mp->sbm_mlist = NULL;
1869 t_mp->sbm_peer = NULL;
1870 t_mp->sbm_flags = 0;
1871 t_mp->sbm_cm.sbdev_busy = 0;
1872 dr_init_mem_unit_data(t_mp);
1874 DR_DEV_CLR_RELEASED(&t_mp->sbm_cm);
1876 dr_device_transition(&t_mp->sbm_cm,
1877 DR_STATE_CONFIGURED);
1880 if (s_mp->sbm_del_mlist != s_mp->sbm_mlist)
1881 memlist_delete(s_mp->sbm_del_mlist);
1882 s_mp->sbm_del_mlist = NULL;
1884 if (s_mp->sbm_mlist != NULL) {
1885 memlist_delete(s_mp->sbm_mlist);
1886 s_mp->sbm_mlist = NULL;
1889 s_mp->sbm_peer = NULL;
1890 s_mp->sbm_flags = 0;
1891 s_mp->sbm_cm.sbdev_busy = 0;
1892 dr_init_mem_unit_data(s_mp);
1894 return (0);
1896 default:
1897 PR_MEM("%s: WARNING unexpected state (%d) for %s\n",
1898 f, (int)state, s_mp->sbm_cm.sbdev_path);
1900 return (-1);
1902 /*NOTREACHED*/
1905 void
1906 dr_init_mem_unit(dr_mem_unit_t *mp)
1908 dr_state_t new_state;
1911 if (DR_DEV_IS_ATTACHED(&mp->sbm_cm)) {
1912 new_state = DR_STATE_CONFIGURED;
1913 mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1914 } else if (DR_DEV_IS_PRESENT(&mp->sbm_cm)) {
1915 new_state = DR_STATE_CONNECTED;
1916 mp->sbm_cm.sbdev_cond = SBD_COND_OK;
1917 } else if (mp->sbm_cm.sbdev_id != (drmachid_t)0) {
1918 new_state = DR_STATE_OCCUPIED;
1919 } else {
1920 new_state = DR_STATE_EMPTY;
1923 if (DR_DEV_IS_PRESENT(&mp->sbm_cm))
1924 dr_init_mem_unit_data(mp);
1926 /* delay transition until fully initialized */
1927 dr_device_transition(&mp->sbm_cm, new_state);
1930 static void
1931 dr_init_mem_unit_data(dr_mem_unit_t *mp)
1933 drmachid_t id = mp->sbm_cm.sbdev_id;
1934 uint64_t bytes;
1935 sbd_error_t *err;
1936 static fn_t f = "dr_init_mem_unit_data";
1937 update_membounds_t umb;
1939 PR_MEM("%s...\n", f);
1941 /* a little sanity checking */
1942 ASSERT(mp->sbm_peer == NULL);
1943 ASSERT(mp->sbm_flags == 0);
1945 /* get basepfn of mem unit */
1946 err = drmach_mem_get_base_physaddr(id, &bytes);
1947 if (err) {
1948 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1949 mp->sbm_basepfn = (pfn_t)-1;
1950 } else
1951 mp->sbm_basepfn = _b64top(bytes);
1953 /* attempt to get number of pages from PDA */
1954 err = drmach_mem_get_size(id, &bytes);
1955 if (err) {
1956 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1957 mp->sbm_npages = 0;
1958 } else
1959 mp->sbm_npages = _b64top(bytes);
1961 /* if didn't work, calculate using memlist */
1962 if (mp->sbm_npages == 0) {
1963 struct memlist *ml, *mlist;
1965 * Either we couldn't open the PDA or our
1966 * PDA has garbage in it. We must have the
1967 * page count consistent and whatever the
1968 * OS states has precedence over the PDA
1969 * so let's check the kernel.
1971 /* TODO: curious comment. it suggests pda query should happen if this fails */
1972 PR_MEM("%s: PDA query failed for npages."
1973 " Checking memlist for %s\n",
1974 f, mp->sbm_cm.sbdev_path);
1976 mlist = dr_get_memlist(mp);
1977 for (ml = mlist; ml; ml = ml->ml_next)
1978 mp->sbm_npages += btop(ml->ml_size);
1979 memlist_delete(mlist);
1982 err = drmach_mem_get_alignment(id, &bytes);
1983 if (err) {
1984 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1985 mp->sbm_alignment_mask = 0;
1986 } else
1987 mp->sbm_alignment_mask = _b64top(bytes);
1989 err = drmach_mem_get_slice_size(id, &bytes);
1990 if (err) {
1991 DRERR_SET_C(&mp->sbm_cm.sbdev_error, &err);
1992 mp->sbm_slice_size = 0; /* paranoia */
1993 } else
1994 mp->sbm_slice_size = bytes;
1997 * Add memory to lgroup
1999 umb.u_board = mp->sbm_cm.sbdev_bp->b_num;
2000 umb.u_base = (uint64_t)mp->sbm_basepfn << MMU_PAGESHIFT;
2001 umb.u_len = (uint64_t)mp->sbm_npages << MMU_PAGESHIFT;
2003 lgrp_plat_config(LGRP_CONFIG_MEM_ADD, (uintptr_t)&umb);
2005 PR_MEM("%s: %s (basepfn = 0x%lx, npgs = %ld)\n",
2006 f, mp->sbm_cm.sbdev_path, mp->sbm_basepfn, mp->sbm_npages);
2009 static int
2010 dr_reserve_mem_spans(memhandle_t *mhp, struct memlist *ml)
2012 int err;
2013 pfn_t base;
2014 pgcnt_t npgs;
2015 struct memlist *mc;
2016 static fn_t f = "dr_reserve_mem_spans";
2018 PR_MEM("%s...\n", f);
2021 * Walk the supplied memlist scheduling each span for removal
2022 * with kphysm_del_span. It is possible that a span may intersect
2023 * an area occupied by the cage.
2025 for (mc = ml; mc != NULL; mc = mc->ml_next) {
2026 base = _b64top(mc->ml_address);
2027 npgs = _b64top(mc->ml_size);
2029 err = kphysm_del_span(*mhp, base, npgs);
2030 if (err != KPHYSM_OK) {
2031 cmn_err(CE_WARN, "%s memory reserve failed."
2032 " unexpected kphysm_del_span return value %d;"
2033 " basepfn=0x%lx npages=%ld",
2034 f, err, base, npgs);
2036 return (-1);
2040 return (0);
/*
 * debug counters
 *
 * dr_smt_realigned  - bumped by dr_select_mem_target() each time the
 *                     alternative (end-of-slice relative) base address
 *                     is adopted for a candidate.
 * dr_smt_preference - one counter per preference value (0..3), bumped
 *                     each time a candidate is placed in that set.
 */
int dr_smt_realigned;
int dr_smt_preference[4];

#ifdef DEBUG
uint_t dr_ignore_board; /* if bit[bnum-1] set, board won't be candidate */
#endif
/*
 * Find and reserve a copy/rename target board suitable for the
 * given source board.
 * All boards in the system are examined and categorized in relation to
 * their memory size versus the source board's memory size.  Order of
 * preference is:
 *	1st: board has same memory size
 *	2nd: board has larger memory size
 *	3rd: board has smaller memory size
 *	4th: board has smaller memory size, available memory will be reduced.
 * Boards in category 3 and 4 will have their MC's reprogrammed so that
 * the span to which the MC responds is relocated to an address span that
 * appropriately covers the nonrelocatable span of the source board.
 */
2065 static int
2066 dr_select_mem_target(dr_handle_t *hp,
2067 dr_mem_unit_t *s_mp, struct memlist *s_ml)
2069 pgcnt_t sz = _b64top(s_mp->sbm_slice_size);
2070 pgcnt_t sm = sz - 1; /* mem_slice_mask */
2071 pfn_t s_phi, t_phi;
2073 int n_sets = 4; /* same, larger, smaller, clipped */
2074 int preference; /* lower value is higher preference */
2075 int n_units_per_set;
2076 int idx;
2077 dr_mem_unit_t **sets;
2079 int t_bd;
2080 int t_unit;
2081 int rv;
2082 int allow_src_memrange_modify;
2083 int allow_targ_memrange_modify;
2084 drmachid_t t_id;
2085 dr_board_t *s_bp, *t_bp;
2086 dr_mem_unit_t *t_mp, *c_mp;
2087 struct memlist *d_ml, *t_ml, *x_ml;
2088 memquery_t s_mq = {0};
2089 static fn_t f = "dr_select_mem_target";
2091 PR_MEM("%s...\n", f);
2093 ASSERT(s_ml != NULL);
2095 n_units_per_set = MAX_BOARDS * MAX_MEM_UNITS_PER_BOARD;
2096 sets = GETSTRUCT(dr_mem_unit_t *, n_units_per_set * n_sets);
2098 s_bp = hp->h_bd;
2099 /* calculate the offset into the slice of the last source board pfn */
2100 ASSERT(s_mp->sbm_npages != 0);
2101 s_phi = (s_mp->sbm_basepfn + s_mp->sbm_npages - 1) & sm;
2103 allow_src_memrange_modify = drmach_allow_memrange_modify(s_bp->b_id);
2106 * Make one pass through all memory units on all boards
2107 * and categorize them with respect to the source board.
2109 for (t_bd = 0; t_bd < MAX_BOARDS; t_bd++) {
2111 * The board structs are a contiguous array
2112 * so we take advantage of that to find the
2113 * correct board struct pointer for a given
2114 * board number.
2116 t_bp = dr_lookup_board(t_bd);
2118 /* source board can not be its own target */
2119 if (s_bp->b_num == t_bp->b_num)
2120 continue;
2122 for (t_unit = 0; t_unit < MAX_MEM_UNITS_PER_BOARD; t_unit++) {
2124 t_mp = dr_get_mem_unit(t_bp, t_unit);
2126 /* this memory node must be attached */
2127 if (!DR_DEV_IS_ATTACHED(&t_mp->sbm_cm))
2128 continue;
2130 /* source unit can not be its own target */
2131 if (s_mp == t_mp) {
2132 /* catch this is debug kernels */
2133 ASSERT(0);
2134 continue;
2138 * this memory node must not already be reserved
2139 * by some other memory delete operation.
2141 if (t_mp->sbm_flags & DR_MFLAG_RESERVED)
2142 continue;
2145 * categorize the memory node
2146 * If this is a smaller memory node, create a
2147 * temporary, edited copy of the source board's
2148 * memlist containing only the span of the non-
2149 * relocatable pages.
2151 t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm;
2152 t_id = t_mp->sbm_cm.sbdev_bp->b_id;
2153 allow_targ_memrange_modify =
2154 drmach_allow_memrange_modify(t_id);
2155 if (t_mp->sbm_npages == s_mp->sbm_npages &&
2156 t_phi == s_phi) {
2157 preference = 0;
2158 t_mp->sbm_slice_offset = 0;
2159 } else if (t_mp->sbm_npages > s_mp->sbm_npages &&
2160 t_phi > s_phi) {
2162 * Selecting this target will require modifying
2163 * the source and/or target physical address
2164 * ranges. Skip if not supported by platform.
2166 if (!allow_src_memrange_modify ||
2167 !allow_targ_memrange_modify) {
2168 PR_MEM("%s: skip target %s, memory "
2169 "range relocation not supported "
2170 "by platform\n", f,
2171 t_mp->sbm_cm.sbdev_path);
2172 continue;
2174 preference = 1;
2175 t_mp->sbm_slice_offset = 0;
2176 } else {
2177 pfn_t pfn = 0;
2180 * Selecting this target will require modifying
2181 * the source and/or target physical address
2182 * ranges. Skip if not supported by platform.
2184 if (!allow_src_memrange_modify ||
2185 !allow_targ_memrange_modify) {
2186 PR_MEM("%s: skip target %s, memory "
2187 "range relocation not supported "
2188 "by platform\n", f,
2189 t_mp->sbm_cm.sbdev_path);
2190 continue;
2194 * Check if its mc can be programmed to relocate
2195 * the active address range to match the
2196 * nonrelocatable span of the source board.
2198 preference = 2;
2200 if (s_mq.phys_pages == 0) {
2202 * find non-relocatable span on
2203 * source board.
2205 rv = kphysm_del_span_query(
2206 s_mp->sbm_basepfn,
2207 s_mp->sbm_npages, &s_mq);
2208 if (rv != KPHYSM_OK) {
2209 PR_MEM("%s: %s: unexpected"
2210 " kphysm_del_span_query"
2211 " return value %d;"
2212 " basepfn 0x%lx,"
2213 " npages %ld\n",
2215 s_mp->sbm_cm.sbdev_path,
2217 s_mp->sbm_basepfn,
2218 s_mp->sbm_npages);
2220 /* paranoia */
2221 s_mq.phys_pages = 0;
2223 continue;
2226 /* more paranoia */
2227 ASSERT(s_mq.phys_pages != 0);
2228 ASSERT(s_mq.nonrelocatable != 0);
2231 * this should not happen
2232 * if it does, it simply means that
2233 * we can not proceed with qualifying
2234 * this target candidate.
2236 if (s_mq.nonrelocatable == 0)
2237 continue;
2239 PR_MEM("%s: %s: nonrelocatable"
2240 " span (0x%lx..0x%lx)\n",
2242 s_mp->sbm_cm.sbdev_path,
2243 s_mq.first_nonrelocatable,
2244 s_mq.last_nonrelocatable);
2248 * Round down the starting pfn of the
2249 * nonrelocatable span on the source board
2250 * to nearest programmable boundary possible
2251 * with this target candidate.
2253 pfn = s_mq.first_nonrelocatable &
2254 ~t_mp->sbm_alignment_mask;
2256 /* skip candidate if memory is too small */
2257 if (pfn + t_mp->sbm_npages <
2258 s_mq.last_nonrelocatable)
2259 continue;
2262 * reprogramming an mc to relocate its
2263 * active address range means the beginning
2264 * address to which the DIMMS respond will
2265 * be somewhere above the slice boundary
2266 * address. The larger the size of memory
2267 * on this unit, the more likely part of it
2268 * will exist beyond the end of the slice.
2269 * The portion of the memory that does is
2270 * unavailable to the system until the mc
2271 * reprogrammed to a more favorable base
2272 * address.
2273 * An attempt is made to avoid the loss by
2274 * recalculating the mc base address relative
2275 * to the end of the slice. This may produce
2276 * a more favorable result. If not, we lower
2277 * the board's preference rating so that it
2278 * is one the last candidate boards to be
2279 * considered.
2281 if ((pfn + t_mp->sbm_npages) & ~sm) {
2282 pfn_t p;
2284 ASSERT(sz >= t_mp->sbm_npages);
2287 * calculate an alternative starting
2288 * address relative to the end of the
2289 * slice's address space.
2291 p = pfn & ~sm;
2292 p = p + (sz - t_mp->sbm_npages);
2293 p = p & ~t_mp->sbm_alignment_mask;
2295 if ((p > s_mq.first_nonrelocatable) ||
2296 (p + t_mp->sbm_npages <
2297 s_mq.last_nonrelocatable)) {
2300 * alternative starting addr
2301 * won't work. Lower preference
2302 * rating of this board, since
2303 * some number of pages will
2304 * unavailable for use.
2306 preference = 3;
2307 } else {
2308 dr_smt_realigned++;
2309 pfn = p;
2314 * translate calculated pfn to an offset
2315 * relative to the slice boundary. If the
2316 * candidate board is selected, this offset
2317 * will be used to calculate the values
2318 * programmed into the mc.
2320 t_mp->sbm_slice_offset = pfn & sm;
2321 PR_MEM("%s: %s:"
2322 " proposed mc offset 0x%lx\n",
2324 t_mp->sbm_cm.sbdev_path,
2325 t_mp->sbm_slice_offset);
2328 dr_smt_preference[preference]++;
2330 /* calculate index to start of preference set */
2331 idx = n_units_per_set * preference;
2332 /* calculate offset to respective element */
2333 idx += t_bd * MAX_MEM_UNITS_PER_BOARD + t_unit;
2335 ASSERT(idx < n_units_per_set * n_sets);
2336 sets[idx] = t_mp;
2341 * NOTE: this would be a good place to sort each candidate
2342 * set in to some desired order, e.g. memory size in ascending
2343 * order. Without an additional sorting step here, the order
2344 * within a set is ascending board number order.
2347 c_mp = NULL;
2348 x_ml = NULL;
2349 t_ml = NULL;
2350 for (idx = 0; idx < n_units_per_set * n_sets; idx++) {
2351 memquery_t mq;
2353 /* cleanup t_ml after previous pass */
2354 if (t_ml != NULL) {
2355 memlist_delete(t_ml);
2356 t_ml = NULL;
2359 /* get candidate target board mem unit */
2360 t_mp = sets[idx];
2361 if (t_mp == NULL)
2362 continue;
2364 /* get target board memlist */
2365 t_ml = dr_get_memlist(t_mp);
2366 if (t_ml == NULL) {
2367 cmn_err(CE_WARN, "%s: no memlist for"
2368 " mem-unit %d, board %d",
2370 t_mp->sbm_cm.sbdev_bp->b_num,
2371 t_mp->sbm_cm.sbdev_unum);
2373 continue;
2376 /* get appropriate source board memlist */
2377 t_phi = (t_mp->sbm_basepfn + t_mp->sbm_npages - 1) & sm;
2378 if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
2379 spgcnt_t excess;
2382 * make a copy of the source board memlist
2383 * then edit it to remove the spans that
2384 * are outside the calculated span of
2385 * [pfn..s_mq.last_nonrelocatable].
2387 if (x_ml != NULL)
2388 memlist_delete(x_ml);
2390 x_ml = memlist_dup(s_ml);
2391 if (x_ml == NULL) {
2392 PR_MEM("%s: memlist_dup failed\n", f);
2393 /* TODO: should abort */
2394 continue;
2397 /* trim off lower portion */
2398 excess = t_mp->sbm_slice_offset -
2399 (s_mp->sbm_basepfn & sm);
2401 if (excess > 0) {
2402 x_ml = memlist_del_span(
2403 x_ml,
2404 _ptob64(s_mp->sbm_basepfn),
2405 _ptob64(excess));
2407 ASSERT(x_ml);
2410 * Since this candidate target board is smaller
2411 * than the source board, s_mq must have been
2412 * initialized in previous loop while processing
2413 * this or some other candidate board.
2414 * FIXME: this is weak.
2416 ASSERT(s_mq.phys_pages != 0);
2418 /* trim off upper portion */
2419 excess = (s_mp->sbm_basepfn + s_mp->sbm_npages)
2420 - (s_mq.last_nonrelocatable + 1);
2421 if (excess > 0) {
2422 pfn_t p;
2424 p = s_mq.last_nonrelocatable + 1;
2425 x_ml = memlist_del_span(
2426 x_ml,
2427 _ptob64(p),
2428 _ptob64(excess));
2431 PR_MEM("%s: %s: edited source memlist:\n",
2432 f, s_mp->sbm_cm.sbdev_path);
2433 PR_MEMLIST_DUMP(x_ml);
2435 #ifdef DEBUG
2436 /* sanity check memlist */
2437 d_ml = x_ml;
2438 while (d_ml->ml_next != NULL)
2439 d_ml = d_ml->ml_next;
2441 ASSERT(d_ml->ml_address + d_ml->ml_size ==
2442 _ptob64(s_mq.last_nonrelocatable + 1));
2443 #endif
2446 * x_ml now describes only the portion of the
2447 * source board that will be moved during the
2448 * copy/rename operation.
2450 d_ml = x_ml;
2451 } else {
2452 /* use original memlist; all spans will be moved */
2453 d_ml = s_ml;
2456 /* verify target can support source memory spans. */
2457 if (memlist_canfit(d_ml, t_ml) == 0) {
2458 PR_MEM("%s: source memlist won't"
2459 " fit in target memlist\n", f);
2460 PR_MEM("%s: source memlist:\n", f);
2461 PR_MEMLIST_DUMP(d_ml);
2462 PR_MEM("%s: target memlist:\n", f);
2463 PR_MEMLIST_DUMP(t_ml);
2465 continue;
2468 /* NOTE: the value of d_ml is not used beyond this point */
2470 PR_MEM("%s: checking for no-reloc in %s, "
2471 " basepfn=0x%lx, npages=%ld\n",
2473 t_mp->sbm_cm.sbdev_path,
2474 t_mp->sbm_basepfn,
2475 t_mp->sbm_npages);
2477 rv = kphysm_del_span_query(
2478 t_mp->sbm_basepfn, t_mp->sbm_npages, &mq);
2479 if (rv != KPHYSM_OK) {
2480 PR_MEM("%s: kphysm_del_span_query:"
2481 " unexpected return value %d\n", f, rv);
2483 continue;
2486 if (mq.nonrelocatable != 0) {
2487 PR_MEM("%s: candidate %s has"
2488 " nonrelocatable span [0x%lx..0x%lx]\n",
2490 t_mp->sbm_cm.sbdev_path,
2491 mq.first_nonrelocatable,
2492 mq.last_nonrelocatable);
2494 continue;
2497 #ifdef DEBUG
2499 * This is a debug tool for excluding certain boards
2500 * from being selected as a target board candidate.
2501 * dr_ignore_board is only tested by this driver.
2502 * It must be set with adb, obp, /etc/system or your
2503 * favorite debugger.
2505 if (dr_ignore_board &
2506 (1 << (t_mp->sbm_cm.sbdev_bp->b_num - 1))) {
2507 PR_MEM("%s: dr_ignore_board flag set,"
2508 " ignoring %s as candidate\n",
2509 f, t_mp->sbm_cm.sbdev_path);
2510 continue;
2512 #endif
2515 * Reserve excess source board memory, if any.
2517 * When the number of pages on the candidate target
2518 * board is less than the number of pages on the source,
2519 * then some spans (clearly) of the source board's address
2520 * space will not be covered by physical memory after the
2521 * copy/rename completes. The following code block
2522 * schedules those spans to be deleted.
2524 if (t_mp->sbm_npages < s_mp->sbm_npages || t_phi < s_phi) {
2525 pfn_t pfn;
2526 uint64_t s_del_pa;
2527 struct memlist *ml;
2529 d_ml = memlist_dup(s_ml);
2530 if (d_ml == NULL) {
2531 PR_MEM("%s: cant dup src brd memlist\n", f);
2532 /* TODO: should abort */
2533 continue;
2536 /* calculate base pfn relative to target board */
2537 pfn = s_mp->sbm_basepfn & ~sm;
2538 pfn += t_mp->sbm_slice_offset;
2541 * cannot split dynamically added segment
2543 s_del_pa = _ptob64(pfn + t_mp->sbm_npages);
2544 PR_MEM("%s: proposed src delete pa=0x%lx\n", f,
2545 s_del_pa);
2546 PR_MEM("%s: checking for split of dyn seg list:\n", f);
2547 PR_MEMLIST_DUMP(s_mp->sbm_dyn_segs);
2548 for (ml = s_mp->sbm_dyn_segs; ml; ml = ml->ml_next) {
2549 if (s_del_pa > ml->ml_address &&
2550 s_del_pa < ml->ml_address + ml->ml_size) {
2551 s_del_pa = ml->ml_address;
2552 break;
2556 /* remove span that will reside on candidate board */
2557 d_ml = memlist_del_span(d_ml, _ptob64(pfn),
2558 s_del_pa - _ptob64(pfn));
2560 PR_MEM("%s: %s: reserving src brd memlist:\n",
2561 f, s_mp->sbm_cm.sbdev_path);
2562 PR_MEMLIST_DUMP(d_ml);
2564 /* reserve excess spans */
2565 if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, d_ml)
2566 != 0) {
2568 /* likely more non-reloc pages appeared */
2569 /* TODO: restart from top? */
2570 continue;
2572 } else {
2573 /* no excess source board memory */
2574 d_ml = NULL;
2577 s_mp->sbm_flags |= DR_MFLAG_RESERVED;
2580 * reserve all memory on target board.
2581 * NOTE: source board's memhandle is used.
2583 * If this succeeds (eq 0), then target selection is
2584 * complete and all unwanted memory spans, both source and
2585 * target, have been reserved. Loop is terminated.
2587 if (dr_reserve_mem_spans(&s_mp->sbm_memhandle, t_ml) == 0) {
2588 PR_MEM("%s: %s: target board memory reserved\n",
2589 f, t_mp->sbm_cm.sbdev_path);
2591 /* a candidate target board is now reserved */
2592 t_mp->sbm_flags |= DR_MFLAG_RESERVED;
2593 c_mp = t_mp;
2595 /* *** EXITING LOOP *** */
2596 break;
2599 /* did not successfully reserve the target board. */
2600 PR_MEM("%s: could not reserve target %s\n",
2601 f, t_mp->sbm_cm.sbdev_path);
2604 * NOTE: an undo of the dr_reserve_mem_span work
2605 * will happen automatically when the memhandle
2606 * (s_mp->sbm_memhandle) is kphysm_del_release'd.
2609 s_mp->sbm_flags &= ~DR_MFLAG_RESERVED;
2612 /* clean up after memlist editing logic */
2613 if (x_ml != NULL)
2614 memlist_delete(x_ml);
2616 FREESTRUCT(sets, dr_mem_unit_t *, n_units_per_set * n_sets);
2619 * c_mp will be NULL when the entire sets[] array
2620 * has been searched without reserving a target board.
2622 if (c_mp == NULL) {
2623 PR_MEM("%s: %s: target selection failed.\n",
2624 f, s_mp->sbm_cm.sbdev_path);
2626 if (t_ml != NULL)
2627 memlist_delete(t_ml);
2629 return (-1);
2632 PR_MEM("%s: found target %s for source %s\n",
2634 c_mp->sbm_cm.sbdev_path,
2635 s_mp->sbm_cm.sbdev_path);
2637 s_mp->sbm_peer = c_mp;
2638 s_mp->sbm_flags |= DR_MFLAG_SOURCE;
2639 s_mp->sbm_del_mlist = d_ml; /* spans to be deleted, if any */
2640 s_mp->sbm_mlist = s_ml;
2641 s_mp->sbm_cm.sbdev_busy = 1;
2643 c_mp->sbm_peer = s_mp;
2644 c_mp->sbm_flags |= DR_MFLAG_TARGET;
2645 c_mp->sbm_del_mlist = t_ml; /* spans to be deleted */
2646 c_mp->sbm_mlist = t_ml;
2647 c_mp->sbm_cm.sbdev_busy = 1;
2649 s_mp->sbm_flags &= ~DR_MFLAG_MEMRESIZE;
2650 if (c_mp->sbm_npages > s_mp->sbm_npages) {
2651 s_mp->sbm_flags |= DR_MFLAG_MEMUPSIZE;
2652 PR_MEM("%s: upsize detected (source=%ld < target=%ld)\n",
2653 f, s_mp->sbm_npages, c_mp->sbm_npages);
2654 } else if (c_mp->sbm_npages < s_mp->sbm_npages) {
2655 s_mp->sbm_flags |= DR_MFLAG_MEMDOWNSIZE;
2656 PR_MEM("%s: downsize detected (source=%ld > target=%ld)\n",
2657 f, s_mp->sbm_npages, c_mp->sbm_npages);
2660 return (0);
2664 * Memlist support.
2668 * Determine whether the source memlist (s_mlist) will
2669 * fit into the target memlist (t_mlist) in terms of
2670 * size and holes (i.e. based on same relative base address).
2672 static int
2673 memlist_canfit(struct memlist *s_mlist, struct memlist *t_mlist)
2675 int rv = 0;
2676 uint64_t s_basepa, t_basepa;
2677 struct memlist *s_ml, *t_ml;
2679 if ((s_mlist == NULL) || (t_mlist == NULL))
2680 return (0);
2683 * Base both memlists on common base address (0).
2685 s_basepa = s_mlist->ml_address;
2686 t_basepa = t_mlist->ml_address;
2688 for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
2689 s_ml->ml_address -= s_basepa;
2691 for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
2692 t_ml->ml_address -= t_basepa;
2694 s_ml = s_mlist;
2695 for (t_ml = t_mlist; t_ml && s_ml; t_ml = t_ml->ml_next) {
2696 uint64_t s_start, s_end;
2697 uint64_t t_start, t_end;
2699 t_start = t_ml->ml_address;
2700 t_end = t_start + t_ml->ml_size;
2702 for (; s_ml; s_ml = s_ml->ml_next) {
2703 s_start = s_ml->ml_address;
2704 s_end = s_start + s_ml->ml_size;
2706 if ((s_start < t_start) || (s_end > t_end))
2707 break;
2711 * If we ran out of source memlist chunks that mean
2712 * we found a home for all of them.
2714 if (s_ml == NULL)
2715 rv = 1;
2718 * Need to add base addresses back since memlists
2719 * are probably in use by caller.
2721 for (s_ml = s_mlist; s_ml; s_ml = s_ml->ml_next)
2722 s_ml->ml_address += s_basepa;
2724 for (t_ml = t_mlist; t_ml; t_ml = t_ml->ml_next)
2725 t_ml->ml_address += t_basepa;
2727 return (rv);