drivers/edac/edac_mc.c

   1 /*
   2  * edac_mc kernel module
   3  * (C) 2005, 2006 Linux Networx (http://lnxi.com)
   4  * This file may be distributed under the terms of the
   5  * GNU General Public License.
   6  *
   7  * Written by Thayne Harbaugh
   8  * Based on work by Dan Hollis <goemon at anime dot net> and others.
   9  *      http://www.anime.net/~goemon/linux-ecc/
  10  *
  11  * Modified by Dave Peterson and Doug Thompson
  12  *
  13  */
  14
  15 #include <linux/module.h>
  16 #include <linux/proc_fs.h>
  17 #include <linux/kernel.h>
  18 #include <linux/types.h>
  19 #include <linux/smp.h>
  20 #include <linux/init.h>
  21 #include <linux/sysctl.h>
  22 #include <linux/highmem.h>
  23 #include <linux/timer.h>
  24 #include <linux/slab.h>
  25 #include <linux/jiffies.h>
  26 #include <linux/spinlock.h>
  27 #include <linux/list.h>
  28 #include <linux/sysdev.h>
  29 #include <linux/ctype.h>
  30 #include <linux/edac.h>
  31 #include <asm/uaccess.h>
  32 #include <asm/page.h>
  33 #include <asm/edac.h>
  34 #include "edac_core.h"
  35 #include "edac_module.h"
  36
/* mem_ctls_mutex serializes access to mc_devices, the list of registered memory controllers */
  38 static DEFINE_MUTEX(mem_ctls_mutex);
  39 static LIST_HEAD(mc_devices);
  40
  41 #ifdef CONFIG_EDAC_DEBUG
  42
  43 static void edac_mc_dump_channel(struct channel_info *chan)
  44 {
  45         debugf4("\tchannel = %p\n", chan);
  46         debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
  47         debugf4("\tchannel->ce_count = %d\n", chan->ce_count);
  48         debugf4("\tchannel->label = '%s'\n", chan->label);
  49         debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
  50 }
  51
  52 static void edac_mc_dump_csrow(struct csrow_info *csrow)
  53 {
  54         debugf4("\tcsrow = %p\n", csrow);
  55         debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
  56         debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
  57         debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
  58         debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
  59         debugf4("\tcsrow->nr_pages = 0x%x\n", csrow->nr_pages);
  60         debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
  61         debugf4("\tcsrow->channels = %p\n", csrow->channels);
  62         debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
  63 }
  64
  65 static void edac_mc_dump_mci(struct mem_ctl_info *mci)
  66 {
  67         debugf3("\tmci = %p\n", mci);
  68         debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
  69         debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
  70         debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
  71         debugf4("\tmci->edac_check = %p\n", mci->edac_check);
  72         debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
  73                 mci->nr_csrows, mci->csrows);
  74         debugf3("\tdev = %p\n", mci->dev);
  75         debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
  76         debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
  77 }
  78
  79 #endif                          /* CONFIG_EDAC_DEBUG */
  80
  81 /* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
  82  * Adjust 'ptr' so that its alignment is at least as stringent as what the
  83  * compiler would provide for X and return the aligned result.
  84  *
  85  * If 'size' is a constant, the compiler will optimize this whole function
  86  * down to either a no-op or the addition of a constant to the value of 'ptr'.
  87  */
  88 void *edac_align_ptr(void *ptr, unsigned size)
  89 {
  90         unsigned align, r;
  91
  92         /* Here we assume that the alignment of a "long long" is the most
  93          * stringent alignment that the compiler will ever provide by default.
  94          * As far as I know, this is a reasonable assumption.
  95          */
  96         if (size > sizeof(long))
  97                 align = sizeof(long long);
  98         else if (size > sizeof(int))
  99                 align = sizeof(long);
 100         else if (size > sizeof(short))
 101                 align = sizeof(int);
 102         else if (size > sizeof(char))
 103                 align = sizeof(short);
 104         else
 105                 return (char *)ptr;
 106
 107         r = size % align;
 108
 109         if (r == 0)
 110                 return (char *)ptr;
 111
 112         return (void *)(((unsigned long)ptr) + align - r);
 113 }
 114
 115 /**
 116  * edac_mc_alloc: Allocate a struct mem_ctl_info structure
 117  * @size_pvt:   size of private storage needed
 118  * @nr_csrows:  Number of CWROWS needed for this MC
 119  * @nr_chans:   Number of channels for the MC
 120  *
 121  * Everything is kmalloc'ed as one big chunk - more efficient.
 122  * Only can be used if all structures have the same lifetime - otherwise
 123  * you have to allocate and initialize your own structures.
 124  *
 125  * Use edac_mc_free() to free mc structures allocated by this function.
 126  *
 127  * Returns:
 128  *      NULL allocation failed
 129  *      struct mem_ctl_info pointer
 130  */
 131 struct mem_ctl_info *edac_mc_alloc(unsigned sz_pvt, unsigned nr_csrows,
 132                                 unsigned nr_chans, int edac_index)
 133 {
 134         struct mem_ctl_info *mci;
 135         struct csrow_info *csi, *csrow;
 136         struct channel_info *chi, *chp, *chan;
 137         void *pvt;
 138         unsigned size;
 139         int row, chn;
 140         int err;
 141
 142         /* Figure out the offsets of the various items from the start of an mc
 143          * structure.  We want the alignment of each item to be at least as
 144          * stringent as what the compiler would provide if we could simply
 145          * hardcode everything into a single struct.
 146          */
 147         mci = (struct mem_ctl_info *)0;
 148         csi = edac_align_ptr(&mci[1], sizeof(*csi));
 149         chi = edac_align_ptr(&csi[nr_csrows], sizeof(*chi));
 150         pvt = edac_align_ptr(&chi[nr_chans * nr_csrows], sz_pvt);
 151         size = ((unsigned long)pvt) + sz_pvt;
 152
 153         mci = kzalloc(size, GFP_KERNEL);
 154         if (mci == NULL)
 155                 return NULL;
 156
 157         /* Adjust pointers so they point within the memory we just allocated
 158          * rather than an imaginary chunk of memory located at address 0.
 159          */
 160         csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
 161         chi = (struct channel_info *)(((char *)mci) + ((unsigned long)chi));
 162         pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;
 163
 164         /* setup index and various internal pointers */
 165         mci->mc_idx = edac_index;
 166         mci->csrows = csi;
 167         mci->pvt_info = pvt;
 168         mci->nr_csrows = nr_csrows;
 169
 170         for (row = 0; row < nr_csrows; row++) {
 171                 csrow = &csi[row];
 172                 csrow->csrow_idx = row;
 173                 csrow->mci = mci;
 174                 csrow->nr_channels = nr_chans;
 175                 chp = &chi[row * nr_chans];
 176                 csrow->channels = chp;
 177
 178                 for (chn = 0; chn < nr_chans; chn++) {
 179                         chan = &chp[chn];
 180                         chan->chan_idx = chn;
 181                         chan->csrow = csrow;
 182                 }
 183         }
 184
 185         mci->op_state = OP_ALLOC;
 186
 187         /*
 188          * Initialize the 'root' kobj for the edac_mc controller
 189          */
 190         err = edac_mc_register_sysfs_main_kobj(mci);
 191         if (err) {
 192                 kfree(mci);
 193                 return NULL;
 194         }
 195
 196         /* at this point, the root kobj is valid, and in order to
 197          * 'free' the object, then the function:
 198          *      edac_mc_unregister_sysfs_main_kobj() must be called
 199          * which will perform kobj unregistration and the actual free
 200          * will occur during the kobject callback operation
 201          */
 202         return mci;
 203 }
 204 EXPORT_SYMBOL_GPL(edac_mc_alloc);
 205
 206 /**
 207  * edac_mc_free
 208  *      'Free' a previously allocated 'mci' structure
 209  * @mci: pointer to a struct mem_ctl_info structure
 210  */
 211 void edac_mc_free(struct mem_ctl_info *mci)
 212 {
 213         edac_mc_unregister_sysfs_main_kobj(mci);
 214 }
 215 EXPORT_SYMBOL_GPL(edac_mc_free);
 216
 217
 218 /*
 219  * find_mci_by_dev
 220  *
 221  *      scan list of controllers looking for the one that manages
 222  *      the 'dev' device
 223  */
 224 static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
 225 {
 226         struct mem_ctl_info *mci;
 227         struct list_head *item;
 228
 229         debugf3("%s()\n", __func__);
 230
 231         list_for_each(item, &mc_devices) {
 232                 mci = list_entry(item, struct mem_ctl_info, link);
 233
 234                 if (mci->dev == dev)
 235                         return mci;
 236         }
 237
 238         return NULL;
 239 }
 240
 241 /*
 242  * handler for EDAC to check if NMI type handler has asserted interrupt
 243  */
 244 static int edac_mc_assert_error_check_and_clear(void)
 245 {
 246         int old_state;
 247
 248         if (edac_op_state == EDAC_OPSTATE_POLL)
 249                 return 1;
 250
 251         old_state = edac_err_assert;
 252         edac_err_assert = 0;
 253
 254         return old_state;
 255 }
 256
 257 /*
 258  * edac_mc_workq_function
 259  *      performs the operation scheduled by a workq request
 260  */
 261 static void edac_mc_workq_function(struct work_struct *work_req)
 262 {
 263         struct delayed_work *d_work = to_delayed_work(work_req);
 264         struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);
 265
 266         mutex_lock(&mem_ctls_mutex);
 267
 268         /* if this control struct has movd to offline state, we are done */
 269         if (mci->op_state == OP_OFFLINE) {
 270                 mutex_unlock(&mem_ctls_mutex);
 271                 return;
 272         }
 273
 274         /* Only poll controllers that are running polled and have a check */
 275         if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
 276                 mci->edac_check(mci);
 277
 278         mutex_unlock(&mem_ctls_mutex);
 279
 280         /* Reschedule */
 281         queue_delayed_work(edac_workqueue, &mci->work,
 282                         msecs_to_jiffies(edac_mc_get_poll_msec()));
 283 }
 284
 285 /*
 286  * edac_mc_workq_setup
 287  *      initialize a workq item for this mci
 288  *      passing in the new delay period in msec
 289  *
 290  *      locking model:
 291  *
 292  *              called with the mem_ctls_mutex held
 293  */
 294 static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
 295 {
 296         debugf0("%s()\n", __func__);
 297
 298         /* if this instance is not in the POLL state, then simply return */
 299         if (mci->op_state != OP_RUNNING_POLL)
 300                 return;
 301
 302         INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
 303         queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
 304 }
 305
 306 /*
 307  * edac_mc_workq_teardown
 308  *      stop the workq processing on this mci
 309  *
 310  *      locking model:
 311  *
 312  *              called WITHOUT lock held
 313  */
 314 static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
 315 {
 316         int status;
 317
 318         status = cancel_delayed_work(&mci->work);
 319         if (status == 0) {
 320                 debugf0("%s() not canceled, flush the queue\n",
 321                         __func__);
 322
 323                 /* workq instance might be running, wait for it */
 324                 flush_workqueue(edac_workqueue);
 325         }
 326 }
 327
 328 /*
 329  * edac_mc_reset_delay_period(unsigned long value)
 330  *
 331  *      user space has updated our poll period value, need to
 332  *      reset our workq delays
 333  */
 334 void edac_mc_reset_delay_period(int value)
 335 {
 336         struct mem_ctl_info *mci;
 337         struct list_head *item;
 338
 339         mutex_lock(&mem_ctls_mutex);
 340
 341         /* scan the list and turn off all workq timers, doing so under lock
 342          */
 343         list_for_each(item, &mc_devices) {
 344                 mci = list_entry(item, struct mem_ctl_info, link);
 345
 346                 if (mci->op_state == OP_RUNNING_POLL)
 347                         cancel_delayed_work(&mci->work);
 348         }
 349
 350         mutex_unlock(&mem_ctls_mutex);
 351
 352
 353         /* re-walk the list, and reset the poll delay */
 354         mutex_lock(&mem_ctls_mutex);
 355
 356         list_for_each(item, &mc_devices) {
 357                 mci = list_entry(item, struct mem_ctl_info, link);
 358
 359                 edac_mc_workq_setup(mci, (unsigned long) value);
 360         }
 361
 362         mutex_unlock(&mem_ctls_mutex);
 363 }
 364
 365
 366
 367 /* Return 0 on success, 1 on failure.
 368  * Before calling this function, caller must
 369  * assign a unique value to mci->mc_idx.
 370  *
 371  *      locking model:
 372  *
 373  *              called with the mem_ctls_mutex lock held
 374  */
 375 static int add_mc_to_global_list(struct mem_ctl_info *mci)
 376 {
 377         struct list_head *item, *insert_before;
 378         struct mem_ctl_info *p;
 379
 380         insert_before = &mc_devices;
 381
 382         p = find_mci_by_dev(mci->dev);
 383         if (unlikely(p != NULL))
 384                 goto fail0;
 385
 386         list_for_each(item, &mc_devices) {
 387                 p = list_entry(item, struct mem_ctl_info, link);
 388
 389                 if (p->mc_idx >= mci->mc_idx) {
 390                         if (unlikely(p->mc_idx == mci->mc_idx))
 391                                 goto fail1;
 392
 393                         insert_before = item;
 394                         break;
 395                 }
 396         }
 397
 398         list_add_tail_rcu(&mci->link, insert_before);
 399         atomic_inc(&edac_handlers);
 400         return 0;
 401
 402 fail0:
 403         edac_printk(KERN_WARNING, EDAC_MC,
 404                 "%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
 405                 edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
 406         return 1;
 407
 408 fail1:
 409         edac_printk(KERN_WARNING, EDAC_MC,
 410                 "bug in low-level driver: attempt to assign\n"
 411                 "    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
 412         return 1;
 413 }
 414
 415 static void complete_mc_list_del(struct rcu_head *head)
 416 {
 417         struct mem_ctl_info *mci;
 418
 419         mci = container_of(head, struct mem_ctl_info, rcu);
 420         INIT_LIST_HEAD(&mci->link);
 421 }
 422
 423 static void del_mc_from_global_list(struct mem_ctl_info *mci)
 424 {
 425         atomic_dec(&edac_handlers);
 426         list_del_rcu(&mci->link);
 427         call_rcu(&mci->rcu, complete_mc_list_del);
 428         rcu_barrier();
 429 }
 430
 431 /**
 432  * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 433  *
 434  * If found, return a pointer to the structure.
 435  * Else return NULL.
 436  *
 437  * Caller must hold mem_ctls_mutex.
 438  */
 439 struct mem_ctl_info *edac_mc_find(int idx)
 440 {
 441         struct list_head *item;
 442         struct mem_ctl_info *mci;
 443
 444         list_for_each(item, &mc_devices) {
 445                 mci = list_entry(item, struct mem_ctl_info, link);
 446
 447                 if (mci->mc_idx >= idx) {
 448                         if (mci->mc_idx == idx)
 449                                 return mci;
 450
 451                         break;
 452                 }
 453         }
 454
 455         return NULL;
 456 }
 457 EXPORT_SYMBOL(edac_mc_find);
 458
 459 /**
 460  * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 461  *                 create sysfs entries associated with mci structure
 462  * @mci: pointer to the mci structure to be added to the list
 463  * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
 464  *
 465  * Return:
 466  *      0       Success
 467  *      !0      Failure
 468  */
 469
 470 /* FIXME - should a warning be printed if no error detection? correction? */
 471 int edac_mc_add_mc(struct mem_ctl_info *mci)
 472 {
 473         debugf0("%s()\n", __func__);
 474
 475 #ifdef CONFIG_EDAC_DEBUG
 476         if (edac_debug_level >= 3)
 477                 edac_mc_dump_mci(mci);
 478
 479         if (edac_debug_level >= 4) {
 480                 int i;
 481
 482                 for (i = 0; i < mci->nr_csrows; i++) {
 483                         int j;
 484
 485                         edac_mc_dump_csrow(&mci->csrows[i]);
 486                         for (j = 0; j < mci->csrows[i].nr_channels; j++)
 487                                 edac_mc_dump_channel(&mci->csrows[i].
 488                                                 channels[j]);
 489                 }
 490         }
 491 #endif
 492         mutex_lock(&mem_ctls_mutex);
 493
 494         if (add_mc_to_global_list(mci))
 495                 goto fail0;
 496
 497         /* set load time so that error rate can be tracked */
 498         mci->start_time = jiffies;
 499
 500         if (edac_create_sysfs_mci_device(mci)) {
 501                 edac_mc_printk(mci, KERN_WARNING,
 502                         "failed to create sysfs device\n");
 503                 goto fail1;
 504         }
 505
 506         /* If there IS a check routine, then we are running POLLED */
 507         if (mci->edac_check != NULL) {
 508                 /* This instance is NOW RUNNING */
 509                 mci->op_state = OP_RUNNING_POLL;
 510
 511                 edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
 512         } else {
 513                 mci->op_state = OP_RUNNING_INTERRUPT;
 514         }
 515
 516         /* Report action taken */
 517         edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
 518                 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 519
 520         mutex_unlock(&mem_ctls_mutex);
 521         return 0;
 522
 523 fail1:
 524         del_mc_from_global_list(mci);
 525
 526 fail0:
 527         mutex_unlock(&mem_ctls_mutex);
 528         return 1;
 529 }
 530 EXPORT_SYMBOL_GPL(edac_mc_add_mc);
 531
 532 /**
 533  * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 534  *                 remove mci structure from global list
 535  * @pdev: Pointer to 'struct device' representing mci structure to remove.
 536  *
 537  * Return pointer to removed mci structure, or NULL if device not found.
 538  */
 539 struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
 540 {
 541         struct mem_ctl_info *mci;
 542
 543         debugf0("%s()\n", __func__);
 544
 545         mutex_lock(&mem_ctls_mutex);
 546
 547         /* find the requested mci struct in the global list */
 548         mci = find_mci_by_dev(dev);
 549         if (mci == NULL) {
 550                 mutex_unlock(&mem_ctls_mutex);
 551                 return NULL;
 552         }
 553
 554         /* marking MCI offline */
 555         mci->op_state = OP_OFFLINE;
 556
 557         del_mc_from_global_list(mci);
 558         mutex_unlock(&mem_ctls_mutex);
 559
 560         /* flush workq processes and remove sysfs */
 561         edac_mc_workq_teardown(mci);
 562         edac_remove_sysfs_mci_device(mci);
 563
 564         edac_printk(KERN_INFO, EDAC_MC,
 565                 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
 566                 mci->mod_name, mci->ctl_name, edac_dev_name(mci));
 567
 568         return mci;
 569 }
 570 EXPORT_SYMBOL_GPL(edac_mc_del_mc);
 571
 572 static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
 573                                 u32 size)
 574 {
 575         struct page *pg;
 576         void *virt_addr;
 577         unsigned long flags = 0;
 578
 579         debugf3("%s()\n", __func__);
 580
 581         /* ECC error page was not in our memory. Ignore it. */
 582         if (!pfn_valid(page))
 583                 return;
 584
 585         /* Find the actual page structure then map it and fix */
 586         pg = pfn_to_page(page);
 587
 588         if (PageHighMem(pg))
 589                 local_irq_save(flags);
 590
 591         virt_addr = kmap_atomic(pg, KM_BOUNCE_READ);
 592
 593         /* Perform architecture specific atomic scrub operation */
 594         atomic_scrub(virt_addr + offset, size);
 595
 596         /* Unmap and complete */
 597         kunmap_atomic(virt_addr, KM_BOUNCE_READ);
 598
 599         if (PageHighMem(pg))
 600                 local_irq_restore(flags);
 601 }
 602
 603 /* FIXME - should return -1 */
 604 int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
 605 {
 606         struct csrow_info *csrows = mci->csrows;
 607         int row, i;
 608
 609         debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
 610         row = -1;
 611
 612         for (i = 0; i < mci->nr_csrows; i++) {
 613                 struct csrow_info *csrow = &csrows[i];
 614
 615                 if (csrow->nr_pages == 0)
 616                         continue;
 617
 618                 debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
 619                         "mask(0x%lx)\n", mci->mc_idx, __func__,
 620                         csrow->first_page, page, csrow->last_page,
 621                         csrow->page_mask);
 622
 623                 if ((page >= csrow->first_page) &&
 624                     (page <= csrow->last_page) &&
 625                     ((page & csrow->page_mask) ==
 626                      (csrow->first_page & csrow->page_mask))) {
 627                         row = i;
 628                         break;
 629                 }
 630         }
 631
 632         if (row == -1)
 633                 edac_mc_printk(mci, KERN_ERR,
 634                         "could not look up page error address %lx\n",
 635                         (unsigned long)page);
 636
 637         return row;
 638 }
 639 EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);
 640
 641 /* FIXME - setable log (warning/emerg) levels */
 642 /* FIXME - integrate with evlog: http://evlog.sourceforge.net/ */
 643 void edac_mc_handle_ce(struct mem_ctl_info *mci,
 644                 unsigned long page_frame_number,
 645                 unsigned long offset_in_page, unsigned long syndrome,
 646                 int row, int channel, const char *msg)
 647 {
 648         unsigned long remapped_page;
 649
 650         debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 651
 652         /* FIXME - maybe make panic on INTERNAL ERROR an option */
 653         if (row >= mci->nr_csrows || row < 0) {
 654                 /* something is wrong */
 655                 edac_mc_printk(mci, KERN_ERR,
 656                         "INTERNAL ERROR: row out of range "
 657                         "(%d >= %d)\n", row, mci->nr_csrows);
 658                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 659                 return;
 660         }
 661
 662         if (channel >= mci->csrows[row].nr_channels || channel < 0) {
 663                 /* something is wrong */
 664                 edac_mc_printk(mci, KERN_ERR,
 665                         "INTERNAL ERROR: channel out of range "
 666                         "(%d >= %d)\n", channel,
 667                         mci->csrows[row].nr_channels);
 668                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 669                 return;
 670         }
 671
 672         if (edac_mc_get_log_ce())
 673                 /* FIXME - put in DIMM location */
 674                 edac_mc_printk(mci, KERN_WARNING,
 675                         "CE page 0x%lx, offset 0x%lx, grain %d, syndrome "
 676                         "0x%lx, row %d, channel %d, label \"%s\": %s\n",
 677                         page_frame_number, offset_in_page,
 678                         mci->csrows[row].grain, syndrome, row, channel,
 679                         mci->csrows[row].channels[channel].label, msg);
 680
 681         mci->ce_count++;
 682         mci->csrows[row].ce_count++;
 683         mci->csrows[row].channels[channel].ce_count++;
 684
 685         if (mci->scrub_mode & SCRUB_SW_SRC) {
 686                 /*
 687                  * Some MC's can remap memory so that it is still available
 688                  * at a different address when PCI devices map into memory.
 689                  * MC's that can't do this lose the memory where PCI devices
 690                  * are mapped.  This mapping is MC dependant and so we call
 691                  * back into the MC driver for it to map the MC page to
 692                  * a physical (CPU) page which can then be mapped to a virtual
 693                  * page - which can then be scrubbed.
 694                  */
 695                 remapped_page = mci->ctl_page_to_phys ?
 696                         mci->ctl_page_to_phys(mci, page_frame_number) :
 697                         page_frame_number;
 698
 699                 edac_mc_scrub_block(remapped_page, offset_in_page,
 700                                 mci->csrows[row].grain);
 701         }
 702 }
 703 EXPORT_SYMBOL_GPL(edac_mc_handle_ce);
 704
 705 void edac_mc_handle_ce_no_info(struct mem_ctl_info *mci, const char *msg)
 706 {
 707         if (edac_mc_get_log_ce())
 708                 edac_mc_printk(mci, KERN_WARNING,
 709                         "CE - no information available: %s\n", msg);
 710
 711         mci->ce_noinfo_count++;
 712         mci->ce_count++;
 713 }
 714 EXPORT_SYMBOL_GPL(edac_mc_handle_ce_no_info);
 715
 716 void edac_mc_handle_ue(struct mem_ctl_info *mci,
 717                 unsigned long page_frame_number,
 718                 unsigned long offset_in_page, int row, const char *msg)
 719 {
 720         int len = EDAC_MC_LABEL_LEN * 4;
 721         char labels[len + 1];
 722         char *pos = labels;
 723         int chan;
 724         int chars;
 725
 726         debugf3("MC%d: %s()\n", mci->mc_idx, __func__);
 727
 728         /* FIXME - maybe make panic on INTERNAL ERROR an option */
 729         if (row >= mci->nr_csrows || row < 0) {
 730                 /* something is wrong */
 731                 edac_mc_printk(mci, KERN_ERR,
 732                         "INTERNAL ERROR: row out of range "
 733                         "(%d >= %d)\n", row, mci->nr_csrows);
 734                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 735                 return;
 736         }
 737
 738         chars = snprintf(pos, len + 1, "%s",
 739                          mci->csrows[row].channels[0].label);
 740         len -= chars;
 741         pos += chars;
 742
 743         for (chan = 1; (chan < mci->csrows[row].nr_channels) && (len > 0);
 744                 chan++) {
 745                 chars = snprintf(pos, len + 1, ":%s",
 746                                  mci->csrows[row].channels[chan].label);
 747                 len -= chars;
 748                 pos += chars;
 749         }
 750
 751         if (edac_mc_get_log_ue())
 752                 edac_mc_printk(mci, KERN_EMERG,
 753                         "UE page 0x%lx, offset 0x%lx, grain %d, row %d, "
 754                         "labels \"%s\": %s\n", page_frame_number,
 755                         offset_in_page, mci->csrows[row].grain, row,
 756                         labels, msg);
 757
 758         if (edac_mc_get_panic_on_ue())
 759                 panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
 760                         "row %d, labels \"%s\": %s\n", mci->mc_idx,
 761                         page_frame_number, offset_in_page,
 762                         mci->csrows[row].grain, row, labels, msg);
 763
 764         mci->ue_count++;
 765         mci->csrows[row].ue_count++;
 766 }
 767 EXPORT_SYMBOL_GPL(edac_mc_handle_ue);
 768
 769 void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
 770 {
 771         if (edac_mc_get_panic_on_ue())
 772                 panic("EDAC MC%d: Uncorrected Error", mci->mc_idx);
 773
 774         if (edac_mc_get_log_ue())
 775                 edac_mc_printk(mci, KERN_WARNING,
 776                         "UE - no information available: %s\n", msg);
 777         mci->ue_noinfo_count++;
 778         mci->ue_count++;
 779 }
 780 EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
 781
 782 /*************************************************************
 783  * On Fully Buffered DIMM modules, this help function is
 784  * called to process UE events
 785  */
 786 void edac_mc_handle_fbd_ue(struct mem_ctl_info *mci,
 787                         unsigned int csrow,
 788                         unsigned int channela,
 789                         unsigned int channelb, char *msg)
 790 {
 791         int len = EDAC_MC_LABEL_LEN * 4;
 792         char labels[len + 1];
 793         char *pos = labels;
 794         int chars;
 795
 796         if (csrow >= mci->nr_csrows) {
 797                 /* something is wrong */
 798                 edac_mc_printk(mci, KERN_ERR,
 799                         "INTERNAL ERROR: row out of range (%d >= %d)\n",
 800                         csrow, mci->nr_csrows);
 801                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 802                 return;
 803         }
 804
 805         if (channela >= mci->csrows[csrow].nr_channels) {
 806                 /* something is wrong */
 807                 edac_mc_printk(mci, KERN_ERR,
 808                         "INTERNAL ERROR: channel-a out of range "
 809                         "(%d >= %d)\n",
 810                         channela, mci->csrows[csrow].nr_channels);
 811                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 812                 return;
 813         }
 814
 815         if (channelb >= mci->csrows[csrow].nr_channels) {
 816                 /* something is wrong */
 817                 edac_mc_printk(mci, KERN_ERR,
 818                         "INTERNAL ERROR: channel-b out of range "
 819                         "(%d >= %d)\n",
 820                         channelb, mci->csrows[csrow].nr_channels);
 821                 edac_mc_handle_ue_no_info(mci, "INTERNAL ERROR");
 822                 return;
 823         }
 824
 825         mci->ue_count++;
 826         mci->csrows[csrow].ue_count++;
 827
 828         /* Generate the DIMM labels from the specified channels */
 829         chars = snprintf(pos, len + 1, "%s",
 830                          mci->csrows[csrow].channels[channela].label);
 831         len -= chars;
 832         pos += chars;
 833         chars = snprintf(pos, len + 1, "-%s",
 834                          mci->csrows[csrow].channels[channelb].label);
 835
 836         if (edac_mc_get_log_ue())
 837                 edac_mc_printk(mci, KERN_EMERG,
 838                         "UE row %d, channel-a= %d channel-b= %d "
 839                         "labels \"%s\": %s\n", csrow, channela, channelb,
 840                         labels, msg);
 841
 842         if (edac_mc_get_panic_on_ue())
 843                 panic("UE row %d, channel-a= %d channel-b= %d "
 844                         "labels \"%s\": %s\n", csrow, channela,
 845                         channelb, labels, msg);
 846 }
 847 EXPORT_SYMBOL(edac_mc_handle_fbd_ue);
 848
 849 /*************************************************************
 850  * On Fully Buffered DIMM modules, this help function is
 851  * called to process CE events
 852  */
 853 void edac_mc_handle_fbd_ce(struct mem_ctl_info *mci,
 854                         unsigned int csrow, unsigned int channel, char *msg)
 855 {
 856
 857         /* Ensure boundary values */
 858         if (csrow >= mci->nr_csrows) {
 859                 /* something is wrong */
 860                 edac_mc_printk(mci, KERN_ERR,
 861                         "INTERNAL ERROR: row out of range (%d >= %d)\n",
 862                         csrow, mci->nr_csrows);
 863                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 864                 return;
 865         }
 866         if (channel >= mci->csrows[csrow].nr_channels) {
 867                 /* something is wrong */
 868                 edac_mc_printk(mci, KERN_ERR,
 869                         "INTERNAL ERROR: channel out of range (%d >= %d)\n",
 870                         channel, mci->csrows[csrow].nr_channels);
 871                 edac_mc_handle_ce_no_info(mci, "INTERNAL ERROR");
 872                 return;
 873         }
 874
 875         if (edac_mc_get_log_ce())
 876                 /* FIXME - put in DIMM location */
 877                 edac_mc_printk(mci, KERN_WARNING,
 878                         "CE row %d, channel %d, label \"%s\": %s\n",
 879                         csrow, channel,
 880                         mci->csrows[csrow].channels[channel].label, msg);
 881
 882         mci->ce_count++;
 883         mci->csrows[csrow].ce_count++;
 884         mci->csrows[csrow].channels[channel].ce_count++;
 885 }
 886 EXPORT_SYMBOL(edac_mc_handle_fbd_ce);