/*
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written Doug Thompson <norsk5@xmission.com>
 *
 */
9 #include <linux/module.h>
10 #include <linux/sysdev.h>
11 #include <linux/slab.h>
12 #include <linux/ctype.h>
14 #include "edac_core.h"
15 #include "edac_module.h"
17 /* Turn off this whole feature if PCI is not configured */
20 #define EDAC_PCI_SYMLINK "device"
22 /* data variables exported via sysfs */
23 static int check_pci_errors
; /* default NO check PCI parity */
24 static int edac_pci_panic_on_pe
; /* default NO panic on PCI Parity */
25 static int edac_pci_log_pe
= 1; /* log PCI parity errors */
26 static int edac_pci_log_npe
= 1; /* log PCI non-parity error errors */
27 static int edac_pci_poll_msec
= 1000; /* one second workq period */
29 static atomic_t pci_parity_count
= ATOMIC_INIT(0);
30 static atomic_t pci_nonparity_count
= ATOMIC_INIT(0);
32 static struct kobject
*edac_pci_top_main_kobj
;
33 static atomic_t edac_pci_sysfs_refcount
= ATOMIC_INIT(0);
35 /* getter functions for the data variables */
36 int edac_pci_get_check_errors(void)
38 return check_pci_errors
;
41 static int edac_pci_get_log_pe(void)
43 return edac_pci_log_pe
;
46 static int edac_pci_get_log_npe(void)
48 return edac_pci_log_npe
;
51 static int edac_pci_get_panic_on_pe(void)
53 return edac_pci_panic_on_pe
;
56 int edac_pci_get_poll_msec(void)
58 return edac_pci_poll_msec
;
61 /**************************** EDAC PCI sysfs instance *******************/
62 static ssize_t
instance_pe_count_show(struct edac_pci_ctl_info
*pci
, char *data
)
64 return sprintf(data
, "%u\n", atomic_read(&pci
->counters
.pe_count
));
67 static ssize_t
instance_npe_count_show(struct edac_pci_ctl_info
*pci
,
70 return sprintf(data
, "%u\n", atomic_read(&pci
->counters
.npe_count
));
/* Map an embedded kobject back to its enclosing edac_pci_ctl_info */
#define to_instance(k) container_of(k, struct edac_pci_ctl_info, kobj)
/* Map an embedded struct attribute back to its instance_attribute */
#define to_instance_attr(a) container_of(a, struct instance_attribute, attr)
76 /* DEVICE instance kobject release() function */
77 static void edac_pci_instance_release(struct kobject
*kobj
)
79 struct edac_pci_ctl_info
*pci
;
81 debugf0("%s()\n", __func__
);
83 /* Form pointer to containing struct, the pci control struct */
84 pci
= to_instance(kobj
);
86 /* decrement reference count on top main kobj */
87 kobject_put(edac_pci_top_main_kobj
);
89 kfree(pci
); /* Free the control struct */
92 /* instance specific attribute structure */
93 struct instance_attribute
{
94 struct attribute attr
;
95 ssize_t(*show
) (struct edac_pci_ctl_info
*, char *);
96 ssize_t(*store
) (struct edac_pci_ctl_info
*, const char *, size_t);
99 /* Function to 'show' fields from the edac_pci 'instance' structure */
100 static ssize_t
edac_pci_instance_show(struct kobject
*kobj
,
101 struct attribute
*attr
, char *buffer
)
103 struct edac_pci_ctl_info
*pci
= to_instance(kobj
);
104 struct instance_attribute
*instance_attr
= to_instance_attr(attr
);
106 if (instance_attr
->show
)
107 return instance_attr
->show(pci
, buffer
);
111 /* Function to 'store' fields into the edac_pci 'instance' structure */
112 static ssize_t
edac_pci_instance_store(struct kobject
*kobj
,
113 struct attribute
*attr
,
114 const char *buffer
, size_t count
)
116 struct edac_pci_ctl_info
*pci
= to_instance(kobj
);
117 struct instance_attribute
*instance_attr
= to_instance_attr(attr
);
119 if (instance_attr
->store
)
120 return instance_attr
->store(pci
, buffer
, count
);
125 static const struct sysfs_ops pci_instance_ops
= {
126 .show
= edac_pci_instance_show
,
127 .store
= edac_pci_instance_store
/*
 * Define a static instance_attribute named attr_instance_<_name> with the
 * given sysfs mode and show/store handlers.
 */
#define INSTANCE_ATTR(_name, _mode, _show, _store)	\
static struct instance_attribute attr_instance_##_name = {	\
	.attr	= {.name = __stringify(_name), .mode = _mode },	\
	.show	= _show,					\
	.store	= _store,					\
};
/* Read-only per-instance counters: parity and non-parity errors */
INSTANCE_ATTR(pe_count, S_IRUGO, instance_pe_count_show, NULL);
INSTANCE_ATTR(npe_count, S_IRUGO, instance_npe_count_show, NULL);
140 /* pci instance attributes */
141 static struct instance_attribute
*pci_instance_attr
[] = {
142 &attr_instance_pe_count
,
143 &attr_instance_npe_count
,
147 /* the ktype for a pci instance */
148 static struct kobj_type ktype_pci_instance
= {
149 .release
= edac_pci_instance_release
,
150 .sysfs_ops
= &pci_instance_ops
,
151 .default_attrs
= (struct attribute
**)pci_instance_attr
,
155 * edac_pci_create_instance_kobj
157 * construct one EDAC PCI instance's kobject for use
159 static int edac_pci_create_instance_kobj(struct edac_pci_ctl_info
*pci
, int idx
)
161 struct kobject
*main_kobj
;
164 debugf0("%s()\n", __func__
);
166 /* First bump the ref count on the top main kobj, which will
167 * track the number of PCI instances we have, and thus nest
168 * properly on keeping the module loaded
170 main_kobj
= kobject_get(edac_pci_top_main_kobj
);
176 /* And now register this new kobject under the main kobj */
177 err
= kobject_init_and_add(&pci
->kobj
, &ktype_pci_instance
,
178 edac_pci_top_main_kobj
, "pci%d", idx
);
180 debugf2("%s() failed to register instance pci%d\n",
182 kobject_put(edac_pci_top_main_kobj
);
186 kobject_uevent(&pci
->kobj
, KOBJ_ADD
);
187 debugf1("%s() Register instance 'pci%d' kobject\n", __func__
, idx
);
191 /* Error unwind statck */
197 * edac_pci_unregister_sysfs_instance_kobj
199 * unregister the kobj for the EDAC PCI instance
201 static void edac_pci_unregister_sysfs_instance_kobj(
202 struct edac_pci_ctl_info
*pci
)
204 debugf0("%s()\n", __func__
);
206 /* Unregister the instance kobject and allow its release
207 * function release the main reference count and then
210 kobject_put(&pci
->kobj
);
/***************************** EDAC PCI sysfs root **********************/

/* Map an embedded kobject back to its enclosing edac_pci_ctl_info */
#define to_edacpci(k) container_of(k, struct edac_pci_ctl_info, kobj)
/*
 * Map an embedded struct attribute back to its wrapper.  The wrapper type
 * used for the root attributes in this file is struct
 * edac_pci_dev_attribute; the macro previously named 'struct
 * edac_pci_attr', which is not defined in this file.
 */
#define to_edacpci_attr(a) \
	container_of(a, struct edac_pci_dev_attribute, attr)
/* simple show/store functions for attributes */

/*
 * Format the int that 'ptr' points to as decimal plus a newline into
 * 'buffer'; returns the number of characters written.
 */
static ssize_t edac_pci_int_show(void *ptr, char *buffer)
{
	const int *value = ptr;

	return sprintf(buffer, "%d\n", *value);
}
224 static ssize_t
edac_pci_int_store(void *ptr
, const char *buffer
, size_t count
)
228 if (isdigit(*buffer
))
229 *value
= simple_strtoul(buffer
, NULL
, 0);
234 struct edac_pci_dev_attribute
{
235 struct attribute attr
;
237 ssize_t(*show
) (void *, char *);
238 ssize_t(*store
) (void *, const char *, size_t);
241 /* Set of show/store abstract level functions for PCI Parity object */
242 static ssize_t
edac_pci_dev_show(struct kobject
*kobj
, struct attribute
*attr
,
245 struct edac_pci_dev_attribute
*edac_pci_dev
;
246 edac_pci_dev
= (struct edac_pci_dev_attribute
*)attr
;
248 if (edac_pci_dev
->show
)
249 return edac_pci_dev
->show(edac_pci_dev
->value
, buffer
);
253 static ssize_t
edac_pci_dev_store(struct kobject
*kobj
,
254 struct attribute
*attr
, const char *buffer
,
257 struct edac_pci_dev_attribute
*edac_pci_dev
;
258 edac_pci_dev
= (struct edac_pci_dev_attribute
*)attr
;
260 if (edac_pci_dev
->show
)
261 return edac_pci_dev
->store(edac_pci_dev
->value
, buffer
, count
);
265 static const struct sysfs_ops edac_pci_sysfs_ops
= {
266 .show
= edac_pci_dev_show
,
267 .store
= edac_pci_dev_store
/*
 * Define a static edac_pci_dev_attribute named edac_pci_attr_<_name> whose
 * backing value is the file-scope variable called <_name>.
 */
#define EDAC_PCI_ATTR(_name, _mode, _show, _store)			\
static struct edac_pci_dev_attribute edac_pci_attr_##_name = {		\
	.attr	= {.name = __stringify(_name), .mode = _mode },	\
	.value	= &_name,						\
	.show	= _show,						\
	.store	= _store,						\
};
/*
 * Same as EDAC_PCI_ATTR, but the backing value pointer is supplied
 * explicitly through '_data' instead of being derived from the name.
 */
#define EDAC_PCI_STRING_ATTR(_name, _data, _mode, _show, _store)	\
static struct edac_pci_dev_attribute edac_pci_attr_##_name = {		\
	.attr	= {.name = __stringify(_name), .mode = _mode },	\
	.value	= _data,						\
	.show	= _show,						\
	.store	= _store,						\
};
286 /* PCI Parity control files */
287 EDAC_PCI_ATTR(check_pci_errors
, S_IRUGO
| S_IWUSR
, edac_pci_int_show
,
289 EDAC_PCI_ATTR(edac_pci_log_pe
, S_IRUGO
| S_IWUSR
, edac_pci_int_show
,
291 EDAC_PCI_ATTR(edac_pci_log_npe
, S_IRUGO
| S_IWUSR
, edac_pci_int_show
,
293 EDAC_PCI_ATTR(edac_pci_panic_on_pe
, S_IRUGO
| S_IWUSR
, edac_pci_int_show
,
295 EDAC_PCI_ATTR(pci_parity_count
, S_IRUGO
, edac_pci_int_show
, NULL
);
296 EDAC_PCI_ATTR(pci_nonparity_count
, S_IRUGO
, edac_pci_int_show
, NULL
);
298 /* Base Attributes of the memory ECC object */
299 static struct edac_pci_dev_attribute
*edac_pci_attr
[] = {
300 &edac_pci_attr_check_pci_errors
,
301 &edac_pci_attr_edac_pci_log_pe
,
302 &edac_pci_attr_edac_pci_log_npe
,
303 &edac_pci_attr_edac_pci_panic_on_pe
,
304 &edac_pci_attr_pci_parity_count
,
305 &edac_pci_attr_pci_nonparity_count
,
310 * edac_pci_release_main_kobj
312 * This release function is called when the reference count to the
313 * passed kobj goes to zero.
315 * This kobj is the 'main' kobject that EDAC PCI instances
316 * link to, and thus provide for proper nesting counts
318 static void edac_pci_release_main_kobj(struct kobject
*kobj
)
320 debugf0("%s() here to module_put(THIS_MODULE)\n", __func__
);
324 /* last reference to top EDAC PCI kobject has been removed,
325 * NOW release our ref count on the core module
327 module_put(THIS_MODULE
);
330 /* ktype struct for the EDAC PCI main kobj */
331 static struct kobj_type ktype_edac_pci_main_kobj
= {
332 .release
= edac_pci_release_main_kobj
,
333 .sysfs_ops
= &edac_pci_sysfs_ops
,
334 .default_attrs
= (struct attribute
**)edac_pci_attr
,
338 * edac_pci_main_kobj_setup()
340 * setup the sysfs for EDAC PCI attributes
341 * assumes edac_class has already been initialized
343 static int edac_pci_main_kobj_setup(void)
346 struct sysdev_class
*edac_class
;
348 debugf0("%s()\n", __func__
);
350 /* check and count if we have already created the main kobject */
351 if (atomic_inc_return(&edac_pci_sysfs_refcount
) != 1)
354 /* First time, so create the main kobject and its
355 * controls and atributes
357 edac_class
= edac_get_edac_class();
358 if (edac_class
== NULL
) {
359 debugf1("%s() no edac_class\n", __func__
);
361 goto decrement_count_fail
;
364 /* Bump the reference count on this module to ensure the
365 * modules isn't unloaded until we deconstruct the top
366 * level main kobj for EDAC PCI
368 if (!try_module_get(THIS_MODULE
)) {
369 debugf1("%s() try_module_get() failed\n", __func__
);
371 goto decrement_count_fail
;
374 edac_pci_top_main_kobj
= kzalloc(sizeof(struct kobject
), GFP_KERNEL
);
375 if (!edac_pci_top_main_kobj
) {
376 debugf1("Failed to allocate\n");
381 /* Instanstiate the pci object */
382 err
= kobject_init_and_add(edac_pci_top_main_kobj
,
383 &ktype_edac_pci_main_kobj
,
384 &edac_class
->kset
.kobj
, "pci");
386 debugf1("Failed to register '.../edac/pci'\n");
387 goto kobject_init_and_add_fail
;
390 /* At this point, to 'release' the top level kobject
391 * for EDAC PCI, then edac_pci_main_kobj_teardown()
392 * must be used, for resources to be cleaned up properly
394 kobject_uevent(edac_pci_top_main_kobj
, KOBJ_ADD
);
395 debugf1("Registered '.../edac/pci' kobject\n");
399 /* Error unwind statck */
400 kobject_init_and_add_fail
:
401 kfree(edac_pci_top_main_kobj
);
404 module_put(THIS_MODULE
);
406 decrement_count_fail
:
407 /* if are on this error exit, nothing to tear down */
408 atomic_dec(&edac_pci_sysfs_refcount
);
414 * edac_pci_main_kobj_teardown()
416 * if no longer linked (needed) remove the top level EDAC PCI
417 * kobject with its controls and attributes
419 static void edac_pci_main_kobj_teardown(void)
421 debugf0("%s()\n", __func__
);
423 /* Decrement the count and only if no more controller instances
424 * are connected perform the unregisteration of the top level
427 if (atomic_dec_return(&edac_pci_sysfs_refcount
) == 0) {
428 debugf0("%s() called kobject_put on main kobj\n",
430 kobject_put(edac_pci_top_main_kobj
);
436 * edac_pci_create_sysfs
438 * Create the controls/attributes for the specified EDAC PCI device
440 int edac_pci_create_sysfs(struct edac_pci_ctl_info
*pci
)
443 struct kobject
*edac_kobj
= &pci
->kobj
;
445 debugf0("%s() idx=%d\n", __func__
, pci
->pci_idx
);
447 /* create the top main EDAC PCI kobject, IF needed */
448 err
= edac_pci_main_kobj_setup();
452 /* Create this instance's kobject under the MAIN kobject */
453 err
= edac_pci_create_instance_kobj(pci
, pci
->pci_idx
);
455 goto unregister_cleanup
;
457 err
= sysfs_create_link(edac_kobj
, &pci
->dev
->kobj
, EDAC_PCI_SYMLINK
);
459 debugf0("%s() sysfs_create_link() returned err= %d\n",
466 /* Error unwind stack */
468 edac_pci_unregister_sysfs_instance_kobj(pci
);
471 edac_pci_main_kobj_teardown();
477 * edac_pci_remove_sysfs
479 * remove the controls and attributes for this EDAC PCI device
481 void edac_pci_remove_sysfs(struct edac_pci_ctl_info
*pci
)
483 debugf0("%s() index=%d\n", __func__
, pci
->pci_idx
);
485 /* Remove the symlink */
486 sysfs_remove_link(&pci
->kobj
, EDAC_PCI_SYMLINK
);
488 /* remove this PCI instance's sysfs entries */
489 edac_pci_unregister_sysfs_instance_kobj(pci
);
491 /* Call the main unregister function, which will determine
492 * if this 'pci' is the last instance.
493 * If it is, the main kobject will be unregistered as a result
495 debugf0("%s() calling edac_pci_main_kobj_teardown()\n", __func__
);
496 edac_pci_main_kobj_teardown();
499 /************************ PCI error handling *************************/
500 static u16
get_pci_parity_status(struct pci_dev
*dev
, int secondary
)
505 where
= secondary
? PCI_SEC_STATUS
: PCI_STATUS
;
506 pci_read_config_word(dev
, where
, &status
);
508 /* If we get back 0xFFFF then we must suspect that the card has been
509 * pulled but the Linux PCI layer has not yet finished cleaning up.
510 * We don't want to report on such devices
513 if (status
== 0xFFFF) {
516 pci_read_config_dword(dev
, 0, &sanity
);
518 if (sanity
== 0xFFFFFFFF)
522 status
&= PCI_STATUS_DETECTED_PARITY
| PCI_STATUS_SIG_SYSTEM_ERROR
|
526 /* reset only the bits we are interested in */
527 pci_write_config_word(dev
, where
, status
);
533 /* Clear any PCI parity errors logged by this device. */
534 static void edac_pci_dev_parity_clear(struct pci_dev
*dev
)
538 get_pci_parity_status(dev
, 0);
540 /* read the device TYPE, looking for bridges */
541 pci_read_config_byte(dev
, PCI_HEADER_TYPE
, &header_type
);
543 if ((header_type
& 0x7F) == PCI_HEADER_TYPE_BRIDGE
)
544 get_pci_parity_status(dev
, 1);
550 * Fucntion to retrieve the current parity status
554 static void edac_pci_dev_parity_test(struct pci_dev
*dev
)
560 /* stop any interrupts until we can acquire the status */
561 local_irq_save(flags
);
563 /* read the STATUS register on this device */
564 status
= get_pci_parity_status(dev
, 0);
566 /* read the device TYPE, looking for bridges */
567 pci_read_config_byte(dev
, PCI_HEADER_TYPE
, &header_type
);
569 local_irq_restore(flags
);
571 debugf4("PCI STATUS= 0x%04x %s\n", status
, dev_name(&dev
->dev
));
573 /* check the status reg for errors on boards NOT marked as broken
574 * if broken, we cannot trust any of the status bits
576 if (status
&& !dev
->broken_parity_status
) {
577 if (status
& (PCI_STATUS_SIG_SYSTEM_ERROR
)) {
578 edac_printk(KERN_CRIT
, EDAC_PCI
,
579 "Signaled System Error on %s\n",
581 atomic_inc(&pci_nonparity_count
);
584 if (status
& (PCI_STATUS_PARITY
)) {
585 edac_printk(KERN_CRIT
, EDAC_PCI
,
586 "Master Data Parity Error on %s\n",
589 atomic_inc(&pci_parity_count
);
592 if (status
& (PCI_STATUS_DETECTED_PARITY
)) {
593 edac_printk(KERN_CRIT
, EDAC_PCI
,
594 "Detected Parity Error on %s\n",
597 atomic_inc(&pci_parity_count
);
602 debugf4("PCI HEADER TYPE= 0x%02x %s\n", header_type
, dev_name(&dev
->dev
));
604 if ((header_type
& 0x7F) == PCI_HEADER_TYPE_BRIDGE
) {
605 /* On bridges, need to examine secondary status register */
606 status
= get_pci_parity_status(dev
, 1);
608 debugf4("PCI SEC_STATUS= 0x%04x %s\n", status
, dev_name(&dev
->dev
));
610 /* check the secondary status reg for errors,
611 * on NOT broken boards
613 if (status
&& !dev
->broken_parity_status
) {
614 if (status
& (PCI_STATUS_SIG_SYSTEM_ERROR
)) {
615 edac_printk(KERN_CRIT
, EDAC_PCI
, "Bridge "
616 "Signaled System Error on %s\n",
618 atomic_inc(&pci_nonparity_count
);
621 if (status
& (PCI_STATUS_PARITY
)) {
622 edac_printk(KERN_CRIT
, EDAC_PCI
, "Bridge "
623 "Master Data Parity Error on "
624 "%s\n", pci_name(dev
));
626 atomic_inc(&pci_parity_count
);
629 if (status
& (PCI_STATUS_DETECTED_PARITY
)) {
630 edac_printk(KERN_CRIT
, EDAC_PCI
, "Bridge "
631 "Detected Parity Error on %s\n",
634 atomic_inc(&pci_parity_count
);
/* reduce some complexity in definition of the iterator:
 * callback type applied to each PCI device visited
 */
typedef void (*pci_parity_check_fn_t)(struct pci_dev *dev);
644 * pci_dev parity list iterator
645 * Scan the PCI device list for one pass, looking for SERRORs
646 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
648 static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn
)
650 struct pci_dev
*dev
= NULL
;
652 /* request for kernel access to the next PCI device, if any,
653 * and while we are looking at it have its reference count
654 * bumped until we are done with it
656 while ((dev
= pci_get_device(PCI_ANY_ID
, PCI_ANY_ID
, dev
)) != NULL
) {
662 * edac_pci_do_parity_check
664 * performs the actual PCI parity check operation
666 void edac_pci_do_parity_check(void)
670 debugf3("%s()\n", __func__
);
672 /* if policy has PCI check off, leave now */
673 if (!check_pci_errors
)
676 before_count
= atomic_read(&pci_parity_count
);
678 /* scan all PCI devices looking for a Parity Error on devices and
680 * The iterator calls pci_get_device() which might sleep, thus
681 * we cannot disable interrupts in this scan.
683 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test
);
685 /* Only if operator has selected panic on PCI Error */
686 if (edac_pci_get_panic_on_pe()) {
687 /* If the count is different 'after' from 'before' */
688 if (before_count
!= atomic_read(&pci_parity_count
))
689 panic("EDAC: PCI Parity Error");
694 * edac_pci_clear_parity_errors
696 * function to perform an iteration over the PCI devices
697 * and clearn their current status
699 void edac_pci_clear_parity_errors(void)
701 /* Clear any PCI bus parity errors that devices initially have logged
702 * in their registers.
704 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear
);
710 * Called to handle a PARITY ERROR event
712 void edac_pci_handle_pe(struct edac_pci_ctl_info
*pci
, const char *msg
)
715 /* global PE counter incremented by edac_pci_do_parity_check() */
716 atomic_inc(&pci
->counters
.pe_count
);
718 if (edac_pci_get_log_pe())
719 edac_pci_printk(pci
, KERN_WARNING
,
720 "Parity Error ctl: %s %d: %s\n",
721 pci
->ctl_name
, pci
->pci_idx
, msg
);
724 * poke all PCI devices and see which one is the troublemaker
725 * panic() is called if set
727 edac_pci_do_parity_check();
729 EXPORT_SYMBOL_GPL(edac_pci_handle_pe
);
733 * edac_pci_handle_npe
735 * Called to handle a NON-PARITY ERROR event
737 void edac_pci_handle_npe(struct edac_pci_ctl_info
*pci
, const char *msg
)
740 /* global NPE counter incremented by edac_pci_do_parity_check() */
741 atomic_inc(&pci
->counters
.npe_count
);
743 if (edac_pci_get_log_npe())
744 edac_pci_printk(pci
, KERN_WARNING
,
745 "Non-Parity Error ctl: %s %d: %s\n",
746 pci
->ctl_name
, pci
->pci_idx
, msg
);
749 * poke all PCI devices and see which one is the troublemaker
750 * panic() is called if set
752 edac_pci_do_parity_check();
754 EXPORT_SYMBOL_GPL(edac_pci_handle_npe
);
757 * Define the PCI parameter to the module
759 module_param(check_pci_errors
, int, 0644);
760 MODULE_PARM_DESC(check_pci_errors
,
761 "Check for PCI bus parity errors: 0=off 1=on");
762 module_param(edac_pci_panic_on_pe
, int, 0644);
763 MODULE_PARM_DESC(edac_pci_panic_on_pe
,
764 "Panic on PCI Bus Parity error: 0=off 1=on");
766 #endif /* CONFIG_PCI */