3 * Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 #include <linux/init.h>
21 #include <linux/pci.h>
22 #include <linux/proc_fs.h>
23 #include <linux/bootmem.h>
25 #include <linux/rbtree.h>
26 #include <linux/spinlock.h>
27 #include <linux/seq_file.h>
29 #include <asm/processor.h>
32 #include <asm/machdep.h>
33 #include <asm/pgtable.h>
/* Split the 64-bit PHB Bus Unit ID (buid) into the two 32-bit halves
 * that the RTAS EEH calls below take as separate arguments. */
39 #define BUID_HI(buid) ((buid) >> 32)
40 #define BUID_LO(buid) ((buid) & 0xffffffff)
/* Build an RTAS config address from bus number and devfn.  The
 * (devfn) & 0xf8 mask keeps only the device-number bits of devfn
 * (the low 3 function bits are dropped).
 * NOTE(review): confirm dropping the function bits is intended here. */
41 #define CONFIG_ADDR(busno, devfn) \
42 (((((busno) & 0xff) << 8) | ((devfn) & 0xf8)) << 8)
/* RTAS service tokens for the firmware EEH calls; resolved from the
 * device tree in eeh_init(). */
45 static int ibm_set_eeh_option
;
46 static int ibm_set_slot_reset
;
47 static int ibm_read_slot_reset_state
;
48 static int ibm_slot_error_detail
;
/* Non-zero once early_enable_eeh() has enabled EEH for at least one
 * device; gates all later failure checking. */
50 static int eeh_subsystem_enabled
;
52 /* Buffer for reporting slot-error-detail rtas calls */
53 static unsigned char slot_errbuf
[RTAS_ERROR_LOG_MAX
];
/* Protects slot_errbuf while rtas_call() fills it and log_error()
 * drains it.  NOTE(review): the SPIN_LOCK_UNLOCKED static initializer
 * is the old idiom; later kernels use DEFINE_SPINLOCK(). */
54 static spinlock_t slot_errbuf_lock
= SPIN_LOCK_UNLOCKED
;
/* Usable size of slot_errbuf: the firmware's "rtas-error-log-max"
 * value, clamped to RTAS_ERROR_LOG_MAX in eeh_init(). */
55 static int eeh_error_buf_size
;
57 /* System monitoring statistics */
/* count of all-0xff reads that were checked for a slot freeze */
58 static DEFINE_PER_CPU(unsigned long, total_mmio_ffs
);
/* all-0xff reads that turned out NOT to be a frozen slot */
59 static DEFINE_PER_CPU(unsigned long, false_positives
);
/* genuine freezes detected but not acted upon (no panic) */
60 static DEFINE_PER_CPU(unsigned long, ignored_failures
);
63 * The pci address cache subsystem. This subsystem places
64 * PCI device address resources into a red-black tree, sorted
65 * according to the address range, so that given only an i/o
66 * address, the corresponding PCI device can be **quickly**
69 * Currently, the only customer of this code is the EEH subsystem;
70 * thus, this code has been somewhat tailored to suit EEH better.
71 * In particular, the cache does *not* hold the addresses of devices
72 * for which EEH is not enabled.
74 * (Implementation Note: The RB tree seems to be better/faster
75 * than any hash algo I could think of for this problem, even
76 * with the penalty of slow pointer chases for d-cache misses).
/* One node of the address cache: a single I/O or MEM resource range
 * [addr_lo, addr_hi] owned by pcidev.  Nodes live in an rb tree keyed
 * by address so that a faulting address maps back to its device fast. */
78 struct pci_io_addr_range
80 struct rb_node rb_node
;
81 unsigned long addr_lo
;
82 unsigned long addr_hi
;
83 struct pci_dev
*pcidev
;
/* Root of the address cache (single global instance).
 * NOTE(review): the piar_lock spinlock member used throughout
 * (spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, ...)) appears
 * to have been dropped from this excerpt of the struct definition. */
87 static struct pci_io_addr_cache
89 struct rb_root rb_root
;
91 } pci_io_addr_cache_root
;
/* Search the rb tree for the range containing addr; on a hit, take a
 * reference on the owning pci_dev (caller must pci_dev_put() it).
 * Caller must hold piar_lock.  Safe in interrupt context.
 * NOTE(review): the loop/return scaffolding of this function appears
 * truncated by extraction; the comparisons below are the tree walk. */
93 static inline struct pci_dev
*__pci_get_device_by_addr(unsigned long addr
)
95 struct rb_node
*n
= pci_io_addr_cache_root
.rb_root
.rb_node
;
98 struct pci_io_addr_range
*piar
;
99 piar
= rb_entry(n
, struct pci_io_addr_range
, rb_node
);
/* addr below this node's range: descend left */
101 if (addr
< piar
->addr_lo
) {
/* addr above this node's range: descend right */
104 if (addr
> piar
->addr_hi
) {
/* addr within [addr_lo, addr_hi]: pin the device before returning */
107 pci_dev_get(piar
->pcidev
);
117 * pci_get_device_by_addr - Get device, given only address
118 * @addr: mmio (PIO) phys address or i/o port number
120 * Given an mmio phys address, or a port number, find a pci device
121 * that implements this address. Be sure to pci_dev_put the device
122 * when finished. I/O port numbers are assumed to be offset
123 * from zero (that is, they do *not* have pci_io_addr added in).
124 * It is safe to call this function within an interrupt.
126 static struct pci_dev
*pci_get_device_by_addr(unsigned long addr
)
/* irq-safe lock around the unlocked tree search: this path runs from
 * interrupt context (see eeh_check_failure). */
131 spin_lock_irqsave(&pci_io_addr_cache_root
.piar_lock
, flags
);
132 dev
= __pci_get_device_by_addr(addr
);
133 spin_unlock_irqrestore(&pci_io_addr_cache_root
.piar_lock
, flags
);
139 * Handy-dandy debug print routine, does nothing more
140 * than print out the contents of our addr cache.
/* Debug-only: in-order walk of the cache tree, printing every range. */
142 static void pci_addr_cache_print(struct pci_io_addr_cache
*cache
)
147 n
= rb_first(&cache
->rb_root
);
149 struct pci_io_addr_range
*piar
;
150 piar
= rb_entry(n
, struct pci_io_addr_range
, rb_node
);
/* NOTE(review): piar->flags is printed here but the struct excerpt
 * above shows no flags member -- likely dropped by extraction. */
151 printk(KERN_DEBUG
"PCI: %s addr range %d [%lx-%lx]: %s %s\n",
152 (piar
->flags
& IORESOURCE_IO
) ? "i/o" : "mem", cnt
,
153 piar
->addr_lo
, piar
->addr_hi
, pci_name(piar
->pcidev
),
154 pci_pretty_name(piar
->pcidev
));
161 /* Insert address range into the rb tree. */
/* Insert the range [alo, ahi] for dev into the rb tree.  Caller holds
 * piar_lock with interrupts disabled (hence GFP_ATOMIC below). */
162 static struct pci_io_addr_range
*
163 pci_addr_cache_insert(struct pci_dev
*dev
, unsigned long alo
,
164 unsigned long ahi
, unsigned int flags
)
166 struct rb_node
**p
= &pci_io_addr_cache_root
.rb_root
.rb_node
;
167 struct rb_node
*parent
= NULL
;
168 struct pci_io_addr_range
*piar
;
170 /* Walk tree, find a place to insert into tree */
173 piar
= rb_entry(parent
, struct pci_io_addr_range
, rb_node
);
174 if (alo
< piar
->addr_lo
) {
175 p
= &parent
->rb_left
;
176 } else if (ahi
> piar
->addr_hi
) {
177 p
= &parent
->rb_right
;
/* Neither strictly below nor above: the ranges overlap.  An exact
 * duplicate (same dev, same bounds) is tolerated silently; any other
 * overlap is a real inconsistency and gets a warning. */
179 if (dev
!= piar
->pcidev
||
180 alo
!= piar
->addr_lo
|| ahi
!= piar
->addr_hi
) {
181 printk(KERN_WARNING
"PIAR: overlapping address range\n");
/* NOTE(review): the kmalloc NULL check appears to have been dropped
 * from this excerpt; the original returned NULL on failure. */
186 piar
= (struct pci_io_addr_range
*)kmalloc(sizeof(struct pci_io_addr_range
), GFP_ATOMIC
);
/* Link the new node at the slot found above, then rebalance. */
195 rb_link_node(&piar
->rb_node
, parent
, p
);
196 rb_insert_color(&piar
->rb_node
, &pci_io_addr_cache_root
.rb_root
);
/* Add every cacheable resource range of dev to the tree.  Caller holds
 * piar_lock.  Devices without an OF node, or with EEH unsupported or
 * explicitly not checked, are skipped. */
201 static void __pci_addr_cache_insert_device(struct pci_dev
*dev
)
203 struct device_node
*dn
;
/* Map the pci_dev to its Open Firmware node; without one there is no
 * EEH configuration to consult, so the device is skipped. */
207 dn
= pci_device_to_OF_node(dev
);
209 printk(KERN_WARNING
"PCI: no pci dn found for dev=%s %s\n",
210 pci_name(dev
), pci_pretty_name(dev
));
214 /* Skip any devices for which EEH is not enabled. */
215 if (!(dn
->eeh_mode
& EEH_MODE_SUPPORTED
) ||
216 dn
->eeh_mode
& EEH_MODE_NOCHECK
) {
218 printk(KERN_INFO
"PCI: skip building address cache for=%s %s\n",
219 pci_name(dev
), pci_pretty_name(dev
));
224 /* The cache holds a reference to the device... */
227 /* Walk resources on this device, poke them into the tree */
228 for (i
= 0; i
< DEVICE_COUNT_RESOURCE
; i
++) {
229 unsigned long start
= pci_resource_start(dev
,i
);
230 unsigned long end
= pci_resource_end(dev
,i
);
231 unsigned int flags
= pci_resource_flags(dev
,i
);
233 /* We are interested only in bus addresses, not dma or other stuff */
234 if (0 == (flags
& (IORESOURCE_IO
| IORESOURCE_MEM
)))
/* Skip unassigned or degenerate BARs (zero or all-ones bounds). */
236 if (start
== 0 || ~start
== 0 || end
== 0 || ~end
== 0)
238 pci_addr_cache_insert(dev
, start
, end
, flags
);
242 /* If there was nothing to add, the cache has no reference... */
242 /* If there was nothing to add, the cache has no reference... */
248 * pci_addr_cache_insert_device - Add a device to the address cache
249 * @dev: PCI device whose I/O addresses we are interested in.
251 * In order to support the fast lookup of devices based on addresses,
252 * we maintain a cache of devices that can be quickly searched.
253 * This routine adds a device to that cache.
255 void pci_addr_cache_insert_device(struct pci_dev
*dev
)
/* irq-safe lock: the cache is also searched from interrupt context. */
259 spin_lock_irqsave(&pci_io_addr_cache_root
.piar_lock
, flags
);
260 __pci_addr_cache_insert_device(dev
);
261 spin_unlock_irqrestore(&pci_io_addr_cache_root
.piar_lock
, flags
);
/* Walk the whole tree, erasing every range node owned by dev.
 * Caller holds piar_lock. */
264 static inline void __pci_addr_cache_remove_device(struct pci_dev
*dev
)
270 n
= rb_first(&pci_io_addr_cache_root
.rb_root
);
272 struct pci_io_addr_range
*piar
;
273 piar
= rb_entry(n
, struct pci_io_addr_range
, rb_node
);
275 if (piar
->pcidev
== dev
) {
276 rb_erase(n
, &pci_io_addr_cache_root
.rb_root
);
284 /* The cache no longer holds its reference to this device... */
290 * pci_addr_cache_remove_device - remove pci device from addr cache
291 * @dev: device to remove
293 * Remove a device from the addr-cache tree.
294 * This is potentially expensive, since it will walk
295 * the tree multiple times (once per resource).
296 * But so what; device removal doesn't need to be that fast.
298 void pci_addr_cache_remove_device(struct pci_dev
*dev
)
/* Same irq-safe locking discipline as the insert path. */
302 spin_lock_irqsave(&pci_io_addr_cache_root
.piar_lock
, flags
);
303 __pci_addr_cache_remove_device(dev
);
304 spin_unlock_irqrestore(&pci_io_addr_cache_root
.piar_lock
, flags
);
308 * pci_addr_cache_build - Build a cache of I/O addresses
310 * Build a cache of pci i/o addresses. This cache will be used to
311 * find the pci device that corresponds to a given address.
312 * This routine scans all pci busses to build the cache.
313 * Must be run late in boot process, after the pci controllers
314 * have been scanned for devices (after all device resources are known).
316 void __init
pci_addr_cache_build(void)
318 struct pci_dev
*dev
= NULL
;
320 spin_lock_init(&pci_io_addr_cache_root
.piar_lock
);
/* pci_get_device(PCI_ANY_ID, PCI_ANY_ID, prev) iterates every PCI
 * device in the system; the reference it takes is kept by the cache. */
322 while ((dev
= pci_get_device(PCI_ANY_ID
, PCI_ANY_ID
, dev
)) != NULL
) {
323 /* Ignore PCI bridges ( XXX why ??) */
324 if ((dev
->class >> 16) == PCI_BASE_CLASS_BRIDGE
) {
327 pci_addr_cache_insert_device(dev
);
331 /* Verify tree built up above, echo back the list of addrs. */
332 pci_addr_cache_print(&pci_io_addr_cache_root
);
337 * eeh_token_to_phys - convert EEH address token to phys address
338 * @token i/o token, should be address in the form 0xE....
/* Translate an ioremap'd EEH token (virtual address) to the physical
 * address it maps, by walking the ioremap page tables directly. */
340 static inline unsigned long eeh_token_to_phys(unsigned long token
)
345 ptep
= find_linux_pte(ioremap_mm
.pgd
, token
);
348 pa
= pte_pfn(*ptep
) << PAGE_SHIFT
;
/* Re-attach the intra-page offset to the physical frame address. */
350 return pa
| (token
& (PAGE_SIZE
-1));
354 * eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
356 * @dev pci device, if known
358 * Check for an EEH failure for the given device node. Call this
359 * routine if the result of a read was all 0xff's and you want to
360 * find out if this is due to an EEH slot freeze event. This routine
361 * will query firmware for the EEH status.
363 * Returns 0 if there has not been an EEH error; otherwise returns
366 * It is safe to call this routine in an interrupt context.
368 int eeh_dn_check_failure(struct device_node
*dn
, struct pci_dev
*dev
)
/* Count every all-0xff check, enabled or not (per-cpu stat). */
374 __get_cpu_var(total_mmio_ffs
)++;
376 if (!eeh_subsystem_enabled
)
382 /* Access to IO BARs might get this far and still not want checking. */
383 if (!(dn
->eeh_mode
& EEH_MODE_SUPPORTED
) ||
384 dn
->eeh_mode
& EEH_MODE_NOCHECK
) {
/* No config address recorded at boot: nothing we can query. */
388 if (!dn
->eeh_config_addr
) {
393 * Now test for an EEH failure. This is VERY expensive.
394 * Note that the eeh_config_addr may be a parent device
395 * in the case of a device behind a bridge, or it may be
396 * function zero of a multi-function device.
397 * In any case they must share a common PHB.
/* Ask firmware for the slot state (ibm,read-slot-reset-state). */
399 ret
= rtas_call(ibm_read_slot_reset_state
, 3, 3, rets
,
400 dn
->eeh_config_addr
, BUID_HI(dn
->phb
->buid
),
401 BUID_LO(dn
->phb
->buid
));
/* rets[1] == 1 means the state info is valid; rets[0] of 2 or 4 is a
 * frozen/unavailable slot (per the RTAS call semantics -- verify
 * against the PAPR spec). */
403 if (ret
== 0 && rets
[1] == 1 && (rets
[0] == 2 || rets
[0] == 4)) {
/* Pull the detailed error log into slot_errbuf under its lock. */
406 spin_lock_irqsave(&slot_errbuf_lock
, flags
);
407 memset(slot_errbuf
, 0, eeh_error_buf_size
);
409 log_event
= rtas_call(ibm_slot_error_detail
,
410 8, 1, NULL
, dn
->eeh_config_addr
,
411 BUID_HI(dn
->phb
->buid
),
412 BUID_LO(dn
->phb
->buid
), NULL
, 0,
413 virt_to_phys(slot_errbuf
),
415 1 /* Temporary Error */);
418 log_error(slot_errbuf
, ERR_TYPE_RTAS_LOG
,
421 spin_unlock_irqrestore(&slot_errbuf_lock
, flags
);
423 printk(KERN_INFO
"EEH: MMIO failure (%d) on device: %s %s\n",
424 rets
[0], dn
->name
, dn
->full_name
);
428 * XXX We should create a separate sysctl for this.
430 * Since the panic_on_oops sysctl is used to halt
431 * the system in light of potential corruption, we
/* Halt on freeze when configured to; otherwise just count it. */
435 panic("EEH: MMIO failure (%d) on device: %s %s\n",
436 rets
[0], dn
->name
, dn
->full_name
);
438 __get_cpu_var(ignored_failures
)++;
/* All-0xff read but the slot is healthy: a false positive. */
441 __get_cpu_var(false_positives
)++;
447 EXPORT_SYMBOL(eeh_dn_check_failure
);
450 * eeh_check_failure - check if all 1's data is due to EEH slot freeze
451 * @token i/o token, should be address in the form 0xA....
452 * @val value, should be all 1's (XXX why do we need this arg??)
455 * Check for an EEH failure at the given token address. Call this
456 * routine if the result of a read was all 0xff's and you want to
457 * find out if this is due to an EEH slot freeze event. This routine
458 * will query firmware for the EEH status.
460 * Note this routine is safe to call in an interrupt context.
462 unsigned long eeh_check_failure(const volatile void __iomem
*token
, unsigned long val
)
466 struct device_node
*dn
;
468 /* Finding the phys addr + pci device; this is pretty quick. */
469 addr
= eeh_token_to_phys((unsigned long __force
) token
);
/* pci_get_device_by_addr() pins the device; the matching
 * pci_dev_put() is expected outside this excerpt. */
470 dev
= pci_get_device_by_addr(addr
);
/* Delegate the actual firmware query to the device-node variant. */
474 dn
= pci_device_to_OF_node(dev
);
475 eeh_dn_check_failure (dn
, dev
);
481 EXPORT_SYMBOL(eeh_check_failure
);
/* Per-PHB context handed to early_enable_eeh() during the device-tree
 * traversal: the PHB buid pre-split for rtas_call() (see BUID_HI/LO). */
483 struct eeh_early_enable_info
{
484 unsigned int buid_hi
;
485 unsigned int buid_lo
;
488 /* Enable eeh for the given device node. */
/* Enable eeh for the given device node.  Called for every node under a
 * PHB by traverse_pci_devices(); data is the eeh_early_enable_info for
 * that PHB.  Marks dn->eeh_mode and records dn->eeh_config_addr. */
489 static void *early_enable_eeh(struct device_node
*dn
, void *data
)
491 struct eeh_early_enable_info
*info
= data
;
/* Pull the OF properties that identify a checkable PCI function. */
493 char *status
= get_property(dn
, "status", NULL
);
494 u32
*class_code
= (u32
*)get_property(dn
, "class-code", NULL
);
495 u32
*vendor_id
= (u32
*)get_property(dn
, "vendor-id", NULL
);
496 u32
*device_id
= (u32
*)get_property(dn
, "device-id", NULL
);
502 if (status
&& strcmp(status
, "ok") != 0)
503 return NULL
; /* ignore devices with bad status */
505 /* Ignore bad nodes. */
506 if (!class_code
|| !vendor_id
|| !device_id
)
509 /* There is nothing to check on PCI to ISA bridges */
510 if (dn
->type
&& !strcmp(dn
->type
, "isa")) {
511 dn
->eeh_mode
|= EEH_MODE_NOCHECK
;
516 * Now decide if we are going to "Disable" EEH checking
517 * for this device. We still run with the EEH hardware active,
518 * but we won't be checking for ff's. This means a driver
519 * could return bad data (very bad!), an interrupt handler could
520 * hang waiting on status bits that won't change, etc.
521 * But there are a few cases like display devices that make sense.
523 enable
= 1; /* i.e. we will do checking */
/* Display adapters legitimately return 0xff patterns; don't check. */
524 if ((*class_code
>> 16) == PCI_BASE_CLASS_DISPLAY
)
528 dn
->eeh_mode
|= EEH_MODE_NOCHECK
;
530 /* Ok... see if this device supports EEH. Some do, some don't,
531 * and the only way to find out is to check each and every one. */
532 regs
= (u32
*)get_property(dn
, "reg", NULL
);
534 /* First register entry is addr (00BBSS00) */
535 /* Try to enable eeh */
536 ret
= rtas_call(ibm_set_eeh_option
, 4, 1, NULL
,
537 regs
[0], info
->buid_hi
, info
->buid_lo
,
/* Success: record global + per-node enablement and the config addr. */
540 eeh_subsystem_enabled
= 1;
541 dn
->eeh_mode
|= EEH_MODE_SUPPORTED
;
542 dn
->eeh_config_addr
= regs
[0];
544 printk(KERN_DEBUG
"EEH: %s: eeh enabled\n", dn
->full_name
);
548 /* This device doesn't support EEH, but it may have an
549 * EEH parent, in which case we mark it as supported. */
550 if (dn
->parent
&& (dn
->parent
->eeh_mode
& EEH_MODE_SUPPORTED
)) {
551 /* Parent supports EEH. */
552 dn
->eeh_mode
|= EEH_MODE_SUPPORTED
;
/* Inherit the parent's config address for firmware queries. */
553 dn
->eeh_config_addr
= dn
->parent
->eeh_config_addr
;
558 printk(KERN_WARNING
"EEH: %s: unable to get reg property.\n",
566 * Initialize EEH by trying to enable it for all of the adapters in the system.
567 * As a side effect we can determine here if eeh is supported at all.
568 * Note that we leave EEH on so failed config cycles won't cause a machine
569 * check. If a user turns off EEH for a particular adapter they are really
570 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
571 * grant access to a slot if EEH isn't enabled, and so we always enable
572 * EEH for all slots/all devices.
574 * The eeh-force-off option disables EEH checking globally, for all slots.
575 * Even if force-off is set, the EEH hardware is still enabled, so that
576 * newer systems can boot.
578 void __init
eeh_init(void)
580 struct device_node
*phb
, *np
;
581 struct eeh_early_enable_info info
;
583 init_pci_config_tokens();
/* The RTAS tokens live under /rtas in the device tree. */
585 np
= of_find_node_by_path("/rtas");
589 ibm_set_eeh_option
= rtas_token("ibm,set-eeh-option");
590 ibm_set_slot_reset
= rtas_token("ibm,set-slot-reset");
591 ibm_read_slot_reset_state
= rtas_token("ibm,read-slot-reset-state");
592 ibm_slot_error_detail
= rtas_token("ibm,slot-error-detail");
/* Firmware without set-eeh-option cannot do EEH at all: bail out. */
594 if (ibm_set_eeh_option
== RTAS_UNKNOWN_SERVICE
)
/* Size the error-log buffer from firmware, with a 1024-byte fallback
 * and an upper clamp at our static buffer size. */
597 eeh_error_buf_size
= rtas_token("rtas-error-log-max");
598 if (eeh_error_buf_size
== RTAS_UNKNOWN_SERVICE
) {
599 eeh_error_buf_size
= 1024;
601 if (eeh_error_buf_size
> RTAS_ERROR_LOG_MAX
) {
602 printk(KERN_WARNING
"EEH: rtas-error-log-max is bigger than allocated "
603 "buffer ! (%d vs %d)", eeh_error_buf_size
, RTAS_ERROR_LOG_MAX
);
604 eeh_error_buf_size
= RTAS_ERROR_LOG_MAX
;
607 /* Enable EEH for all adapters. Note that eeh requires buid's */
608 for (phb
= of_find_node_by_name(NULL
, "pci"); phb
;
609 phb
= of_find_node_by_name(phb
, "pci")) {
612 buid
= get_phb_buid(phb
);
/* Walk every device under this PHB, trying to enable EEH on each. */
616 info
.buid_lo
= BUID_LO(buid
);
617 info
.buid_hi
= BUID_HI(buid
);
618 traverse_pci_devices(phb
, early_enable_eeh
, &info
);
621 if (eeh_subsystem_enabled
)
622 printk(KERN_INFO
"EEH: PCI Enhanced I/O Error Handling Enabled\n");
624 printk(KERN_WARNING
"EEH: No capable adapters found\n");
628 * eeh_add_device_early - enable EEH for the indicated device_node
629 * @dn: device node for which to set up EEH
631 * This routine must be used to perform EEH initialization for PCI
632 * devices that were added after system boot (e.g. hotplug, dlpar).
633 * This routine must be called before any i/o is performed to the
634 * adapter (including any config-space i/o).
635 * Whether this actually enables EEH or not for this device depends
636 * on the CEC architecture, type of the device, on earlier boot
637 * command-line arguments & etc.
639 void eeh_add_device_early(struct device_node
*dn
)
641 struct pci_controller
*phb
;
642 struct eeh_early_enable_info info
;
/* No-op when EEH was never enabled at boot, or with no node. */
644 if (!dn
|| !eeh_subsystem_enabled
)
/* NOTE(review): the assignment of phb (presumably phb = dn->phb)
 * appears to have been dropped from this excerpt. */
647 if (NULL
== phb
|| 0 == phb
->buid
) {
648 printk(KERN_WARNING
"EEH: Expected buid but found none\n");
/* Reuse the boot-time enable path with this PHB's buid. */
652 info
.buid_hi
= BUID_HI(phb
->buid
);
653 info
.buid_lo
= BUID_LO(phb
->buid
);
654 early_enable_eeh(dn
, &info
);
656 EXPORT_SYMBOL(eeh_add_device_early
);
659 * eeh_add_device_late - perform EEH initialization for the indicated pci device
660 * @dev: pci device for which to set up EEH
662 * This routine must be used to complete EEH initialization for PCI
663 * devices that were added after system boot (e.g. hotplug, dlpar).
665 void eeh_add_device_late(struct pci_dev
*dev
)
667 if (!dev
|| !eeh_subsystem_enabled
)
671 printk(KERN_DEBUG
"EEH: adding device %s %s\n", pci_name(dev
),
672 pci_pretty_name(dev
));
/* Register the device's resource ranges for address lookup. */
675 pci_addr_cache_insert_device (dev
);
677 EXPORT_SYMBOL(eeh_add_device_late
);
680 * eeh_remove_device - undo EEH setup for the indicated pci device
681 * @dev: pci device to be removed
683 * This routine should be called when a device is removed from a running
684 * system (e.g. by hotplug or dlpar).
686 void eeh_remove_device(struct pci_dev
*dev
)
688 if (!dev
|| !eeh_subsystem_enabled
)
691 /* Unregister the device with the EEH/PCI address search system */
693 printk(KERN_DEBUG
"EEH: remove device %s %s\n", pci_name(dev
),
694 pci_pretty_name(dev
));
696 pci_addr_cache_remove_device(dev
);
698 EXPORT_SYMBOL(eeh_remove_device
);
/* seq_file show callback for /proc/ppc64/eeh: sums the per-cpu
 * statistics and reports whether the subsystem is enabled. */
700 static int proc_eeh_show(struct seq_file
*m
, void *v
)
703 unsigned long ffs
= 0, positives
= 0, failures
= 0;
/* Aggregate per-cpu counters (iteration over cpu is outside this
 * excerpt -- presumably for_each_cpu or similar). */
706 ffs
+= per_cpu(total_mmio_ffs
, cpu
);
707 positives
+= per_cpu(false_positives
, cpu
);
708 failures
+= per_cpu(ignored_failures
, cpu
);
711 if (0 == eeh_subsystem_enabled
) {
712 seq_printf(m
, "EEH Subsystem is globally disabled\n");
713 seq_printf(m
, "eeh_total_mmio_ffs=%ld\n", ffs
);
715 seq_printf(m
, "EEH Subsystem is enabled\n");
716 seq_printf(m
, "eeh_total_mmio_ffs=%ld\n"
717 "eeh_false_positives=%ld\n"
718 "eeh_ignored_failures=%ld\n",
719 ffs
, positives
, failures
);
/* open callback: hook proc_eeh_show into the single-shot seq_file
 * machinery (single_open pairs with single_release below). */
725 static int proc_eeh_open(struct inode
*inode
, struct file
*file
)
727 return single_open(file
, proc_eeh_show
, NULL
);
/* fops for /proc/ppc64/eeh.
 * NOTE(review): the .read = seq_read and .llseek = seq_lseek entries
 * appear to have been dropped from this excerpt. */
730 static struct file_operations proc_eeh_operations
= {
731 .open
= proc_eeh_open
,
734 .release
= single_release
,
/* Late init: create /proc/ppc64/eeh on pSeries platforms only. */
737 static int __init
eeh_init_proc(void)
739 struct proc_dir_entry
*e
;
741 if (systemcfg
->platform
& PLATFORM_PSERIES
) {
742 e
= create_proc_entry("ppc64/eeh", 0, NULL
);
744 e
->proc_fops
= &proc_eeh_operations
;
749 __initcall(eeh_init_proc
);