#ifdef CONFIG_CPU_SUP_AMD

static DEFINE_RAW_SPINLOCK(amd_nb_lock);

static __initconst const u64 amd_hw_cache_event_ids
				[PERF_COUNT_HW_CACHE_MAX]
				[PERF_COUNT_HW_CACHE_OP_MAX]
				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
{
 [ C(L1D) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS)   ] = 0x0041, /* Data Cache Misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */
		[ C(RESULT_MISS)   ] = 0x0167, /* Data Prefetcher :cancelled */
	},
 },
 [ C(L1I) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */
		[ C(RESULT_MISS)   ] = 0x0081, /* Instruction cache misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(LL) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */
		[ C(RESULT_MISS)   ] = 0x037E, /* L2 Cache Misses :IC+DC */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(DTLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */
		[ C(RESULT_MISS)   ] = 0x0046, /* L1 DTLB and L2 DTLB Miss */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = 0,
		[ C(RESULT_MISS)   ] = 0,
	},
 },
 [ C(ITLB) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fetches */
		[ C(RESULT_MISS)   ] = 0x0085, /* Instr. fetch ITLB misses */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
 [ C(BPU) ] = {
	[ C(OP_READ) ] = {
		[ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */
		[ C(RESULT_MISS)   ] = 0x00c3, /* Retired Mispredicted BI */
	},
	[ C(OP_WRITE) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
	[ C(OP_PREFETCH) ] = {
		[ C(RESULT_ACCESS) ] = -1,
		[ C(RESULT_MISS)   ] = -1,
	},
 },
};

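/*
 * Note on the table above: a value of 0 means the generic cache event has
 * no usable counterpart on AMD, while -1 marks op/result combinations that
 * make no sense for that cache level; the generic x86 cache-event setup
 * code rejects both kinds of entry when a user requests them.
 */
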
/*
 * AMD Performance Monitor K7 and later.
 */
static const u64 amd_perfmon_event_map[] =
{
  [PERF_COUNT_HW_CPU_CYCLES]		= 0x0076,
  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0080,
  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0081,
  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c2,
  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c3,
};

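/*
 * amd_pmu_event_map() below simply indexes this table to translate a
 * generic event id into an AMD event-select code, e.g.
 * PERF_COUNT_HW_CPU_CYCLES maps to 0x0076 (CPU clocks not halted). The
 * event names are taken from the AMD BKDG and are given here only as a
 * reading aid.
 */
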
static u64 amd_pmu_event_map(int hw_event)
{
	return amd_perfmon_event_map[hw_event];
}

static int amd_pmu_hw_config(struct perf_event *event)
{
	int ret = x86_pmu_hw_config(event);

	if (ret)
		return ret;

	if (event->attr.type != PERF_TYPE_RAW)
		return 0;

	event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK;

	return 0;
}

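/*
 * Illustrative example (not part of the original source): for a raw event
 * such as attr.config = 0x037E (the "L2 Cache Misses :IC+DC" code used in
 * the cache table above), only the bits covered by AMD64_RAW_EVENT_MASK,
 * roughly the event-select, unit-mask and related filter bits, are copied
 * into hw.config by amd_pmu_hw_config() above; the privilege-level,
 * interrupt and enable control bits remain under the control of the
 * generic x86 perf code.
 */
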
/*
 * AMD64 events are detected based on their event codes.
 */
static inline int amd_is_nb_event(struct hw_perf_event *hwc)
{
	return (hwc->config & 0xe0) == 0xe0;
}

static inline int amd_has_nb(struct cpu_hw_events *cpuc)
{
	struct amd_nb *nb = cpuc->amd_nb;

	return nb && nb->nb_id != -1;
}

static void amd_put_event_constraints(struct cpu_hw_events *cpuc,
				      struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	int i;

	/*
	 * only care about NB events
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return;

	/*
	 * need to scan whole list because event may not have
	 * been assigned during scheduling
	 *
	 * no race condition possible because event can only
	 * be removed on one CPU at a time AND PMU is disabled
	 * when we come here
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		if (nb->owners[i] == event) {
			cmpxchg(nb->owners + i, event, NULL);
			break;
		}
	}
}

/*
 * AMD64 NorthBridge events need special treatment because
 * counter access needs to be synchronized across all cores
 * of a package. Refer to BKDG section 3.12.
 *
 * NB events are events measuring L3 cache and HyperTransport
 * traffic. They are identified by an event code >= 0xe00.
 * They measure events on the NorthBridge, which is shared
 * by all cores on a package. NB events are counted on a
 * shared set of counters. When a NB event is programmed
 * in a counter, the data actually comes from a shared
 * counter. Thus, access to those counters needs to be
 * synchronized.
 *
 * We implement the synchronization such that no two cores
 * can be measuring NB events using the same counters. Thus,
 * we maintain a per-NB allocation table. The available slot
 * is propagated using the event_constraint structure.
 *
 * We provide only one choice for each NB event based on
 * the fact that only NB events have restrictions. Consequently,
 * if a counter is available, there is a guarantee the NB event
 * will be assigned to it. If no slot is available, an empty
 * constraint is returned and scheduling will eventually fail
 * for this event.
 *
 * Note that all cores attached to the same NB compete for the same
 * counters to host NB events; this is why we use atomic ops. Some
 * multi-chip CPUs may have more than one NB.
 *
 * Given that resources are allocated (cmpxchg), they must be
 * eventually freed for others to use. This is accomplished by
 * calling amd_put_event_constraints().
 *
 * Non-NB events are not impacted by this restriction.
 */
static struct event_constraint *
amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct amd_nb *nb = cpuc->amd_nb;
	struct perf_event *old = NULL;
	int max = x86_pmu.num_counters;
	int i, j, k = -1;

	/*
	 * if not NB event or no NB, then no constraints
	 */
	if (!(amd_has_nb(cpuc) && amd_is_nb_event(hwc)))
		return &unconstrained;

	/*
	 * detect if already present, if so reuse
	 *
	 * cannot merge with actual allocation
	 * because of possible holes
	 *
	 * event can already be present yet not assigned (in hwc->idx)
	 * because of successive calls to x86_schedule_events() from
	 * hw_perf_group_sched_in() without hw_perf_enable()
	 */
	for (i = 0; i < max; i++) {
		/*
		 * keep track of first free slot
		 */
		if (k == -1 && !nb->owners[i])
			k = i;

		/* already present, reuse */
		if (nb->owners[i] == event)
			goto done;
	}

	/*
	 * not present, so grab a new slot
	 * starting either at:
	 */
	if (hwc->idx != -1) {
		/* previous assignment */
		i = hwc->idx;
	} else if (k != -1) {
		/* start from free slot found */
		i = k;
	} else {
		/*
		 * event not found, no slot found in
		 * first pass, try again from the
		 * beginning
		 */
		i = 0;
	}
	j = i;
	do {
		old = cmpxchg(nb->owners + i, NULL, event);
		if (!old)
			break;
		if (++i == max)
			i = 0;
	} while (i != j);

done:
	if (!old)
		return &nb->event_constraints[i];

	return &emptyconstraint;
}

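/*
 * Illustrative walkthrough (not from the original source): assume two cores
 * on the same node each program one NB event and x86_pmu.num_counters is 4.
 * The first core's cmpxchg() claims nb->owners[0] and the core is handed
 * &nb->event_constraints[0], i.e. counter 0. The second core sees owners[0]
 * taken, claims owners[1] and is constrained to counter 1. If all slots are
 * already owned by other cores, the loop above wraps around without finding
 * a free slot and &emptyconstraint is returned, so scheduling of that event
 * fails as described in the comment block before this function.
 */
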
static struct amd_nb *amd_alloc_nb(int cpu, int nb_id)
{
	struct amd_nb *nb;
	int i;

	nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL);
	if (!nb)
		return NULL;

	memset(nb, 0, sizeof(*nb));
	nb->nb_id = nb_id;

	/*
	 * initialize all possible NB constraints
	 */
	for (i = 0; i < x86_pmu.num_counters; i++) {
		__set_bit(i, nb->event_constraints[i].idxmsk);
		nb->event_constraints[i].weight = 1;
	}
	return nb;
}

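/*
 * CPU hotplug callbacks. amd_pmu_cpu_prepare() runs before the CPU is
 * brought up and only allocates a provisional amd_nb with nb_id == -1;
 * amd_pmu_cpu_starting() later either binds it to the real node id or
 * replaces it with the amd_nb already registered by another core of the
 * same node.
 */
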
static int amd_pmu_cpu_prepare(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);

	WARN_ON_ONCE(cpuc->amd_nb);

	if (boot_cpu_data.x86_max_cores < 2)
		return NOTIFY_OK;

	cpuc->amd_nb = amd_alloc_nb(cpu, -1);
	if (!cpuc->amd_nb)
		return NOTIFY_BAD;

	return NOTIFY_OK;
}

static void amd_pmu_cpu_starting(int cpu)
{
	struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
	struct amd_nb *nb;
	int i, nb_id;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	nb_id = amd_get_nb_id(cpu);
	WARN_ON_ONCE(nb_id == BAD_APICID);

	raw_spin_lock(&amd_nb_lock);

	for_each_online_cpu(i) {
		nb = per_cpu(cpu_hw_events, i).amd_nb;
		if (WARN_ON_ONCE(!nb))
			continue;

		if (nb->nb_id == nb_id) {
			kfree(cpuc->amd_nb);
			cpuc->amd_nb = nb;
			break;
		}
	}

	cpuc->amd_nb->nb_id = nb_id;
	cpuc->amd_nb->refcnt++;

	raw_spin_unlock(&amd_nb_lock);
}

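/*
 * On CPU_DEAD the reference taken in amd_pmu_cpu_starting() is dropped.
 * The shared amd_nb is freed once the last core of the node has gone away,
 * or immediately if it was never bound to a node (nb_id == -1).
 */
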
static void amd_pmu_cpu_dead(int cpu)
{
	struct cpu_hw_events *cpuhw;

	if (boot_cpu_data.x86_max_cores < 2)
		return;

	cpuhw = &per_cpu(cpu_hw_events, cpu);

	raw_spin_lock(&amd_nb_lock);

	if (cpuhw->amd_nb) {
		struct amd_nb *nb = cpuhw->amd_nb;

		if (nb->nb_id == -1 || --nb->refcnt == 0)
			kfree(nb);

		cpuhw->amd_nb = NULL;
	}

	raw_spin_unlock(&amd_nb_lock);
}

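/*
 * The AMD PMU description: most callbacks are the generic x86 routines;
 * only hw_config and the event-constraint hooks above are AMD-specific,
 * together with the K7 MSR layout (MSR_K7_EVNTSEL0/MSR_K7_PERFCTR0) and
 * the 48-bit counter width implied by cntval_mask and max_period.
 */
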
static __initconst const struct x86_pmu amd_pmu = {
	.handle_irq		= x86_pmu_handle_irq,
	.disable_all		= x86_pmu_disable_all,
	.enable_all		= x86_pmu_enable_all,
	.enable			= x86_pmu_enable_event,
	.disable		= x86_pmu_disable_event,
	.hw_config		= amd_pmu_hw_config,
	.schedule_events	= x86_schedule_events,
	.eventsel		= MSR_K7_EVNTSEL0,
	.perfctr		= MSR_K7_PERFCTR0,
	.event_map		= amd_pmu_event_map,
	.max_events		= ARRAY_SIZE(amd_perfmon_event_map),
	.cntval_mask		= (1ULL << 48) - 1,
	/* use highest bit to detect overflow */
	.max_period		= (1ULL << 47) - 1,
	.get_event_constraints	= amd_get_event_constraints,
	.put_event_constraints	= amd_put_event_constraints,

	.cpu_prepare		= amd_pmu_cpu_prepare,
	.cpu_starting		= amd_pmu_cpu_starting,
	.cpu_dead		= amd_pmu_cpu_dead,
};

static __init int amd_pmu_init(void)
{
	/* Performance-monitoring supported from K7 and later: */
	if (boot_cpu_data.x86 < 6)
		return -ENODEV;

	x86_pmu = amd_pmu;

	/* Events are common for all AMDs */
	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
	       sizeof(hw_cache_event_ids));

	return 0;
}

#else /* CONFIG_CPU_SUP_AMD */

static int amd_pmu_init(void)
{
	return 0;
}

#endif