1 // SPDX-License-Identifier: GPL-2.0-only
2
3 /*
4 * Copyright(c) 2023 Huawei
5 *
6 * The CXL 3.0 specification includes a standard Performance Monitoring Unit,
7 * called the CXL PMU, or CPMU. In order to allow a high degree of
8 * implementation flexibility the specification provides a wide range of
9 * options all of which are self describing.
10 *
11 * Details in CXL rev 3.0 section 8.2.7 CPMU Register Interface
12 */
13
14 #include <linux/io-64-nonatomic-lo-hi.h>
15 #include <linux/perf_event.h>
16 #include <linux/bitops.h>
17 #include <linux/device.h>
18 #include <linux/bits.h>
19 #include <linux/list.h>
20 #include <linux/bug.h>
21 #include <linux/pci.h>
22
23 #include "../cxl/cxlpci.h"
24 #include "../cxl/cxl.h"
25 #include "../cxl/pmu.h"
26
/*
 * Register offsets (relative to the CPMU MMIO base) and the bit fields
 * within each register. Field masks are listed directly under the register
 * they belong to.
 */

/* Capability register: global properties of this CPMU instance */
#define CXL_PMU_CAP_REG			0x0
/* Stored as (field + 1) by cxl_pmu_parse_caps() */
#define   CXL_PMU_CAP_NUM_COUNTERS_MSK			GENMASK_ULL(5, 0)
#define   CXL_PMU_CAP_COUNTER_WIDTH_MSK			GENMASK_ULL(15, 8)
/* Stored as (field + 1) by cxl_pmu_parse_caps() */
#define   CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK		GENMASK_ULL(24, 20)
/* Bitmap of supported filters; individual bits follow */
#define   CXL_PMU_CAP_FILTERS_SUP_MSK			GENMASK_ULL(39, 32)
#define     CXL_PMU_FILTER_HDM				BIT(0)
#define     CXL_PMU_FILTER_CHAN_RANK_BANK		BIT(1)
/* Interrupt message number, only consulted when CXL_PMU_CAP_INT is set */
#define   CXL_PMU_CAP_MSI_N_MSK				GENMASK_ULL(47, 44)
#define   CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN		BIT_ULL(48)
#define   CXL_PMU_CAP_FREEZE				BIT_ULL(49)
#define   CXL_PMU_CAP_INT				BIT_ULL(50)
#define   CXL_PMU_CAP_VERSION_MSK			GENMASK_ULL(63, 60)

/* Per counter overflow status; written back by the interrupt handler */
#define CXL_PMU_OVERFLOW_REG		0x10
/* Per counter freeze control; all ones on disable, zero on enable */
#define CXL_PMU_FREEZE_REG		0x18
/* Event Capability register n: one vendor ID / group ID / event bitmap */
#define CXL_PMU_EVENT_CAP_REG(n)	(0x100 + 8 * (n))
#define   CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK	GENMASK_ULL(31, 0)
#define   CXL_PMU_EVENT_CAP_GROUP_ID_MSK		GENMASK_ULL(47, 32)
#define   CXL_PMU_EVENT_CAP_VENDOR_ID_MSK		GENMASK_ULL(63, 48)

/* Counter Configuration register for counter n */
#define CXL_PMU_COUNTER_CFG_REG(n)	(0x200 + 8 * (n))
#define   CXL_PMU_COUNTER_CFG_TYPE_MSK			GENMASK_ULL(1, 0)
#define     CXL_PMU_COUNTER_CFG_TYPE_FREE_RUN		0
#define     CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN		1
#define     CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE	2
#define   CXL_PMU_COUNTER_CFG_ENABLE			BIT_ULL(8)
#define   CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW		BIT_ULL(9)
#define   CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW		BIT_ULL(10)
#define   CXL_PMU_COUNTER_CFG_EDGE			BIT_ULL(11)
#define   CXL_PMU_COUNTER_CFG_INVERT			BIT_ULL(12)
/* Note: 8 bits wide */
#define   CXL_PMU_COUNTER_CFG_THRESHOLD_MSK		GENMASK_ULL(23, 16)
#define   CXL_PMU_COUNTER_CFG_EVENTS_MSK		GENMASK_ULL(55, 24)
/* Index of the Event Capability register the counter's events come from */
#define   CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK	GENMASK_ULL(63, 59)

/*
 * Filter f of counter n: the arithmetic gives each counter 8 filter slots
 * laid out with a 4 byte stride.
 */
#define CXL_PMU_FILTER_CFG_REG(n, f)	(0x400 + 4 * ((f) + (n) * 8))
#define   CXL_PMU_FILTER_CFG_VALUE_MSK			GENMASK(31, 0)

/* Current value of counter n */
#define CXL_PMU_COUNTER_REG(n)		(0xc00 + 8 * (n))

/* CXL rev 3.0 Table 13-5 Events under CXL Vendor ID */
#define CXL_PMU_GID_CLOCK_TICKS		0x00
#define CXL_PMU_GID_D2H_REQ		0x0010
#define CXL_PMU_GID_D2H_RSP		0x0011
#define CXL_PMU_GID_H2D_REQ		0x0012
#define CXL_PMU_GID_H2D_RSP		0x0013
#define CXL_PMU_GID_CACHE_DATA		0x0014
#define CXL_PMU_GID_M2S_REQ		0x0020
#define CXL_PMU_GID_M2S_RWD		0x0021
#define CXL_PMU_GID_M2S_BIRSP		0x0022
#define CXL_PMU_GID_S2M_BISNP		0x0023
#define CXL_PMU_GID_S2M_NDR		0x0024
#define CXL_PMU_GID_S2M_DRS		0x0025
#define CXL_PMU_GID_DDR			0x8000
80
/*
 * CPU hotplug state number handed to cpuhp_state_add_instance() and
 * cpuhp_state_remove_instance_nocalls() for every PMU instance.
 * NOTE(review): the assignment is not in the portion of the file shown here.
 */
static int cxl_pmu_cpuhp_state_num;
82
/*
 * One set of events that can be counted, as discovered by
 * cxl_pmu_parse_caps(). Instances live on either the fixed or the
 * configurable list of struct cxl_pmu_info.
 */
struct cxl_pmu_ev_cap {
	u16 vid;	/* Vendor ID from the Event Capability register */
	u16 gid;	/* Event group ID from the Event Capability register */
	u32 msk;	/* Event bitmap: exact set (fixed) or supported set (configurable) */
	union {
		int counter_idx; /* fixed counters */
		int event_idx; /* configurable counters */
	};
	struct list_head node;	/* Link in event_caps_fixed / event_caps_configurable */
};
93
/* Size of the counter bitmaps below */
#define CXL_PMU_MAX_COUNTERS 64

/* Driver state for one CPMU instance; stored as the device's drvdata */
struct cxl_pmu_info {
	struct pmu pmu;			/* perf core handle, see pmu_to_cxl_pmu_info() */
	void __iomem *base;		/* MMIO base of the CPMU register block */
	struct perf_event **hw_events;	/* Event currently owning each counter, or NULL */
	struct list_head event_caps_configurable; /* cxl_pmu_ev_cap usable by configurable counters */
	struct list_head event_caps_fixed;	/* cxl_pmu_ev_cap of fixed function counters */
	DECLARE_BITMAP(used_counter_bm, CXL_PMU_MAX_COUNTERS); /* Counters allocated to events */
	DECLARE_BITMAP(conf_counter_bm, CXL_PMU_MAX_COUNTERS); /* Counters of configurable type */
	u16 counter_width;		/* Counter width field of the capability register */
	u8 num_counters;		/* Capability register field + 1 */
	u8 num_event_capabilities;	/* Capability register field + 1 */
	int on_cpu;			/* CPU assigned to every event in event_init */
	struct hlist_node node;		/* CPU hotplug instance node */
	bool filter_hdm;		/* HDM filter bit set in the capability register */
	/*
	 * Message number read from the capability register (or -1 when no
	 * interrupt support is advertised); probe later replaces it with the
	 * requested Linux interrupt number.
	 */
	int irq;
};

/* Map the struct pmu embedded above back to its containing cxl_pmu_info */
#define pmu_to_cxl_pmu_info(_pmu) container_of(_pmu, struct cxl_pmu_info, pmu)
113
114 /*
115 * All CPMU counters are discoverable via the Event Capabilities Registers.
116 * Each Event Capability register contains a a VID / GroupID.
117 * A counter may then count any combination (by summing) of events in
118 * that group which are in the Supported Events Bitmask.
119 * However, there are some complexities to the scheme.
120 * - Fixed function counters refer to an Event Capabilities register.
121 * That event capability register is not then used for Configurable
122 * counters.
123 */
cxl_pmu_parse_caps(struct device * dev,struct cxl_pmu_info * info)124 static int cxl_pmu_parse_caps(struct device *dev, struct cxl_pmu_info *info)
125 {
126 unsigned long fixed_counter_event_cap_bm = 0;
127 void __iomem *base = info->base;
128 bool freeze_for_enable;
129 u64 val, eval;
130 int i;
131
132 val = readq(base + CXL_PMU_CAP_REG);
133 freeze_for_enable = FIELD_GET(CXL_PMU_CAP_WRITEABLE_WHEN_FROZEN, val) &&
134 FIELD_GET(CXL_PMU_CAP_FREEZE, val);
135 if (!freeze_for_enable) {
136 dev_err(dev, "Counters not writable while frozen\n");
137 return -ENODEV;
138 }
139
140 info->num_counters = FIELD_GET(CXL_PMU_CAP_NUM_COUNTERS_MSK, val) + 1;
141 info->counter_width = FIELD_GET(CXL_PMU_CAP_COUNTER_WIDTH_MSK, val);
142 info->num_event_capabilities = FIELD_GET(CXL_PMU_CAP_NUM_EVN_CAP_REG_SUP_MSK, val) + 1;
143
144 info->filter_hdm = FIELD_GET(CXL_PMU_CAP_FILTERS_SUP_MSK, val) & CXL_PMU_FILTER_HDM;
145 if (FIELD_GET(CXL_PMU_CAP_INT, val))
146 info->irq = FIELD_GET(CXL_PMU_CAP_MSI_N_MSK, val);
147 else
148 info->irq = -1;
149
150 /* First handle fixed function counters; note if configurable counters found */
151 for (i = 0; i < info->num_counters; i++) {
152 struct cxl_pmu_ev_cap *pmu_ev;
153 u32 events_msk;
154 u8 group_idx;
155
156 val = readq(base + CXL_PMU_COUNTER_CFG_REG(i));
157
158 if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) ==
159 CXL_PMU_COUNTER_CFG_TYPE_CONFIGURABLE) {
160 set_bit(i, info->conf_counter_bm);
161 }
162
163 if (FIELD_GET(CXL_PMU_COUNTER_CFG_TYPE_MSK, val) !=
164 CXL_PMU_COUNTER_CFG_TYPE_FIXED_FUN)
165 continue;
166
167 /* In this case we know which fields are const */
168 group_idx = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK, val);
169 events_msk = FIELD_GET(CXL_PMU_COUNTER_CFG_EVENTS_MSK, val);
170 eval = readq(base + CXL_PMU_EVENT_CAP_REG(group_idx));
171 pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
172 if (!pmu_ev)
173 return -ENOMEM;
174
175 pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
176 pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
177 /* For a fixed purpose counter use the events mask from the counter CFG */
178 pmu_ev->msk = events_msk;
179 pmu_ev->counter_idx = i;
180 /* This list add is never unwound as all entries deleted on remove */
181 list_add(&pmu_ev->node, &info->event_caps_fixed);
182 /*
183 * Configurable counters must not use an Event Capability registers that
184 * is in use for a Fixed counter
185 */
186 set_bit(group_idx, &fixed_counter_event_cap_bm);
187 }
188
189 if (!bitmap_empty(info->conf_counter_bm, CXL_PMU_MAX_COUNTERS)) {
190 struct cxl_pmu_ev_cap *pmu_ev;
191 int j;
192 /* Walk event capabilities unused by fixed counters */
193 for_each_clear_bit(j, &fixed_counter_event_cap_bm,
194 info->num_event_capabilities) {
195 pmu_ev = devm_kzalloc(dev, sizeof(*pmu_ev), GFP_KERNEL);
196 if (!pmu_ev)
197 return -ENOMEM;
198
199 eval = readq(base + CXL_PMU_EVENT_CAP_REG(j));
200 pmu_ev->vid = FIELD_GET(CXL_PMU_EVENT_CAP_VENDOR_ID_MSK, eval);
201 pmu_ev->gid = FIELD_GET(CXL_PMU_EVENT_CAP_GROUP_ID_MSK, eval);
202 pmu_ev->msk = FIELD_GET(CXL_PMU_EVENT_CAP_SUPPORTED_EVENTS_MSK, eval);
203 pmu_ev->event_idx = j;
204 list_add(&pmu_ev->node, &info->event_caps_configurable);
205 }
206 }
207
208 return 0;
209 }
210
cxl_pmu_format_sysfs_show(struct device * dev,struct device_attribute * attr,char * buf)211 static ssize_t cxl_pmu_format_sysfs_show(struct device *dev,
212 struct device_attribute *attr, char *buf)
213 {
214 struct dev_ext_attribute *eattr;
215
216 eattr = container_of(attr, struct dev_ext_attribute, attr);
217
218 return sysfs_emit(buf, "%s\n", (char *)eattr->var);
219 }
220
/*
 * Build an anonymous dev_ext_attribute whose ->var carries the format
 * string, and evaluate to a pointer to its embedded struct attribute.
 */
#define CXL_PMU_FORMAT_ATTR(_name, _format)\
	(&((struct dev_ext_attribute[]) {					\
		{								\
			.attr = __ATTR(_name, 0444,				\
				       cxl_pmu_format_sysfs_show, NULL),	\
			.var = (void *)_format					\
		}								\
		})[0].attr.attr)

/* Indices into cxl_pmu_format_attr[], used by cxl_pmu_format_is_visible() */
enum {
	cxl_pmu_mask_attr,
	cxl_pmu_gid_attr,
	cxl_pmu_vid_attr,
	cxl_pmu_threshold_attr,
	cxl_pmu_invert_attr,
	cxl_pmu_edge_attr,
	cxl_pmu_hdm_filter_en_attr,
	cxl_pmu_hdm_attr,
};

/*
 * Layout of perf_event_attr config / config1 / config2 as exposed under
 * the "format" sysfs directory. Must stay in step with the
 * CXL_PMU_ATTR_CONFIG*_MSK definitions below.
 */
static struct attribute *cxl_pmu_format_attr[] = {
	[cxl_pmu_mask_attr] = CXL_PMU_FORMAT_ATTR(mask, "config:0-31"),
	[cxl_pmu_gid_attr] = CXL_PMU_FORMAT_ATTR(gid, "config:32-47"),
	[cxl_pmu_vid_attr] = CXL_PMU_FORMAT_ATTR(vid, "config:48-63"),
	/*
	 * 16 bits are advertised here, while the hardware threshold field
	 * (CXL_PMU_COUNTER_CFG_THRESHOLD_MSK) and the getter's u8 return
	 * type are both 8 bits wide.
	 */
	[cxl_pmu_threshold_attr] = CXL_PMU_FORMAT_ATTR(threshold, "config1:0-15"),
	[cxl_pmu_invert_attr] = CXL_PMU_FORMAT_ATTR(invert, "config1:16"),
	[cxl_pmu_edge_attr] = CXL_PMU_FORMAT_ATTR(edge, "config1:17"),
	[cxl_pmu_hdm_filter_en_attr] = CXL_PMU_FORMAT_ATTR(hdm_filter_en, "config1:18"),
	[cxl_pmu_hdm_attr] = CXL_PMU_FORMAT_ATTR(hdm, "config2:0-15"),
	NULL
};

/* Field masks matching the format strings above */
#define CXL_PMU_ATTR_CONFIG_MASK_MSK		GENMASK_ULL(31, 0)
#define CXL_PMU_ATTR_CONFIG_GID_MSK		GENMASK_ULL(47, 32)
#define CXL_PMU_ATTR_CONFIG_VID_MSK		GENMASK_ULL(63, 48)
#define CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK	GENMASK_ULL(15, 0)
#define CXL_PMU_ATTR_CONFIG1_INVERT_MSK		BIT(16)
#define CXL_PMU_ATTR_CONFIG1_EDGE_MSK		BIT(17)
#define CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK	BIT(18)
#define CXL_PMU_ATTR_CONFIG2_HDM_MSK		GENMASK(15, 0)
261
cxl_pmu_format_is_visible(struct kobject * kobj,struct attribute * attr,int a)262 static umode_t cxl_pmu_format_is_visible(struct kobject *kobj,
263 struct attribute *attr, int a)
264 {
265 struct device *dev = kobj_to_dev(kobj);
266 struct cxl_pmu_info *info = dev_get_drvdata(dev);
267
268 /*
269 * Filter capability at the CPMU level, so hide the attributes if the particular
270 * filter is not supported.
271 */
272 if (!info->filter_hdm &&
273 (attr == cxl_pmu_format_attr[cxl_pmu_hdm_filter_en_attr] ||
274 attr == cxl_pmu_format_attr[cxl_pmu_hdm_attr]))
275 return 0;
276
277 return attr->mode;
278 }
279
/* "format" sysfs group; visibility is filtered per CPMU instance */
static const struct attribute_group cxl_pmu_format_group = {
	.name = "format",
	.attrs = cxl_pmu_format_attr,
	.is_visible = cxl_pmu_format_is_visible,
};
285
cxl_pmu_config_get_mask(struct perf_event * event)286 static u32 cxl_pmu_config_get_mask(struct perf_event *event)
287 {
288 return FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, event->attr.config);
289 }
290
cxl_pmu_config_get_gid(struct perf_event * event)291 static u16 cxl_pmu_config_get_gid(struct perf_event *event)
292 {
293 return FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, event->attr.config);
294 }
295
cxl_pmu_config_get_vid(struct perf_event * event)296 static u16 cxl_pmu_config_get_vid(struct perf_event *event)
297 {
298 return FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, event->attr.config);
299 }
300
cxl_pmu_config1_get_threshold(struct perf_event * event)301 static u8 cxl_pmu_config1_get_threshold(struct perf_event *event)
302 {
303 return FIELD_GET(CXL_PMU_ATTR_CONFIG1_THRESHOLD_MSK, event->attr.config1);
304 }
305
cxl_pmu_config1_get_invert(struct perf_event * event)306 static bool cxl_pmu_config1_get_invert(struct perf_event *event)
307 {
308 return FIELD_GET(CXL_PMU_ATTR_CONFIG1_INVERT_MSK, event->attr.config1);
309 }
310
cxl_pmu_config1_get_edge(struct perf_event * event)311 static bool cxl_pmu_config1_get_edge(struct perf_event *event)
312 {
313 return FIELD_GET(CXL_PMU_ATTR_CONFIG1_EDGE_MSK, event->attr.config1);
314 }
315
316 /*
317 * CPMU specification allows for 8 filters, each with a 32 bit value...
318 * So we need to find 8x32bits to store it in.
319 * As the value used for disable is 0xffff_ffff, a separate enable switch
320 * is needed.
321 */
322
cxl_pmu_config1_hdm_filter_en(struct perf_event * event)323 static bool cxl_pmu_config1_hdm_filter_en(struct perf_event *event)
324 {
325 return FIELD_GET(CXL_PMU_ATTR_CONFIG1_FILTER_EN_MSK, event->attr.config1);
326 }
327
cxl_pmu_config2_get_hdm_decoder(struct perf_event * event)328 static u16 cxl_pmu_config2_get_hdm_decoder(struct perf_event *event)
329 {
330 return FIELD_GET(CXL_PMU_ATTR_CONFIG2_HDM_MSK, event->attr.config2);
331 }
332
cxl_pmu_event_sysfs_show(struct device * dev,struct device_attribute * attr,char * buf)333 static ssize_t cxl_pmu_event_sysfs_show(struct device *dev,
334 struct device_attribute *attr, char *buf)
335 {
336 struct perf_pmu_events_attr *pmu_attr =
337 container_of(attr, struct perf_pmu_events_attr, attr);
338
339 return sysfs_emit(buf, "config=%#llx\n", pmu_attr->id);
340 }
341
/*
 * Declare an event attribute whose id packs vendor ID (bits 63:48),
 * group ID (bits 47:32) and event mask (bits 31:0), i.e. the same layout
 * as the "config" format fields.
 */
#define CXL_PMU_EVENT_ATTR(_name, _vid, _gid, _msk)			\
	PMU_EVENT_ATTR_ID(_name, cxl_pmu_event_sysfs_show,		\
			  ((u64)(_vid) << 48) | ((u64)(_gid) << 32) | (u64)(_msk))

/* For CXL spec defined events */
#define CXL_PMU_EVENT_CXL_ATTR(_name, _gid, _msk)			\
	CXL_PMU_EVENT_ATTR(_name, PCI_DVSEC_VENDOR_ID_CXL, _gid, _msk)
349
/*
 * Named events defined under the CXL vendor ID. Each entry is a
 * (group ID, single event bit) pair; cxl_pmu_event_is_visible() hides the
 * ones a given CPMU instance cannot count.
 */
static struct attribute *cxl_pmu_event_attrs[] = {
	CXL_PMU_EVENT_CXL_ATTR(clock_ticks,			CXL_PMU_GID_CLOCK_TICKS, BIT(0)),
	/* CXL rev 3.0 Table 3-17 - Device to Host Requests */
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdcurr,			CXL_PMU_GID_D2H_REQ, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdown,			CXL_PMU_GID_D2H_REQ, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdshared,		CXL_PMU_GID_D2H_REQ, BIT(3)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdany,			CXL_PMU_GID_D2H_REQ, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_rdownnodata,		CXL_PMU_GID_D2H_REQ, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_itomwr,			CXL_PMU_GID_D2H_REQ, BIT(6)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrcurr,			CXL_PMU_GID_D2H_REQ, BIT(7)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_clflush,			CXL_PMU_GID_D2H_REQ, BIT(8)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevict,		CXL_PMU_GID_D2H_REQ, BIT(9)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_dirtyevict,		CXL_PMU_GID_D2H_REQ, BIT(10)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cleanevictnodata,	CXL_PMU_GID_D2H_REQ, BIT(11)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinv,			CXL_PMU_GID_D2H_REQ, BIT(12)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wowrinvf,		CXL_PMU_GID_D2H_REQ, BIT(13)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_wrinv,			CXL_PMU_GID_D2H_REQ, BIT(14)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_req_cacheflushed,		CXL_PMU_GID_D2H_REQ, BIT(16)),
	/* CXL rev 3.0 Table 3-20 - D2H Response Encodings */
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihiti,		CXL_PMU_GID_D2H_RSP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvhitv,		CXL_PMU_GID_D2H_RSP, BIT(6)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspihitse,		CXL_PMU_GID_D2H_RSP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspshitse,		CXL_PMU_GID_D2H_RSP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspsfwdm,		CXL_PMU_GID_D2H_RSP, BIT(7)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspifwdm,		CXL_PMU_GID_D2H_RSP, BIT(15)),
	CXL_PMU_EVENT_CXL_ATTR(d2h_rsp_rspvfwdv,		CXL_PMU_GID_D2H_RSP, BIT(22)),
	/* CXL rev 3.0 Table 3-21 - CXL.cache - Mapping of H2D Requests to D2H Responses */
	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpdata,			CXL_PMU_GID_H2D_REQ, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpinv,			CXL_PMU_GID_H2D_REQ, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_req_snpcur,			CXL_PMU_GID_H2D_REQ, BIT(3)),
	/* CXL rev 3.0 Table 3-22 - H2D Response Opcode Encodings */
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_writepull,		CXL_PMU_GID_H2D_RSP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_go,			CXL_PMU_GID_H2D_RSP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepull,		CXL_PMU_GID_H2D_RSP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_extcmp,			CXL_PMU_GID_H2D_RSP, BIT(6)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_gowritepulldrop,		CXL_PMU_GID_H2D_RSP, BIT(8)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_fastgowritepull,		CXL_PMU_GID_H2D_RSP, BIT(13)),
	CXL_PMU_EVENT_CXL_ATTR(h2d_rsp_goerrwritepull,		CXL_PMU_GID_H2D_RSP, BIT(15)),
	/* CXL rev 3.0 Table 13-5 directly lists these */
	CXL_PMU_EVENT_CXL_ATTR(cachedata_d2h_data,		CXL_PMU_GID_CACHE_DATA, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(cachedata_h2d_data,		CXL_PMU_GID_CACHE_DATA, BIT(1)),
	/* CXL rev 3.0 Table 3-29 M2S Req Memory Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminv,			CXL_PMU_GID_M2S_REQ, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrd,			CXL_PMU_GID_M2S_REQ, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrddata,		CXL_PMU_GID_M2S_REQ, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memrdfwd,		CXL_PMU_GID_M2S_REQ, BIT(3)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memwrfwd,		CXL_PMU_GID_M2S_REQ, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memspecrd,		CXL_PMU_GID_M2S_REQ, BIT(8)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_meminvnt,		CXL_PMU_GID_M2S_REQ, BIT(9)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_req_memcleanevict,		CXL_PMU_GID_M2S_REQ, BIT(10)),
	/* CXL rev 3.0 Table 3-35 M2S RwD Memory Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwr,			CXL_PMU_GID_M2S_RWD, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_memwrptl,		CXL_PMU_GID_M2S_RWD, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_rwd_biconflict,		CXL_PMU_GID_M2S_RWD, BIT(4)),
	/* CXL rev 3.0 Table 3-38 M2S BIRsp Memory Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_i,			CXL_PMU_GID_M2S_BIRSP, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_s,			CXL_PMU_GID_M2S_BIRSP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_e,			CXL_PMU_GID_M2S_BIRSP, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_iblk,			CXL_PMU_GID_M2S_BIRSP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_sblk,			CXL_PMU_GID_M2S_BIRSP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(m2s_birsp_eblk,			CXL_PMU_GID_M2S_BIRSP, BIT(6)),
	/* CXL rev 3.0 Table 3-40 S2M BISnp Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_cur,			CXL_PMU_GID_S2M_BISNP, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_data,			CXL_PMU_GID_S2M_BISNP, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_inv,			CXL_PMU_GID_S2M_BISNP, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_curblk,		CXL_PMU_GID_S2M_BISNP, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_datblk,		CXL_PMU_GID_S2M_BISNP, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_bisnp_invblk,		CXL_PMU_GID_S2M_BISNP, BIT(6)),
	/* CXL rev 3.0 Table 3-43 S2M NDR Opcodes */
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmp,			CXL_PMU_GID_S2M_NDR, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmps,			CXL_PMU_GID_S2M_NDR, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_cmpe,			CXL_PMU_GID_S2M_NDR, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_ndr_biconflictack,		CXL_PMU_GID_S2M_NDR, BIT(4)),
	/* CXL rev 3.0 Table 3-46 S2M DRS opcodes */
	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdata,			CXL_PMU_GID_S2M_DRS, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(s2m_drs_memdatanxm,		CXL_PMU_GID_S2M_DRS, BIT(1)),
	/* CXL rev 3.0 Table 13-5 directly lists these */
	CXL_PMU_EVENT_CXL_ATTR(ddr_act,				CXL_PMU_GID_DDR, BIT(0)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_pre,				CXL_PMU_GID_DDR, BIT(1)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_casrd,			CXL_PMU_GID_DDR, BIT(2)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_caswr,			CXL_PMU_GID_DDR, BIT(3)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_refresh,			CXL_PMU_GID_DDR, BIT(4)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_selfrefreshent,		CXL_PMU_GID_DDR, BIT(5)),
	CXL_PMU_EVENT_CXL_ATTR(ddr_rfm,				CXL_PMU_GID_DDR, BIT(6)),
	NULL
};
436
cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info * info,int vid,int gid,int msk)437 static struct cxl_pmu_ev_cap *cxl_pmu_find_fixed_counter_ev_cap(struct cxl_pmu_info *info,
438 int vid, int gid, int msk)
439 {
440 struct cxl_pmu_ev_cap *pmu_ev;
441
442 list_for_each_entry(pmu_ev, &info->event_caps_fixed, node) {
443 if (vid != pmu_ev->vid || gid != pmu_ev->gid)
444 continue;
445
446 /* Precise match for fixed counter */
447 if (msk == pmu_ev->msk)
448 return pmu_ev;
449 }
450
451 return ERR_PTR(-EINVAL);
452 }
453
cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info * info,int vid,int gid,int msk)454 static struct cxl_pmu_ev_cap *cxl_pmu_find_config_counter_ev_cap(struct cxl_pmu_info *info,
455 int vid, int gid, int msk)
456 {
457 struct cxl_pmu_ev_cap *pmu_ev;
458
459 list_for_each_entry(pmu_ev, &info->event_caps_configurable, node) {
460 if (vid != pmu_ev->vid || gid != pmu_ev->gid)
461 continue;
462
463 /* Request mask must be subset of supported */
464 if (msk & ~pmu_ev->msk)
465 continue;
466
467 return pmu_ev;
468 }
469
470 return ERR_PTR(-EINVAL);
471 }
472
cxl_pmu_event_is_visible(struct kobject * kobj,struct attribute * attr,int a)473 static umode_t cxl_pmu_event_is_visible(struct kobject *kobj, struct attribute *attr, int a)
474 {
475 struct device_attribute *dev_attr = container_of(attr, struct device_attribute, attr);
476 struct perf_pmu_events_attr *pmu_attr =
477 container_of(dev_attr, struct perf_pmu_events_attr, attr);
478 struct device *dev = kobj_to_dev(kobj);
479 struct cxl_pmu_info *info = dev_get_drvdata(dev);
480 int vid = FIELD_GET(CXL_PMU_ATTR_CONFIG_VID_MSK, pmu_attr->id);
481 int gid = FIELD_GET(CXL_PMU_ATTR_CONFIG_GID_MSK, pmu_attr->id);
482 int msk = FIELD_GET(CXL_PMU_ATTR_CONFIG_MASK_MSK, pmu_attr->id);
483
484 if (!IS_ERR(cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, msk)))
485 return attr->mode;
486
487 if (!IS_ERR(cxl_pmu_find_config_counter_ev_cap(info, vid, gid, msk)))
488 return attr->mode;
489
490 return 0;
491 }
492
/* "events" sysfs group; entries the hardware cannot count are hidden */
static const struct attribute_group cxl_pmu_events = {
	.name = "events",
	.attrs = cxl_pmu_event_attrs,
	.is_visible = cxl_pmu_event_is_visible,
};
498
cpumask_show(struct device * dev,struct device_attribute * attr,char * buf)499 static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
500 char *buf)
501 {
502 struct cxl_pmu_info *info = dev_get_drvdata(dev);
503
504 return cpumap_print_to_pagebuf(true, buf, cpumask_of(info->on_cpu));
505 }
506 static DEVICE_ATTR_RO(cpumask);
507
/* Attributes placed directly in the PMU device directory (unnamed group) */
static struct attribute *cxl_pmu_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

static const struct attribute_group cxl_pmu_cpumask_group = {
	.attrs = cxl_pmu_cpumask_attrs,
};
516
/* All sysfs groups of the PMU; installed through struct pmu .attr_groups */
static const struct attribute_group *cxl_pmu_attr_groups[] = {
	&cxl_pmu_events,
	&cxl_pmu_format_group,
	&cxl_pmu_cpumask_group,
	NULL
};
523
524 /* If counter_idx == NULL, don't try to allocate a counter. */
cxl_pmu_get_event_idx(struct perf_event * event,int * counter_idx,int * event_idx)525 static int cxl_pmu_get_event_idx(struct perf_event *event, int *counter_idx,
526 int *event_idx)
527 {
528 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
529 DECLARE_BITMAP(configurable_and_free, CXL_PMU_MAX_COUNTERS);
530 struct cxl_pmu_ev_cap *pmu_ev;
531 u32 mask;
532 u16 gid, vid;
533 int i;
534
535 vid = cxl_pmu_config_get_vid(event);
536 gid = cxl_pmu_config_get_gid(event);
537 mask = cxl_pmu_config_get_mask(event);
538
539 pmu_ev = cxl_pmu_find_fixed_counter_ev_cap(info, vid, gid, mask);
540 if (!IS_ERR(pmu_ev)) {
541 if (!counter_idx)
542 return 0;
543 if (!test_bit(pmu_ev->counter_idx, info->used_counter_bm)) {
544 *counter_idx = pmu_ev->counter_idx;
545 return 0;
546 }
547 /* Fixed counter is in use, but maybe a configurable one? */
548 }
549
550 pmu_ev = cxl_pmu_find_config_counter_ev_cap(info, vid, gid, mask);
551 if (!IS_ERR(pmu_ev)) {
552 if (!counter_idx)
553 return 0;
554
555 bitmap_andnot(configurable_and_free, info->conf_counter_bm,
556 info->used_counter_bm, CXL_PMU_MAX_COUNTERS);
557
558 i = find_first_bit(configurable_and_free, CXL_PMU_MAX_COUNTERS);
559 if (i == CXL_PMU_MAX_COUNTERS)
560 return -EINVAL;
561
562 *counter_idx = i;
563 return 0;
564 }
565
566 return -EINVAL;
567 }
568
cxl_pmu_event_init(struct perf_event * event)569 static int cxl_pmu_event_init(struct perf_event *event)
570 {
571 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
572 int rc;
573
574 /* Top level type sanity check - is this a Hardware Event being requested */
575 if (event->attr.type != event->pmu->type)
576 return -ENOENT;
577
578 if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK)
579 return -EOPNOTSUPP;
580 /* TODO: Validation of any filter */
581
582 /*
583 * Verify that it is possible to count what was requested. Either must
584 * be a fixed counter that is a precise match or a configurable counter
585 * where this is a subset.
586 */
587 rc = cxl_pmu_get_event_idx(event, NULL, NULL);
588 if (rc < 0)
589 return rc;
590
591 event->cpu = info->on_cpu;
592
593 return 0;
594 }
595
cxl_pmu_enable(struct pmu * pmu)596 static void cxl_pmu_enable(struct pmu *pmu)
597 {
598 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
599 void __iomem *base = info->base;
600
601 /* Can assume frozen at this stage */
602 writeq(0, base + CXL_PMU_FREEZE_REG);
603 }
604
cxl_pmu_disable(struct pmu * pmu)605 static void cxl_pmu_disable(struct pmu *pmu)
606 {
607 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(pmu);
608 void __iomem *base = info->base;
609
610 /*
611 * Whilst bits above number of counters are RsvdZ
612 * they are unlikely to be repurposed given
613 * number of counters is allowed to be 64 leaving
614 * no reserved bits. Hence this is only slightly
615 * naughty.
616 */
617 writeq(GENMASK_ULL(63, 0), base + CXL_PMU_FREEZE_REG);
618 }
619
cxl_pmu_event_start(struct perf_event * event,int flags)620 static void cxl_pmu_event_start(struct perf_event *event, int flags)
621 {
622 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
623 struct hw_perf_event *hwc = &event->hw;
624 void __iomem *base = info->base;
625 u64 cfg;
626
627 /*
628 * All paths to here should either set these flags directly or
629 * call cxl_pmu_event_stop() which will ensure the correct state.
630 */
631 if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
632 return;
633
634 WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
635 hwc->state = 0;
636
637 /*
638 * Currently only hdm filter control is implemnted, this code will
639 * want generalizing when more filters are added.
640 */
641 if (info->filter_hdm) {
642 if (cxl_pmu_config1_hdm_filter_en(event))
643 cfg = cxl_pmu_config2_get_hdm_decoder(event);
644 else
645 cfg = GENMASK(31, 0); /* No filtering if 0xFFFF_FFFF */
646 writeq(cfg, base + CXL_PMU_FILTER_CFG_REG(hwc->idx, 0));
647 }
648
649 cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
650 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1);
651 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_FREEZE_ON_OVRFLW, 1);
652 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1);
653 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EDGE,
654 cxl_pmu_config1_get_edge(event) ? 1 : 0);
655 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_INVERT,
656 cxl_pmu_config1_get_invert(event) ? 1 : 0);
657
658 /* Fixed purpose counters have next two fields RO */
659 if (test_bit(hwc->idx, info->conf_counter_bm)) {
660 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENT_GRP_ID_IDX_MSK,
661 hwc->event_base);
662 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_EVENTS_MSK,
663 cxl_pmu_config_get_mask(event));
664 }
665 cfg &= ~CXL_PMU_COUNTER_CFG_THRESHOLD_MSK;
666 /*
667 * For events that generate only 1 count per clock the CXL 3.0 spec
668 * states the threshold shall be set to 1 but if set to 0 it will
669 * count the raw value anwyay?
670 * There is no definition of what events will count multiple per cycle
671 * and hence to which non 1 values of threshold can apply.
672 * (CXL 3.0 8.2.7.2.1 Counter Configuration - threshold field definition)
673 */
674 cfg |= FIELD_PREP(CXL_PMU_COUNTER_CFG_THRESHOLD_MSK,
675 cxl_pmu_config1_get_threshold(event));
676 writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
677
678 local64_set(&hwc->prev_count, 0);
679 writeq(0, base + CXL_PMU_COUNTER_REG(hwc->idx));
680
681 perf_event_update_userpage(event);
682 }
683
cxl_pmu_read_counter(struct perf_event * event)684 static u64 cxl_pmu_read_counter(struct perf_event *event)
685 {
686 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
687 void __iomem *base = info->base;
688
689 return readq(base + CXL_PMU_COUNTER_REG(event->hw.idx));
690 }
691
/*
 * Fold the hardware counter's progress since the last read into
 * event->count.
 *
 * @overflow: true when called because the counter's overflow status bit
 *	      was set, false for an ordinary read.
 */
static void __cxl_pmu_read(struct perf_event *event, bool overflow)
{
	struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
	struct hw_perf_event *hwc = &event->hw;
	u64 new_cnt, prev_cnt, delta, width_mask;

	/* Retry until prev_count was replaced without a concurrent update */
	do {
		prev_cnt = local64_read(&hwc->prev_count);
		new_cnt = cxl_pmu_read_counter(event);
	} while (local64_cmpxchg(&hwc->prev_count, prev_cnt, new_cnt) != prev_cnt);

	/*
	 * Build the counter mask without shifting by an out of range amount:
	 * a 64 bit wide counter uses every bit, and a shift by the full type
	 * width would be undefined.
	 */
	if (info->counter_width >= 64)
		width_mask = GENMASK_ULL(63, 0);
	else
		width_mask = BIT_ULL(info->counter_width) - 1;

	/*
	 * If we know an overflow occurred then take that into account.
	 * Note the counter is not reset as that would lose events.
	 *
	 * For a 64 bit counter one full period is 2^64, which is zero in
	 * 64 bit arithmetic, so there is nothing to add in that case.
	 *
	 * NOTE(review): when prev_cnt is larger than new_cnt the masked
	 * subtraction already accounts for one wrap - confirm that adding a
	 * further full period is intended in that case.
	 */
	delta = (new_cnt - prev_cnt) & width_mask;
	if (overflow && delta < width_mask && info->counter_width < 64)
		delta += BIT_ULL(info->counter_width);

	local64_add(delta, &event->count);
}
713
cxl_pmu_read(struct perf_event * event)714 static void cxl_pmu_read(struct perf_event *event)
715 {
716 __cxl_pmu_read(event, false);
717 }
718
cxl_pmu_event_stop(struct perf_event * event,int flags)719 static void cxl_pmu_event_stop(struct perf_event *event, int flags)
720 {
721 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
722 void __iomem *base = info->base;
723 struct hw_perf_event *hwc = &event->hw;
724 u64 cfg;
725
726 cxl_pmu_read(event);
727 WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
728 hwc->state |= PERF_HES_STOPPED;
729
730 cfg = readq(base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
731 cfg &= ~(FIELD_PREP(CXL_PMU_COUNTER_CFG_INT_ON_OVRFLW, 1) |
732 FIELD_PREP(CXL_PMU_COUNTER_CFG_ENABLE, 1));
733 writeq(cfg, base + CXL_PMU_COUNTER_CFG_REG(hwc->idx));
734
735 hwc->state |= PERF_HES_UPTODATE;
736 }
737
cxl_pmu_event_add(struct perf_event * event,int flags)738 static int cxl_pmu_event_add(struct perf_event *event, int flags)
739 {
740 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
741 struct hw_perf_event *hwc = &event->hw;
742 int idx, rc;
743 int event_idx = 0;
744
745 hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
746
747 rc = cxl_pmu_get_event_idx(event, &idx, &event_idx);
748 if (rc < 0)
749 return rc;
750
751 hwc->idx = idx;
752
753 /* Only set for configurable counters */
754 hwc->event_base = event_idx;
755 info->hw_events[idx] = event;
756 set_bit(idx, info->used_counter_bm);
757
758 if (flags & PERF_EF_START)
759 cxl_pmu_event_start(event, PERF_EF_RELOAD);
760
761 return 0;
762 }
763
cxl_pmu_event_del(struct perf_event * event,int flags)764 static void cxl_pmu_event_del(struct perf_event *event, int flags)
765 {
766 struct cxl_pmu_info *info = pmu_to_cxl_pmu_info(event->pmu);
767 struct hw_perf_event *hwc = &event->hw;
768
769 cxl_pmu_event_stop(event, PERF_EF_UPDATE);
770 clear_bit(hwc->idx, info->used_counter_bm);
771 info->hw_events[hwc->idx] = NULL;
772 perf_event_update_userpage(event);
773 }
774
cxl_pmu_irq(int irq,void * data)775 static irqreturn_t cxl_pmu_irq(int irq, void *data)
776 {
777 struct cxl_pmu_info *info = data;
778 void __iomem *base = info->base;
779 u64 overflowed;
780 DECLARE_BITMAP(overflowedbm, 64);
781 int i;
782
783 overflowed = readq(base + CXL_PMU_OVERFLOW_REG);
784
785 /* Interrupt may be shared, so maybe it isn't ours */
786 if (!overflowed)
787 return IRQ_NONE;
788
789 bitmap_from_arr64(overflowedbm, &overflowed, 64);
790 for_each_set_bit(i, overflowedbm, info->num_counters) {
791 struct perf_event *event = info->hw_events[i];
792
793 if (!event) {
794 dev_dbg(info->pmu.dev,
795 "overflow but on non enabled counter %d\n", i);
796 continue;
797 }
798
799 __cxl_pmu_read(event, true);
800 }
801
802 writeq(overflowed, base + CXL_PMU_OVERFLOW_REG);
803
804 return IRQ_HANDLED;
805 }
806
cxl_pmu_perf_unregister(void * _info)807 static void cxl_pmu_perf_unregister(void *_info)
808 {
809 struct cxl_pmu_info *info = _info;
810
811 perf_pmu_unregister(&info->pmu);
812 }
813
cxl_pmu_cpuhp_remove(void * _info)814 static void cxl_pmu_cpuhp_remove(void *_info)
815 {
816 struct cxl_pmu_info *info = _info;
817
818 cpuhp_state_remove_instance_nocalls(cxl_pmu_cpuhp_state_num, &info->node);
819 }
820
cxl_pmu_probe(struct device * dev)821 static int cxl_pmu_probe(struct device *dev)
822 {
823 struct cxl_pmu *pmu = to_cxl_pmu(dev);
824 struct pci_dev *pdev = to_pci_dev(dev->parent);
825 struct cxl_pmu_info *info;
826 char *irq_name;
827 char *dev_name;
828 int rc, irq;
829
830 info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
831 if (!info)
832 return -ENOMEM;
833
834 dev_set_drvdata(dev, info);
835 INIT_LIST_HEAD(&info->event_caps_fixed);
836 INIT_LIST_HEAD(&info->event_caps_configurable);
837
838 info->base = pmu->base;
839
840 info->on_cpu = -1;
841 rc = cxl_pmu_parse_caps(dev, info);
842 if (rc)
843 return rc;
844
845 info->hw_events = devm_kcalloc(dev, sizeof(*info->hw_events),
846 info->num_counters, GFP_KERNEL);
847 if (!info->hw_events)
848 return -ENOMEM;
849
850 switch (pmu->type) {
851 case CXL_PMU_MEMDEV:
852 dev_name = devm_kasprintf(dev, GFP_KERNEL, "cxl_pmu_mem%d.%d",
853 pmu->assoc_id, pmu->index);
854 break;
855 }
856 if (!dev_name)
857 return -ENOMEM;
858
859 info->pmu = (struct pmu) {
860 .name = dev_name,
861 .parent = dev,
862 .module = THIS_MODULE,
863 .event_init = cxl_pmu_event_init,
864 .pmu_enable = cxl_pmu_enable,
865 .pmu_disable = cxl_pmu_disable,
866 .add = cxl_pmu_event_add,
867 .del = cxl_pmu_event_del,
868 .start = cxl_pmu_event_start,
869 .stop = cxl_pmu_event_stop,
870 .read = cxl_pmu_read,
871 .task_ctx_nr = perf_invalid_context,
872 .attr_groups = cxl_pmu_attr_groups,
873 .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
874 };
875
876 if (info->irq <= 0)
877 return -EINVAL;
878
879 rc = pci_irq_vector(pdev, info->irq);
880 if (rc < 0)
881 return rc;
882 irq = rc;
883
884 irq_name = devm_kasprintf(dev, GFP_KERNEL, "%s_overflow\n", dev_name);
885 if (!irq_name)
886 return -ENOMEM;
887
888 rc = devm_request_irq(dev, irq, cxl_pmu_irq, IRQF_SHARED | IRQF_ONESHOT,
889 irq_name, info);
890 if (rc)
891 return rc;
892 info->irq = irq;
893
894 rc = cpuhp_state_add_instance(cxl_pmu_cpuhp_state_num, &info->node);
895 if (rc)
896 return rc;
897
898 rc = devm_add_action_or_reset(dev, cxl_pmu_cpuhp_remove, info);
899 if (rc)
900 return rc;
901
902 rc = perf_pmu_register(&info->pmu, info->pmu.name, -1);
903 if (rc)
904 return rc;
905
906 rc = devm_add_action_or_reset(dev, cxl_pmu_perf_unregister, info);
907 if (rc)
908 return rc;
909
910 return 0;
911 }
912
913 static struct cxl_driver cxl_pmu_driver = {
914 .name = "cxl_pmu",
915 .probe = cxl_pmu_probe,
916 .id = CXL_DEVICE_PMU,
917 };
918
cxl_pmu_online_cpu(unsigned int cpu,struct hlist_node * node)919 static int cxl_pmu_online_cpu(unsigned int cpu, struct hlist_node *node)
920 {
921 struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
922
923 if (info->on_cpu != -1)
924 return 0;
925
926 info->on_cpu = cpu;
927 /*
928 * CPU HP lock is held so we should be guaranteed that the CPU hasn't yet
929 * gone away again.
930 */
931 WARN_ON(irq_set_affinity(info->irq, cpumask_of(cpu)));
932
933 return 0;
934 }
935
cxl_pmu_offline_cpu(unsigned int cpu,struct hlist_node * node)936 static int cxl_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
937 {
938 struct cxl_pmu_info *info = hlist_entry_safe(node, struct cxl_pmu_info, node);
939 unsigned int target;
940
941 if (info->on_cpu != cpu)
942 return 0;
943
944 info->on_cpu = -1;
945 target = cpumask_any_but(cpu_online_mask, cpu);
946 if (target >= nr_cpu_ids) {
947 dev_err(info->pmu.dev, "Unable to find a suitable CPU\n");
948 return 0;
949 }
950
951 perf_pmu_migrate_context(&info->pmu, cpu, target);
952 info->on_cpu = target;
953 /*
954 * CPU HP lock is held so we should be guaranteed that this CPU hasn't yet
955 * gone away.
956 */
957 WARN_ON(irq_set_affinity(info->irq, cpumask_of(target)));
958
959 return 0;
960 }
961
cxl_pmu_init(void)962 static __init int cxl_pmu_init(void)
963 {
964 int rc;
965
966 rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
967 "AP_PERF_CXL_PMU_ONLINE",
968 cxl_pmu_online_cpu, cxl_pmu_offline_cpu);
969 if (rc < 0)
970 return rc;
971 cxl_pmu_cpuhp_state_num = rc;
972
973 rc = cxl_driver_register(&cxl_pmu_driver);
974 if (rc)
975 cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
976
977 return rc;
978 }
979
cxl_pmu_exit(void)980 static __exit void cxl_pmu_exit(void)
981 {
982 cxl_driver_unregister(&cxl_pmu_driver);
983 cpuhp_remove_multi_state(cxl_pmu_cpuhp_state_num);
984 }
985
986 MODULE_LICENSE("GPL");
987 MODULE_IMPORT_NS(CXL);
988 module_init(cxl_pmu_init);
989 module_exit(cxl_pmu_exit);
990 MODULE_ALIAS_CXL(CXL_DEVICE_PMU);
991