xref: /openbmc/linux/drivers/hwtracing/ptt/hisi_ptt.c (revision 7a836736b6537b0e2633381d743d9c1559ce243c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Driver for HiSilicon PCIe tune and trace device
4  *
5  * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
6  * Author: Yicong Yang <yangyicong@hisilicon.com>
7  */
8 
9 #include <linux/bitfield.h>
10 #include <linux/bitops.h>
11 #include <linux/cpuhotplug.h>
12 #include <linux/delay.h>
13 #include <linux/dma-mapping.h>
14 #include <linux/interrupt.h>
15 #include <linux/io.h>
16 #include <linux/iommu.h>
17 #include <linux/iopoll.h>
18 #include <linux/module.h>
19 #include <linux/sysfs.h>
20 #include <linux/vmalloc.h>
21 
22 #include "hisi_ptt.h"
23 
24 /* Dynamic CPU hotplug state used by PTT */
25 static enum cpuhp_state hisi_ptt_pmu_online;
26 
27 static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
28 {
29 	u32 val;
30 
31 	return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
32 				   val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
33 				   HISI_PTT_WAIT_POLL_INTERVAL_US,
34 				   HISI_PTT_WAIT_TUNE_TIMEOUT_US);
35 }
36 
37 static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
38 				       struct device_attribute *attr,
39 				       char *buf)
40 {
41 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
42 	struct dev_ext_attribute *ext_attr;
43 	struct hisi_ptt_tune_desc *desc;
44 	u32 reg;
45 	u16 val;
46 
47 	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
48 	desc = ext_attr->var;
49 
50 	mutex_lock(&hisi_ptt->tune_lock);
51 
52 	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
53 	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
54 	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
55 			  desc->event_code);
56 	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
57 
58 	/* Write all 1 to indicates it's the read process */
59 	writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
60 
61 	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
62 		mutex_unlock(&hisi_ptt->tune_lock);
63 		return -ETIMEDOUT;
64 	}
65 
66 	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
67 	reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
68 	val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);
69 
70 	mutex_unlock(&hisi_ptt->tune_lock);
71 	return sysfs_emit(buf, "%u\n", val);
72 }
73 
74 static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
75 					struct device_attribute *attr,
76 					const char *buf, size_t count)
77 {
78 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
79 	struct dev_ext_attribute *ext_attr;
80 	struct hisi_ptt_tune_desc *desc;
81 	u32 reg;
82 	u16 val;
83 
84 	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
85 	desc = ext_attr->var;
86 
87 	if (kstrtou16(buf, 10, &val))
88 		return -EINVAL;
89 
90 	mutex_lock(&hisi_ptt->tune_lock);
91 
92 	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
93 	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
94 	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
95 			  desc->event_code);
96 	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
97 	writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
98 	       hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
99 
100 	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
101 		mutex_unlock(&hisi_ptt->tune_lock);
102 		return -ETIMEDOUT;
103 	}
104 
105 	mutex_unlock(&hisi_ptt->tune_lock);
106 	return count;
107 }
108 
/*
 * Define a tuning attribute named @_name.
 *
 * Expands to two static objects: a struct hisi_ptt_tune_desc called
 * <_name>_desc holding the attribute name and the event code @_val, and a
 * struct dev_ext_attribute called hisi_ptt_<_name>_attr (mode 0600) whose
 * ->var points at that descriptor and which uses @_show / @_store as its
 * sysfs callbacks.
 */
#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store)			\
	static struct hisi_ptt_tune_desc _name##_desc = {		\
		.name = #_name,						\
		.event_code = (_val),					\
	};								\
	static struct dev_ext_attribute hisi_ptt_##_name##_attr = {	\
		.attr	= __ATTR(_name, 0600, _show, _store),		\
		.var	= &_name##_desc,				\
	}

/* Same as HISI_PTT_TUNE_ATTR() but with the common show/store callbacks. */
#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val)		\
	HISI_PTT_TUNE_ATTR(_name, _val,			\
			   hisi_ptt_tune_attr_show,	\
			   hisi_ptt_tune_attr_store)
123 
/*
 * The value of a tuning event is composed of two parts: the main event code
 * in BIT[0,15] and the subevent code in BIT[16,23]. For example, qos_tx_cpl
 * is a subevent of 'Tx path QoS control' which is used for tuning the weight
 * of Tx completion TLPs. See hisi_ptt.rst documentation for more information.
 */
#define HISI_PTT_TUNE_QOS_TX_CPL		(0x4 | (3 << 16))
#define HISI_PTT_TUNE_QOS_TX_NP			(0x4 | (4 << 16))
#define HISI_PTT_TUNE_QOS_TX_P			(0x4 | (5 << 16))
#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL	(0x5 | (6 << 16))
#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL	(0x5 | (7 << 16))

/* Instantiate one descriptor/attribute pair per tuning event above */
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);
141 
/* NULL-terminated list of all tuning attributes exposed through sysfs */
static struct attribute *hisi_ptt_tune_attrs[] = {
	&hisi_ptt_qos_tx_cpl_attr.attr.attr,
	&hisi_ptt_qos_tx_np_attr.attr.attr,
	&hisi_ptt_qos_tx_p_attr.attr.attr,
	&hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
	&hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
	NULL,
};

/* The tuning attributes are grouped in a sysfs directory named "tune" */
static struct attribute_group hisi_ptt_tune_group = {
	.name	= "tune",
	.attrs	= hisi_ptt_tune_attrs,
};
155 
156 static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
157 {
158 	if (is_port)
159 		return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));
160 
161 	return devid;
162 }
163 
164 static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
165 {
166 	u32 val;
167 
168 	return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
169 					  val, val & HISI_PTT_TRACE_IDLE,
170 					  HISI_PTT_WAIT_POLL_INTERVAL_US,
171 					  HISI_PTT_WAIT_TRACE_TIMEOUT_US);
172 }
173 
174 static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
175 {
176 	u32 val;
177 
178 	readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
179 				  val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
180 				  HISI_PTT_RESET_TIMEOUT_US);
181 }
182 
/*
 * Stop tracing: clear the whole trace control register and record in the
 * software state that the trace is no longer started.
 */
static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
{
	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
	hisi_ptt->trace_ctrl.started = false;
}
188 
189 static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
190 {
191 	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
192 	u32 val;
193 	int i;
194 
195 	/* Check device idle before start trace */
196 	if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
197 		pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
198 		return -EBUSY;
199 	}
200 
201 	ctrl->started = true;
202 
203 	/* Reset the DMA before start tracing */
204 	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
205 	val |= HISI_PTT_TRACE_CTRL_RST;
206 	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
207 
208 	hisi_ptt_wait_dma_reset_done(hisi_ptt);
209 
210 	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
211 	val &= ~HISI_PTT_TRACE_CTRL_RST;
212 	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
213 
214 	/* Reset the index of current buffer */
215 	hisi_ptt->trace_ctrl.buf_index = 0;
216 
217 	/* Zero the trace buffers */
218 	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
219 		memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);
220 
221 	/* Clear the interrupt status */
222 	writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
223 	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);
224 
225 	/* Set the trace control register */
226 	val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
227 	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
228 	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
229 	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
230 	if (!hisi_ptt->trace_ctrl.is_port)
231 		val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;
232 
233 	/* Start the Trace */
234 	val |= HISI_PTT_TRACE_CTRL_EN;
235 	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
236 
237 	return 0;
238 }
239 
240 static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
241 {
242 	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
243 	struct perf_output_handle *handle = &ctrl->handle;
244 	struct perf_event *event = handle->event;
245 	struct hisi_ptt_pmu_buf *buf;
246 	size_t size;
247 	void *addr;
248 
249 	buf = perf_get_aux(handle);
250 	if (!buf || !handle->size)
251 		return -EINVAL;
252 
253 	addr = ctrl->trace_buf[ctrl->buf_index].addr;
254 
255 	/*
256 	 * If we're going to stop, read the size of already traced data from
257 	 * HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
258 	 * the data size is always HISI_PTT_TRACE_BUF_SIZE.
259 	 */
260 	if (stop) {
261 		u32 reg;
262 
263 		reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
264 		size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
265 	} else {
266 		size = HISI_PTT_TRACE_BUF_SIZE;
267 	}
268 
269 	memcpy(buf->base + buf->pos, addr, size);
270 	buf->pos += size;
271 
272 	/*
273 	 * Just commit the traced data if we're going to stop. Otherwise if the
274 	 * resident AUX buffer cannot contain the data of next trace buffer,
275 	 * apply a new one.
276 	 */
277 	if (stop) {
278 		perf_aux_output_end(handle, buf->pos);
279 	} else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
280 		perf_aux_output_end(handle, buf->pos);
281 
282 		buf = perf_aux_output_begin(handle, event);
283 		if (!buf)
284 			return -EINVAL;
285 
286 		buf->pos = handle->head % buf->length;
287 		if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
288 			perf_aux_output_end(handle, 0);
289 			return -EINVAL;
290 		}
291 	}
292 
293 	return 0;
294 }
295 
296 static irqreturn_t hisi_ptt_isr(int irq, void *context)
297 {
298 	struct hisi_ptt *hisi_ptt = context;
299 	u32 status, buf_idx;
300 
301 	status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
302 	if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
303 		return IRQ_NONE;
304 
305 	buf_idx = ffs(status) - 1;
306 
307 	/* Clear the interrupt status of buffer @buf_idx */
308 	writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
309 
310 	/*
311 	 * Update the AUX buffer and cache the current buffer index,
312 	 * as we need to know this and save the data when the trace
313 	 * is ended out of the interrupt handler. End the trace
314 	 * if the updating fails.
315 	 */
316 	if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
317 		hisi_ptt_trace_end(hisi_ptt);
318 	else
319 		hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;
320 
321 	return IRQ_HANDLED;
322 }
323 
/* devm action: release the IRQ vectors allocated for the PCI device @pdev */
static void hisi_ptt_irq_free_vectors(void *pdev)
{
	struct pci_dev *pci = pdev;

	pci_free_irq_vectors(pci);
}
328 
329 static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
330 {
331 	struct pci_dev *pdev = hisi_ptt->pdev;
332 	int ret;
333 
334 	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
335 	if (ret < 0) {
336 		pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
337 		return ret;
338 	}
339 
340 	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
341 	if (ret < 0)
342 		return ret;
343 
344 	hisi_ptt->trace_irq = pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ);
345 	ret = devm_request_irq(&pdev->dev, hisi_ptt->trace_irq, hisi_ptt_isr,
346 				IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME,
347 				hisi_ptt);
348 	if (ret) {
349 		pci_err(pdev, "failed to request irq %d, ret = %d\n",
350 			hisi_ptt->trace_irq, ret);
351 		return ret;
352 	}
353 
354 	return 0;
355 }
356 
357 static void hisi_ptt_del_free_filter(struct hisi_ptt *hisi_ptt,
358 				      struct hisi_ptt_filter_desc *filter)
359 {
360 	if (filter->is_port)
361 		hisi_ptt->port_mask &= ~hisi_ptt_get_filter_val(filter->devid, true);
362 
363 	list_del(&filter->list);
364 	kfree(filter->name);
365 	kfree(filter);
366 }
367 
368 static struct hisi_ptt_filter_desc *
369 hisi_ptt_alloc_add_filter(struct hisi_ptt *hisi_ptt, u16 devid, bool is_port)
370 {
371 	struct hisi_ptt_filter_desc *filter;
372 	u8 devfn = devid & 0xff;
373 	char *filter_name;
374 
375 	filter_name = kasprintf(GFP_KERNEL, "%04x:%02x:%02x.%d", pci_domain_nr(hisi_ptt->pdev->bus),
376 				 PCI_BUS_NUM(devid), PCI_SLOT(devfn), PCI_FUNC(devfn));
377 	if (!filter_name) {
378 		pci_err(hisi_ptt->pdev, "failed to allocate name for filter %04x:%02x:%02x.%d\n",
379 			pci_domain_nr(hisi_ptt->pdev->bus), PCI_BUS_NUM(devid),
380 			PCI_SLOT(devfn), PCI_FUNC(devfn));
381 		return NULL;
382 	}
383 
384 	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
385 	if (!filter) {
386 		pci_err(hisi_ptt->pdev, "failed to add filter for %s\n",
387 			filter_name);
388 		kfree(filter_name);
389 		return NULL;
390 	}
391 
392 	filter->name = filter_name;
393 	filter->is_port = is_port;
394 	filter->devid = devid;
395 
396 	if (filter->is_port) {
397 		list_add_tail(&filter->list, &hisi_ptt->port_filters);
398 
399 		/* Update the available port mask */
400 		hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
401 	} else {
402 		list_add_tail(&filter->list, &hisi_ptt->req_filters);
403 	}
404 
405 	return filter;
406 }
407 
408 static ssize_t hisi_ptt_filter_show(struct device *dev, struct device_attribute *attr,
409 				    char *buf)
410 {
411 	struct hisi_ptt_filter_desc *filter;
412 	unsigned long filter_val;
413 
414 	filter = container_of(attr, struct hisi_ptt_filter_desc, attr);
415 	filter_val = hisi_ptt_get_filter_val(filter->devid, filter->is_port) |
416 		     (filter->is_port ? HISI_PTT_PMU_FILTER_IS_PORT : 0);
417 
418 	return sysfs_emit(buf, "0x%05lx\n", filter_val);
419 }
420 
421 static int hisi_ptt_create_rp_filter_attr(struct hisi_ptt *hisi_ptt,
422 					  struct hisi_ptt_filter_desc *filter)
423 {
424 	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
425 
426 	sysfs_attr_init(&filter->attr.attr);
427 	filter->attr.attr.name = filter->name;
428 	filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
429 	filter->attr.show = hisi_ptt_filter_show;
430 
431 	return sysfs_add_file_to_group(kobj, &filter->attr.attr,
432 				       HISI_PTT_RP_FILTERS_GRP_NAME);
433 }
434 
435 static void hisi_ptt_remove_rp_filter_attr(struct hisi_ptt *hisi_ptt,
436 					  struct hisi_ptt_filter_desc *filter)
437 {
438 	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
439 
440 	sysfs_remove_file_from_group(kobj, &filter->attr.attr,
441 				     HISI_PTT_RP_FILTERS_GRP_NAME);
442 }
443 
444 static int hisi_ptt_create_req_filter_attr(struct hisi_ptt *hisi_ptt,
445 					   struct hisi_ptt_filter_desc *filter)
446 {
447 	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
448 
449 	sysfs_attr_init(&filter->attr.attr);
450 	filter->attr.attr.name = filter->name;
451 	filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
452 	filter->attr.show = hisi_ptt_filter_show;
453 
454 	return sysfs_add_file_to_group(kobj, &filter->attr.attr,
455 				       HISI_PTT_REQ_FILTERS_GRP_NAME);
456 }
457 
458 static void hisi_ptt_remove_req_filter_attr(struct hisi_ptt *hisi_ptt,
459 					   struct hisi_ptt_filter_desc *filter)
460 {
461 	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
462 
463 	sysfs_remove_file_from_group(kobj, &filter->attr.attr,
464 				     HISI_PTT_REQ_FILTERS_GRP_NAME);
465 }
466 
467 static int hisi_ptt_create_filter_attr(struct hisi_ptt *hisi_ptt,
468 				       struct hisi_ptt_filter_desc *filter)
469 {
470 	int ret;
471 
472 	if (filter->is_port)
473 		ret = hisi_ptt_create_rp_filter_attr(hisi_ptt, filter);
474 	else
475 		ret = hisi_ptt_create_req_filter_attr(hisi_ptt, filter);
476 
477 	if (ret)
478 		pci_err(hisi_ptt->pdev, "failed to create sysfs attribute for filter %s\n",
479 			filter->name);
480 
481 	return ret;
482 }
483 
484 static void hisi_ptt_remove_filter_attr(struct hisi_ptt *hisi_ptt,
485 					struct hisi_ptt_filter_desc *filter)
486 {
487 	if (filter->is_port)
488 		hisi_ptt_remove_rp_filter_attr(hisi_ptt, filter);
489 	else
490 		hisi_ptt_remove_req_filter_attr(hisi_ptt, filter);
491 }
492 
493 static void hisi_ptt_remove_all_filter_attributes(void *data)
494 {
495 	struct hisi_ptt_filter_desc *filter;
496 	struct hisi_ptt *hisi_ptt = data;
497 
498 	mutex_lock(&hisi_ptt->filter_lock);
499 
500 	list_for_each_entry(filter, &hisi_ptt->req_filters, list)
501 		hisi_ptt_remove_filter_attr(hisi_ptt, filter);
502 
503 	list_for_each_entry(filter, &hisi_ptt->port_filters, list)
504 		hisi_ptt_remove_filter_attr(hisi_ptt, filter);
505 
506 	hisi_ptt->sysfs_inited = false;
507 	mutex_unlock(&hisi_ptt->filter_lock);
508 }
509 
510 static int hisi_ptt_init_filter_attributes(struct hisi_ptt *hisi_ptt)
511 {
512 	struct hisi_ptt_filter_desc *filter;
513 	int ret;
514 
515 	mutex_lock(&hisi_ptt->filter_lock);
516 
517 	/*
518 	 * Register the reset callback in the first stage. In reset we traverse
519 	 * the filters list to remove the sysfs attributes so the callback can
520 	 * be called safely even without below filter attributes creation.
521 	 */
522 	ret = devm_add_action(&hisi_ptt->pdev->dev,
523 			      hisi_ptt_remove_all_filter_attributes,
524 			      hisi_ptt);
525 	if (ret)
526 		goto out;
527 
528 	list_for_each_entry(filter, &hisi_ptt->port_filters, list) {
529 		ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
530 		if (ret)
531 			goto out;
532 	}
533 
534 	list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
535 		ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
536 		if (ret)
537 			goto out;
538 	}
539 
540 	hisi_ptt->sysfs_inited = true;
541 out:
542 	mutex_unlock(&hisi_ptt->filter_lock);
543 	return ret;
544 }
545 
546 static void hisi_ptt_update_filters(struct work_struct *work)
547 {
548 	struct delayed_work *delayed_work = to_delayed_work(work);
549 	struct hisi_ptt_filter_update_info info;
550 	struct hisi_ptt_filter_desc *filter;
551 	struct hisi_ptt *hisi_ptt;
552 
553 	hisi_ptt = container_of(delayed_work, struct hisi_ptt, work);
554 
555 	if (!mutex_trylock(&hisi_ptt->filter_lock)) {
556 		schedule_delayed_work(&hisi_ptt->work, HISI_PTT_WORK_DELAY_MS);
557 		return;
558 	}
559 
560 	while (kfifo_get(&hisi_ptt->filter_update_kfifo, &info)) {
561 		if (info.is_add) {
562 			/*
563 			 * Notify the users if failed to add this filter, others
564 			 * still work and available. See the comments in
565 			 * hisi_ptt_init_filters().
566 			 */
567 			filter = hisi_ptt_alloc_add_filter(hisi_ptt, info.devid, info.is_port);
568 			if (!filter)
569 				continue;
570 
571 			/*
572 			 * If filters' sysfs entries hasn't been initialized,
573 			 * then we're still at probe stage. Add the filters to
574 			 * the list and later hisi_ptt_init_filter_attributes()
575 			 * will create sysfs attributes for all the filters.
576 			 */
577 			if (hisi_ptt->sysfs_inited &&
578 			    hisi_ptt_create_filter_attr(hisi_ptt, filter)) {
579 				hisi_ptt_del_free_filter(hisi_ptt, filter);
580 				continue;
581 			}
582 		} else {
583 			struct hisi_ptt_filter_desc *tmp;
584 			struct list_head *target_list;
585 
586 			target_list = info.is_port ? &hisi_ptt->port_filters :
587 				      &hisi_ptt->req_filters;
588 
589 			list_for_each_entry_safe(filter, tmp, target_list, list)
590 				if (filter->devid == info.devid) {
591 					if (hisi_ptt->sysfs_inited)
592 						hisi_ptt_remove_filter_attr(hisi_ptt, filter);
593 
594 					hisi_ptt_del_free_filter(hisi_ptt, filter);
595 					break;
596 				}
597 		}
598 	}
599 
600 	mutex_unlock(&hisi_ptt->filter_lock);
601 }
602 
603 /*
604  * A PCI bus notifier is used here for dynamically updating the filter
605  * list.
606  */
607 static int hisi_ptt_notifier_call(struct notifier_block *nb, unsigned long action,
608 				  void *data)
609 {
610 	struct hisi_ptt *hisi_ptt = container_of(nb, struct hisi_ptt, hisi_ptt_nb);
611 	struct hisi_ptt_filter_update_info info;
612 	struct pci_dev *pdev, *root_port;
613 	struct device *dev = data;
614 	u32 port_devid;
615 
616 	pdev = to_pci_dev(dev);
617 	root_port = pcie_find_root_port(pdev);
618 	if (!root_port)
619 		return 0;
620 
621 	port_devid = pci_dev_id(root_port);
622 	if (port_devid < hisi_ptt->lower_bdf ||
623 	    port_devid > hisi_ptt->upper_bdf)
624 		return 0;
625 
626 	info.is_port = pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT;
627 	info.devid = pci_dev_id(pdev);
628 
629 	switch (action) {
630 	case BUS_NOTIFY_ADD_DEVICE:
631 		info.is_add = true;
632 		break;
633 	case BUS_NOTIFY_DEL_DEVICE:
634 		info.is_add = false;
635 		break;
636 	default:
637 		return 0;
638 	}
639 
640 	/*
641 	 * The FIFO size is 16 which is sufficient for almost all the cases,
642 	 * since each PCIe core will have most 8 Root Ports (typically only
643 	 * 1~4 Root Ports). On failure log the failed filter and let user
644 	 * handle it.
645 	 */
646 	if (kfifo_in_spinlocked(&hisi_ptt->filter_update_kfifo, &info, 1,
647 				&hisi_ptt->filter_update_lock))
648 		schedule_delayed_work(&hisi_ptt->work, 0);
649 	else
650 		pci_warn(hisi_ptt->pdev,
651 			 "filter update fifo overflow for target %s\n",
652 			 pci_name(pdev));
653 
654 	return 0;
655 }
656 
657 static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
658 {
659 	struct pci_dev *root_port = pcie_find_root_port(pdev);
660 	struct hisi_ptt_filter_desc *filter;
661 	struct hisi_ptt *hisi_ptt = data;
662 	u32 port_devid;
663 
664 	if (!root_port)
665 		return 0;
666 
667 	port_devid = pci_dev_id(root_port);
668 	if (port_devid < hisi_ptt->lower_bdf ||
669 	    port_devid > hisi_ptt->upper_bdf)
670 		return 0;
671 
672 	/*
673 	 * We won't fail the probe if filter allocation failed here. The filters
674 	 * should be partial initialized and users would know which filter fails
675 	 * through the log. Other functions of PTT device are still available.
676 	 */
677 	filter = hisi_ptt_alloc_add_filter(hisi_ptt, pci_dev_id(pdev),
678 					    pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT);
679 	if (!filter)
680 		return -ENOMEM;
681 
682 	return 0;
683 }
684 
685 static void hisi_ptt_release_filters(void *data)
686 {
687 	struct hisi_ptt_filter_desc *filter, *tmp;
688 	struct hisi_ptt *hisi_ptt = data;
689 
690 	list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list)
691 		hisi_ptt_del_free_filter(hisi_ptt, filter);
692 
693 	list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list)
694 		hisi_ptt_del_free_filter(hisi_ptt, filter);
695 }
696 
697 static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
698 {
699 	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
700 	struct device *dev = &hisi_ptt->pdev->dev;
701 	int i;
702 
703 	ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
704 				       sizeof(*ctrl->trace_buf), GFP_KERNEL);
705 	if (!ctrl->trace_buf)
706 		return -ENOMEM;
707 
708 	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
709 		ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
710 							     &ctrl->trace_buf[i].dma,
711 							     GFP_KERNEL);
712 		if (!ctrl->trace_buf[i].addr)
713 			return -ENOMEM;
714 	}
715 
716 	/* Configure the trace DMA buffer */
717 	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
718 		writel(lower_32_bits(ctrl->trace_buf[i].dma),
719 		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
720 		       i * HISI_PTT_TRACE_ADDR_STRIDE);
721 		writel(upper_32_bits(ctrl->trace_buf[i].dma),
722 		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
723 		       i * HISI_PTT_TRACE_ADDR_STRIDE);
724 	}
725 	writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);
726 
727 	return 0;
728 }
729 
730 static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
731 {
732 	struct pci_dev *pdev = hisi_ptt->pdev;
733 	struct pci_bus *bus;
734 	int ret;
735 	u32 reg;
736 
737 	INIT_DELAYED_WORK(&hisi_ptt->work, hisi_ptt_update_filters);
738 	INIT_KFIFO(hisi_ptt->filter_update_kfifo);
739 	spin_lock_init(&hisi_ptt->filter_update_lock);
740 
741 	INIT_LIST_HEAD(&hisi_ptt->port_filters);
742 	INIT_LIST_HEAD(&hisi_ptt->req_filters);
743 	mutex_init(&hisi_ptt->filter_lock);
744 
745 	ret = hisi_ptt_config_trace_buf(hisi_ptt);
746 	if (ret)
747 		return ret;
748 
749 	/*
750 	 * The device range register provides the information about the root
751 	 * ports which the RCiEP can control and trace. The RCiEP and the root
752 	 * ports which it supports are on the same PCIe core, with same domain
753 	 * number but maybe different bus number. The device range register
754 	 * will tell us which root ports we can support, Bit[31:16] indicates
755 	 * the upper BDF numbers of the root port, while Bit[15:0] indicates
756 	 * the lower.
757 	 */
758 	reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
759 	hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
760 	hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
761 
762 	bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
763 	if (bus)
764 		pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
765 
766 	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
767 	if (ret)
768 		return ret;
769 
770 	hisi_ptt->trace_ctrl.on_cpu = -1;
771 	return 0;
772 }
773 
774 static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
775 			    char *buf)
776 {
777 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
778 	const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));
779 
780 	return cpumap_print_to_pagebuf(true, buf, cpumask);
781 }
782 static DEVICE_ATTR_RO(cpumask);
783 
/* NULL-terminated attribute list holding only the "cpumask" attribute */
static struct attribute *hisi_ptt_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

/* Unnamed group: its attributes are not placed in a named sub-directory */
static const struct attribute_group hisi_ptt_cpumask_attr_group = {
	.attrs = hisi_ptt_cpumask_attrs,
};
792 
/*
 * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
 * filter. Bit[15:0] indicates the filter value, for Root Port filter it's
 * a bit mask of desired ports and for Requester filter it's the Requester ID
 * of the desired PCIe function. Bit[18:16] is reserved for extension.
 *
 * See hisi_ptt.rst documentation for detailed information.
 */
PMU_FORMAT_ATTR(filter,		"config:0-19");
PMU_FORMAT_ATTR(direction,	"config:20-23");
PMU_FORMAT_ATTR(type,		"config:24-31");
PMU_FORMAT_ATTR(format,		"config:32-35");

/* NULL-terminated list of the config field descriptions defined above */
static struct attribute *hisi_ptt_pmu_format_attrs[] = {
	&format_attr_filter.attr,
	&format_attr_direction.attr,
	&format_attr_type.attr,
	&format_attr_format.attr,
	NULL
};

/* The field descriptions are grouped in a sysfs directory named "format" */
static struct attribute_group hisi_ptt_pmu_format_group = {
	.name = "format",
	.attrs = hisi_ptt_pmu_format_attrs,
};
818 
819 static ssize_t hisi_ptt_filter_multiselect_show(struct device *dev,
820 						struct device_attribute *attr,
821 						char *buf)
822 {
823 	struct dev_ext_attribute *ext_attr;
824 
825 	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
826 	return sysfs_emit(buf, "%s\n", (char *)ext_attr->var);
827 }
828 
/* "multiselect" attribute of the Root Port filters group; reads back "1" */
static struct dev_ext_attribute root_port_filters_multiselect = {
	.attr = {
		.attr = { .name = "multiselect", .mode = 0400 },
		.show = hisi_ptt_filter_multiselect_show,
	},
	.var = "1",
};

/* Static content of the Root Port filters group */
static struct attribute *hisi_ptt_pmu_root_ports_attrs[] = {
	&root_port_filters_multiselect.attr.attr,
	NULL
};

/* Per-filter attributes are added to this group by name at runtime */
static struct attribute_group hisi_ptt_pmu_root_ports_group = {
	.name = HISI_PTT_RP_FILTERS_GRP_NAME,
	.attrs = hisi_ptt_pmu_root_ports_attrs,
};

/* "multiselect" attribute of the Requester filters group; reads back "0" */
static struct dev_ext_attribute requester_filters_multiselect = {
	.attr = {
		.attr = { .name = "multiselect", .mode = 0400 },
		.show = hisi_ptt_filter_multiselect_show,
	},
	.var = "0",
};

/* Static content of the Requester filters group */
static struct attribute *hisi_ptt_pmu_requesters_attrs[] = {
	&requester_filters_multiselect.attr.attr,
	NULL
};

/* Per-filter attributes are added to this group by name at runtime */
static struct attribute_group hisi_ptt_pmu_requesters_group = {
	.name = HISI_PTT_REQ_FILTERS_GRP_NAME,
	.attrs = hisi_ptt_pmu_requesters_attrs,
};

/* NULL-terminated list of all attribute groups defined in this file */
static const struct attribute_group *hisi_ptt_pmu_groups[] = {
	&hisi_ptt_cpumask_attr_group,
	&hisi_ptt_pmu_format_group,
	&hisi_ptt_tune_group,
	&hisi_ptt_pmu_root_ports_group,
	&hisi_ptt_pmu_requesters_group,
	NULL
};
873 
874 static int hisi_ptt_trace_valid_direction(u32 val)
875 {
876 	/*
877 	 * The direction values have different effects according to the data
878 	 * format (specified in the parentheses). TLP set A/B means different
879 	 * set of TLP types. See hisi_ptt.rst documentation for more details.
880 	 */
881 	static const u32 hisi_ptt_trace_available_direction[] = {
882 		0,	/* inbound(4DW) or reserved(8DW) */
883 		1,	/* outbound(4DW) */
884 		2,	/* {in, out}bound(4DW) or inbound(8DW), TLP set A */
885 		3,	/* {in, out}bound(4DW) or inbound(8DW), TLP set B */
886 	};
887 	int i;
888 
889 	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
890 		if (val == hisi_ptt_trace_available_direction[i])
891 			return 0;
892 	}
893 
894 	return -EINVAL;
895 }
896 
897 static int hisi_ptt_trace_valid_type(u32 val)
898 {
899 	/* Different types can be set simultaneously */
900 	static const u32 hisi_ptt_trace_available_type[] = {
901 		1,	/* posted_request */
902 		2,	/* non-posted_request */
903 		4,	/* completion */
904 	};
905 	int i;
906 
907 	if (!val)
908 		return -EINVAL;
909 
910 	/*
911 	 * Walk the available list and clear the valid bits of
912 	 * the config. If there is any resident bit after the
913 	 * walk then the config is invalid.
914 	 */
915 	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
916 		val &= ~hisi_ptt_trace_available_type[i];
917 
918 	if (val)
919 		return -EINVAL;
920 
921 	return 0;
922 }
923 
924 static int hisi_ptt_trace_valid_format(u32 val)
925 {
926 	static const u32 hisi_ptt_trace_availble_format[] = {
927 		0,	/* 4DW */
928 		1,	/* 8DW */
929 	};
930 	int i;
931 
932 	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
933 		if (val == hisi_ptt_trace_availble_format[i])
934 			return 0;
935 	}
936 
937 	return -EINVAL;
938 }
939 
940 static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
941 {
942 	unsigned long val, port_mask = hisi_ptt->port_mask;
943 	struct hisi_ptt_filter_desc *filter;
944 	int ret = 0;
945 
946 	hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
947 	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);
948 
949 	/*
950 	 * Port filters are defined as bit mask. For port filters, check
951 	 * the bits in the @val are within the range of hisi_ptt->port_mask
952 	 * and whether it's empty or not, otherwise user has specified
953 	 * some unsupported root ports.
954 	 *
955 	 * For Requester ID filters, walk the available filter list to see
956 	 * whether we have one matched.
957 	 */
958 	mutex_lock(&hisi_ptt->filter_lock);
959 	if (!hisi_ptt->trace_ctrl.is_port) {
960 		list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
961 			if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
962 				goto out;
963 		}
964 	} else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
965 		goto out;
966 	}
967 
968 	ret = -EINVAL;
969 out:
970 	mutex_unlock(&hisi_ptt->filter_lock);
971 	return ret;
972 }
973 
974 static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
975 {
976 	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
977 	u32 val;
978 
979 	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
980 	hisi_ptt->trace_ctrl.filter = val;
981 
982 	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
983 	ctrl->direction = val;
984 
985 	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
986 	ctrl->type = val;
987 
988 	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
989 	ctrl->format = val;
990 }
991 
992 static int hisi_ptt_pmu_event_init(struct perf_event *event)
993 {
994 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
995 	int ret;
996 	u32 val;
997 
998 	if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
999 		return -ENOENT;
1000 
1001 	if (event->cpu < 0) {
1002 		dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
1003 		return -EOPNOTSUPP;
1004 	}
1005 
1006 	if (event->attach_state & PERF_ATTACH_TASK)
1007 		return -EOPNOTSUPP;
1008 
1009 	ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
1010 	if (ret < 0)
1011 		return ret;
1012 
1013 	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
1014 	ret = hisi_ptt_trace_valid_direction(val);
1015 	if (ret < 0)
1016 		return ret;
1017 
1018 	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
1019 	ret = hisi_ptt_trace_valid_type(val);
1020 	if (ret < 0)
1021 		return ret;
1022 
1023 	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
1024 	return hisi_ptt_trace_valid_format(val);
1025 }
1026 
1027 static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
1028 				    int nr_pages, bool overwrite)
1029 {
1030 	struct hisi_ptt_pmu_buf *buf;
1031 	struct page **pagelist;
1032 	int i;
1033 
1034 	if (overwrite) {
1035 		dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
1036 		return NULL;
1037 	}
1038 
1039 	/* If the pages size less than buffers, we cannot start trace */
1040 	if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
1041 		return NULL;
1042 
1043 	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
1044 	if (!buf)
1045 		return NULL;
1046 
1047 	pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
1048 	if (!pagelist)
1049 		goto err;
1050 
1051 	for (i = 0; i < nr_pages; i++)
1052 		pagelist[i] = virt_to_page(pages[i]);
1053 
1054 	buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
1055 	if (!buf->base) {
1056 		kfree(pagelist);
1057 		goto err;
1058 	}
1059 
1060 	buf->nr_pages = nr_pages;
1061 	buf->length = nr_pages * PAGE_SIZE;
1062 	buf->pos = 0;
1063 
1064 	kfree(pagelist);
1065 	return buf;
1066 err:
1067 	kfree(buf);
1068 	return NULL;
1069 }
1070 
1071 static void hisi_ptt_pmu_free_aux(void *aux)
1072 {
1073 	struct hisi_ptt_pmu_buf *buf = aux;
1074 
1075 	vunmap(buf->base);
1076 	kfree(buf);
1077 }
1078 
1079 static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
1080 {
1081 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
1082 	struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
1083 	struct hw_perf_event *hwc = &event->hw;
1084 	struct device *dev = event->pmu->dev;
1085 	struct hisi_ptt_pmu_buf *buf;
1086 	int cpu = event->cpu;
1087 	int ret;
1088 
1089 	hwc->state = 0;
1090 
1091 	/* Serialize the perf process if user specified several CPUs */
1092 	spin_lock(&hisi_ptt->pmu_lock);
1093 	if (hisi_ptt->trace_ctrl.started) {
1094 		dev_dbg(dev, "trace has already started\n");
1095 		goto stop;
1096 	}
1097 
1098 	/*
1099 	 * Handle the interrupt on the same cpu which starts the trace to avoid
1100 	 * context mismatch. Otherwise we'll trigger the WARN from the perf
1101 	 * core in event_function_local(). If CPU passed is offline we'll fail
1102 	 * here, just log it since we can do nothing here.
1103 	 */
1104 	ret = irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(cpu));
1105 	if (ret)
1106 		dev_warn(dev, "failed to set the affinity of trace interrupt\n");
1107 
1108 	hisi_ptt->trace_ctrl.on_cpu = cpu;
1109 
1110 	buf = perf_aux_output_begin(handle, event);
1111 	if (!buf) {
1112 		dev_dbg(dev, "aux output begin failed\n");
1113 		goto stop;
1114 	}
1115 
1116 	buf->pos = handle->head % buf->length;
1117 
1118 	hisi_ptt_pmu_init_configs(hisi_ptt, event);
1119 
1120 	ret = hisi_ptt_trace_start(hisi_ptt);
1121 	if (ret) {
1122 		dev_dbg(dev, "trace start failed, ret = %d\n", ret);
1123 		perf_aux_output_end(handle, 0);
1124 		goto stop;
1125 	}
1126 
1127 	spin_unlock(&hisi_ptt->pmu_lock);
1128 	return;
1129 stop:
1130 	event->hw.state |= PERF_HES_STOPPED;
1131 	spin_unlock(&hisi_ptt->pmu_lock);
1132 }
1133 
1134 static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
1135 {
1136 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
1137 	struct hw_perf_event *hwc = &event->hw;
1138 
1139 	if (hwc->state & PERF_HES_STOPPED)
1140 		return;
1141 
1142 	spin_lock(&hisi_ptt->pmu_lock);
1143 	if (hisi_ptt->trace_ctrl.started) {
1144 		hisi_ptt_trace_end(hisi_ptt);
1145 
1146 		if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
1147 			dev_warn(event->pmu->dev, "Device is still busy\n");
1148 
1149 		hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
1150 	}
1151 	spin_unlock(&hisi_ptt->pmu_lock);
1152 
1153 	hwc->state |= PERF_HES_STOPPED;
1154 	perf_event_update_userpage(event);
1155 	hwc->state |= PERF_HES_UPTODATE;
1156 }
1157 
1158 static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
1159 {
1160 	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
1161 	struct hw_perf_event *hwc = &event->hw;
1162 	int cpu = event->cpu;
1163 
1164 	/* Only allow the cpus on the device's node to add the event */
1165 	if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
1166 		return 0;
1167 
1168 	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
1169 
1170 	if (flags & PERF_EF_START) {
1171 		hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
1172 		if (hwc->state & PERF_HES_STOPPED)
1173 			return -EINVAL;
1174 	}
1175 
1176 	return 0;
1177 }
1178 
1179 static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
1180 {
1181 	hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
1182 }
1183 
/* perf ->read() callback. */
static void hisi_ptt_pmu_read(struct perf_event *event)
{
	/* Intentionally empty: nothing is done on a read request. */
}
1187 
1188 static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
1189 {
1190 	cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
1191 }
1192 
/* devm action: unregister the perf PMU passed in @pmu. */
static void hisi_ptt_unregister_pmu(void *pmu)
{
	struct pmu *ptt_pmu = pmu;

	perf_pmu_unregister(ptt_pmu);
}
1197 
1198 static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
1199 {
1200 	u16 core_id, sicl_id;
1201 	char *pmu_name;
1202 	u32 reg;
1203 	int ret;
1204 
1205 	ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
1206 					       &hisi_ptt->hotplug_node);
1207 	if (ret)
1208 		return ret;
1209 
1210 	ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1211 				       hisi_ptt_remove_cpuhp_instance,
1212 				       &hisi_ptt->hotplug_node);
1213 	if (ret)
1214 		return ret;
1215 
1216 	mutex_init(&hisi_ptt->tune_lock);
1217 	spin_lock_init(&hisi_ptt->pmu_lock);
1218 
1219 	hisi_ptt->hisi_ptt_pmu = (struct pmu) {
1220 		.module		= THIS_MODULE,
1221 		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_NO_EXCLUDE,
1222 		.task_ctx_nr	= perf_sw_context,
1223 		.attr_groups	= hisi_ptt_pmu_groups,
1224 		.event_init	= hisi_ptt_pmu_event_init,
1225 		.setup_aux	= hisi_ptt_pmu_setup_aux,
1226 		.free_aux	= hisi_ptt_pmu_free_aux,
1227 		.start		= hisi_ptt_pmu_start,
1228 		.stop		= hisi_ptt_pmu_stop,
1229 		.add		= hisi_ptt_pmu_add,
1230 		.del		= hisi_ptt_pmu_del,
1231 		.read		= hisi_ptt_pmu_read,
1232 	};
1233 
1234 	reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
1235 	core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
1236 	sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
1237 
1238 	pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
1239 				  sicl_id, core_id);
1240 	if (!pmu_name)
1241 		return -ENOMEM;
1242 
1243 	ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
1244 	if (ret)
1245 		return ret;
1246 
1247 	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1248 					hisi_ptt_unregister_pmu,
1249 					&hisi_ptt->hisi_ptt_pmu);
1250 }
1251 
1252 static void hisi_ptt_unregister_filter_update_notifier(void *data)
1253 {
1254 	struct hisi_ptt *hisi_ptt = data;
1255 
1256 	bus_unregister_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
1257 
1258 	/* Cancel any work that has been queued */
1259 	cancel_delayed_work_sync(&hisi_ptt->work);
1260 }
1261 
1262 /* Register the bus notifier for dynamically updating the filter list */
1263 static int hisi_ptt_register_filter_update_notifier(struct hisi_ptt *hisi_ptt)
1264 {
1265 	int ret;
1266 
1267 	hisi_ptt->hisi_ptt_nb.notifier_call = hisi_ptt_notifier_call;
1268 	ret = bus_register_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
1269 	if (ret)
1270 		return ret;
1271 
1272 	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1273 					hisi_ptt_unregister_filter_update_notifier,
1274 					hisi_ptt);
1275 }
1276 
1277 /*
1278  * The DMA of PTT trace can only use direct mappings due to some
1279  * hardware restriction. Check whether there is no IOMMU or the
1280  * policy of the IOMMU domain is passthrough, otherwise the trace
1281  * cannot work.
1282  *
1283  * The PTT device is supposed to behind an ARM SMMUv3, which
1284  * should have passthrough the device by a quirk.
1285  */
1286 static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
1287 {
1288 	struct iommu_domain *iommu_domain;
1289 
1290 	iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
1291 	if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
1292 		return 0;
1293 
1294 	return -EOPNOTSUPP;
1295 }
1296 
1297 static int hisi_ptt_probe(struct pci_dev *pdev,
1298 			  const struct pci_device_id *id)
1299 {
1300 	struct hisi_ptt *hisi_ptt;
1301 	int ret;
1302 
1303 	ret = hisi_ptt_check_iommu_mapping(pdev);
1304 	if (ret) {
1305 		pci_err(pdev, "requires direct DMA mappings\n");
1306 		return ret;
1307 	}
1308 
1309 	hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
1310 	if (!hisi_ptt)
1311 		return -ENOMEM;
1312 
1313 	hisi_ptt->pdev = pdev;
1314 	pci_set_drvdata(pdev, hisi_ptt);
1315 
1316 	ret = pcim_enable_device(pdev);
1317 	if (ret) {
1318 		pci_err(pdev, "failed to enable device, ret = %d\n", ret);
1319 		return ret;
1320 	}
1321 
1322 	ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
1323 	if (ret) {
1324 		pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
1325 		return ret;
1326 	}
1327 
1328 	hisi_ptt->iobase = pcim_iomap_table(pdev)[2];
1329 
1330 	ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1331 	if (ret) {
1332 		pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
1333 		return ret;
1334 	}
1335 
1336 	pci_set_master(pdev);
1337 
1338 	ret = hisi_ptt_register_irq(hisi_ptt);
1339 	if (ret)
1340 		return ret;
1341 
1342 	ret = hisi_ptt_init_ctrls(hisi_ptt);
1343 	if (ret) {
1344 		pci_err(pdev, "failed to init controls, ret = %d\n", ret);
1345 		return ret;
1346 	}
1347 
1348 	ret = hisi_ptt_register_filter_update_notifier(hisi_ptt);
1349 	if (ret)
1350 		pci_warn(pdev, "failed to register filter update notifier, ret = %d", ret);
1351 
1352 	ret = hisi_ptt_register_pmu(hisi_ptt);
1353 	if (ret) {
1354 		pci_err(pdev, "failed to register PMU device, ret = %d", ret);
1355 		return ret;
1356 	}
1357 
1358 	ret = hisi_ptt_init_filter_attributes(hisi_ptt);
1359 	if (ret) {
1360 		pci_err(pdev, "failed to init sysfs filter attributes, ret = %d", ret);
1361 		return ret;
1362 	}
1363 
1364 	return 0;
1365 }
1366 
1367 static const struct pci_device_id hisi_ptt_id_tbl[] = {
1368 	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
1369 	{ }
1370 };
1371 MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);
1372 
1373 static struct pci_driver hisi_ptt_driver = {
1374 	.name = DRV_NAME,
1375 	.id_table = hisi_ptt_id_tbl,
1376 	.probe = hisi_ptt_probe,
1377 };
1378 
1379 static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
1380 {
1381 	struct hisi_ptt *hisi_ptt;
1382 	struct device *dev;
1383 	int target, src;
1384 
1385 	hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
1386 	src = hisi_ptt->trace_ctrl.on_cpu;
1387 	dev = hisi_ptt->hisi_ptt_pmu.dev;
1388 
1389 	if (!hisi_ptt->trace_ctrl.started || src != cpu)
1390 		return 0;
1391 
1392 	target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
1393 	if (target >= nr_cpu_ids) {
1394 		dev_err(dev, "no available cpu for perf context migration\n");
1395 		return 0;
1396 	}
1397 
1398 	perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);
1399 
1400 	/*
1401 	 * Also make sure the interrupt bind to the migrated CPU as well. Warn
1402 	 * the user on failure here.
1403 	 */
1404 	if (irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(target)))
1405 		dev_warn(dev, "failed to set the affinity of trace interrupt\n");
1406 
1407 	hisi_ptt->trace_ctrl.on_cpu = target;
1408 	return 0;
1409 }
1410 
1411 static int __init hisi_ptt_init(void)
1412 {
1413 	int ret;
1414 
1415 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
1416 				      hisi_ptt_cpu_teardown);
1417 	if (ret < 0)
1418 		return ret;
1419 	hisi_ptt_pmu_online = ret;
1420 
1421 	ret = pci_register_driver(&hisi_ptt_driver);
1422 	if (ret)
1423 		cpuhp_remove_multi_state(hisi_ptt_pmu_online);
1424 
1425 	return ret;
1426 }
1427 module_init(hisi_ptt_init);
1428 
1429 static void __exit hisi_ptt_exit(void)
1430 {
1431 	pci_unregister_driver(&hisi_ptt_driver);
1432 	cpuhp_remove_multi_state(hisi_ptt_pmu_online);
1433 }
1434 module_exit(hisi_ptt_exit);
1435 
1436 MODULE_LICENSE("GPL");
1437 MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
1438 MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");
1439