xref: /openbmc/linux/drivers/iommu/intel/perf.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
// SPDX-License-Identifier: GPL-2.0
/*
 * perf.c - performance monitor
 *
 * Copyright (C) 2021 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 *         Fenghua Yu <fenghua.yu@intel.com>
 */
1055ee5e67SLu Baolu 
#include <linux/kernel.h>
#include <linux/spinlock.h>

#include "iommu.h"
#include "perf.h"
1555ee5e67SLu Baolu 
/*
 * Protects allocation of iommu->perf_statistic and all reads/writes of the
 * per-type latency counters below.
 */
static DEFINE_SPINLOCK(latency_lock);
1755ee5e67SLu Baolu 
dmar_latency_enabled(struct intel_iommu * iommu,enum latency_type type)1855ee5e67SLu Baolu bool dmar_latency_enabled(struct intel_iommu *iommu, enum latency_type type)
1955ee5e67SLu Baolu {
2055ee5e67SLu Baolu 	struct latency_statistic *lstat = iommu->perf_statistic;
2155ee5e67SLu Baolu 
2255ee5e67SLu Baolu 	return lstat && lstat[type].enabled;
2355ee5e67SLu Baolu }
2455ee5e67SLu Baolu 
dmar_latency_enable(struct intel_iommu * iommu,enum latency_type type)2555ee5e67SLu Baolu int dmar_latency_enable(struct intel_iommu *iommu, enum latency_type type)
2655ee5e67SLu Baolu {
2755ee5e67SLu Baolu 	struct latency_statistic *lstat;
2855ee5e67SLu Baolu 	unsigned long flags;
2955ee5e67SLu Baolu 	int ret = -EBUSY;
3055ee5e67SLu Baolu 
3155ee5e67SLu Baolu 	if (dmar_latency_enabled(iommu, type))
3255ee5e67SLu Baolu 		return 0;
3355ee5e67SLu Baolu 
3455ee5e67SLu Baolu 	spin_lock_irqsave(&latency_lock, flags);
3555ee5e67SLu Baolu 	if (!iommu->perf_statistic) {
3655ee5e67SLu Baolu 		iommu->perf_statistic = kzalloc(sizeof(*lstat) * DMAR_LATENCY_NUM,
3755ee5e67SLu Baolu 						GFP_ATOMIC);
3855ee5e67SLu Baolu 		if (!iommu->perf_statistic) {
3955ee5e67SLu Baolu 			ret = -ENOMEM;
4055ee5e67SLu Baolu 			goto unlock_out;
4155ee5e67SLu Baolu 		}
4255ee5e67SLu Baolu 	}
4355ee5e67SLu Baolu 
4455ee5e67SLu Baolu 	lstat = iommu->perf_statistic;
4555ee5e67SLu Baolu 
4655ee5e67SLu Baolu 	if (!lstat[type].enabled) {
4755ee5e67SLu Baolu 		lstat[type].enabled = true;
4855ee5e67SLu Baolu 		lstat[type].counter[COUNTS_MIN] = UINT_MAX;
4955ee5e67SLu Baolu 		ret = 0;
5055ee5e67SLu Baolu 	}
5155ee5e67SLu Baolu unlock_out:
5255ee5e67SLu Baolu 	spin_unlock_irqrestore(&latency_lock, flags);
5355ee5e67SLu Baolu 
5455ee5e67SLu Baolu 	return ret;
5555ee5e67SLu Baolu }
5655ee5e67SLu Baolu 
dmar_latency_disable(struct intel_iommu * iommu,enum latency_type type)5755ee5e67SLu Baolu void dmar_latency_disable(struct intel_iommu *iommu, enum latency_type type)
5855ee5e67SLu Baolu {
5955ee5e67SLu Baolu 	struct latency_statistic *lstat = iommu->perf_statistic;
6055ee5e67SLu Baolu 	unsigned long flags;
6155ee5e67SLu Baolu 
6255ee5e67SLu Baolu 	if (!dmar_latency_enabled(iommu, type))
6355ee5e67SLu Baolu 		return;
6455ee5e67SLu Baolu 
6555ee5e67SLu Baolu 	spin_lock_irqsave(&latency_lock, flags);
6655ee5e67SLu Baolu 	memset(&lstat[type], 0, sizeof(*lstat) * DMAR_LATENCY_NUM);
6755ee5e67SLu Baolu 	spin_unlock_irqrestore(&latency_lock, flags);
6855ee5e67SLu Baolu }
6955ee5e67SLu Baolu 
/*
 * dmar_latency_update - record one latency sample
 * @iommu: the IOMMU instance
 * @type: which latency statistic the sample belongs to
 * @latency: the sampled latency, in nanoseconds (converted to us by
 *           dmar_latency_snapshot() when reporting)
 *
 * Silently drops the sample if the type is not enabled.
 */
void dmar_latency_update(struct intel_iommu *iommu, enum latency_type type, u64 latency)
{
	struct latency_statistic *stats = iommu->perf_statistic;
	unsigned long flags;

	if (!dmar_latency_enabled(iommu, type))
		return;

	spin_lock_irqsave(&latency_lock, flags);

	/* Bucket the sample by order of magnitude. */
	if (latency < 100)
		stats[type].counter[COUNTS_10e2]++;
	else if (latency < 1000)
		stats[type].counter[COUNTS_10e3]++;
	else if (latency < 10000)
		stats[type].counter[COUNTS_10e4]++;
	else if (latency < 100000)
		stats[type].counter[COUNTS_10e5]++;
	else if (latency < 1000000)
		stats[type].counter[COUNTS_10e6]++;
	else if (latency < 10000000)
		stats[type].counter[COUNTS_10e7]++;
	else
		stats[type].counter[COUNTS_10e8_plus]++;

	/* Track running min/max/sum and the sample count for averaging. */
	if (latency < stats[type].counter[COUNTS_MIN])
		stats[type].counter[COUNTS_MIN] = latency;
	if (latency > stats[type].counter[COUNTS_MAX])
		stats[type].counter[COUNTS_MAX] = latency;
	stats[type].counter[COUNTS_SUM] += latency;
	stats[type].samples++;

	spin_unlock_irqrestore(&latency_lock, flags);
}
10355ee5e67SLu Baolu 
/*
 * Column headers for dmar_latency_snapshot(); order must match the
 * COUNTS_* enum indices.  Declared const (rodata) since they are never
 * modified.
 */
static const char * const latency_counter_names[] = {
	"                  <0.1us",
	"   0.1us-1us", "    1us-10us", "  10us-100us",
	"   100us-1ms", "    1ms-10ms", "      >=10ms",
	"     min(us)", "     max(us)", " average(us)"
};

/* Row labels, indexed by enum latency_type; likewise read-only. */
static const char * const latency_type_names[] = {
	"   inv_iotlb", "  inv_devtlb", "     inv_iec",
	"     svm_prq"
};
11555ee5e67SLu Baolu 
/*
 * dmar_latency_snapshot - format the current latency statistics as text
 * @iommu: the IOMMU whose statistics are reported
 * @str: output buffer
 * @size: size of @str in bytes
 *
 * Writes a header row of counter names followed by one row per enabled
 * latency type.  Min/max/average are reported in microseconds.
 *
 * Uses scnprintf() rather than snprintf(): snprintf() returns the
 * would-be length, so once output truncates, @bytes exceeds @size and
 * the size_t expression (size - bytes) wraps to a huge value, defeating
 * the bounds check on every later call and allowing writes past @str.
 * scnprintf() returns the number of bytes actually stored.
 *
 * Returns the number of bytes written to @str.
 */
int dmar_latency_snapshot(struct intel_iommu *iommu, char *str, size_t size)
{
	struct latency_statistic *lstat = iommu->perf_statistic;
	unsigned long flags;
	int bytes = 0, i, j;

	memset(str, 0, size);

	for (i = 0; i < COUNTS_NUM; i++)
		bytes += scnprintf(str + bytes, size - bytes,
				   "%s", latency_counter_names[i]);

	spin_lock_irqsave(&latency_lock, flags);
	for (i = 0; i < DMAR_LATENCY_NUM; i++) {
		if (!dmar_latency_enabled(iommu, i))
			continue;

		bytes += scnprintf(str + bytes, size - bytes,
				   "\n%s", latency_type_names[i]);

		for (j = 0; j < COUNTS_NUM; j++) {
			u64 val = lstat[i].counter[j];

			switch (j) {
			case COUNTS_MIN:
				/* UINT_MAX means "no sample yet" (set at enable time). */
				if (val == UINT_MAX)
					val = 0;
				else
					val = div_u64(val, 1000);
				break;
			case COUNTS_MAX:
				val = div_u64(val, 1000);
				break;
			case COUNTS_SUM:
				/* Average in us; guard against division by zero. */
				if (lstat[i].samples)
					val = div_u64(val, (lstat[i].samples * 1000));
				else
					val = 0;
				break;
			default:
				break;
			}

			bytes += scnprintf(str + bytes, size - bytes,
					   "%12lld", val);
		}
	}
	spin_unlock_irqrestore(&latency_lock, flags);

	return bytes;
}
167