1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Virtual Processor Dispatch Trace Log
4  *
5  * (C) Copyright IBM Corporation 2009
6  *
7  * Author: Jeremy Kerr <jk@ozlabs.org>
8  */
9 
10 #include <linux/slab.h>
11 #include <linux/spinlock.h>
12 #include <asm/smp.h>
13 #include <linux/uaccess.h>
14 #include <linux/debugfs.h>
15 #include <asm/firmware.h>
16 #include <asm/dtl.h>
17 #include <asm/lppaca.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/machdep.h>
20 
21 #ifdef CONFIG_DTL
/*
 * Per-cpu bookkeeping for one dispatch trace log exposed through debugfs.
 */
struct dtl {
	/* log buffer handed out by dtl_enable(); NULL while the log is not open */
	struct dtl_entry	*buf;
	/* logical cpu number this log belongs to, set in dtl_init() */
	int			cpu;
	/* number of entries in buf, recorded when the buffer is installed */
	int			buf_entries;
	/* index of the next entry dtl_file_read() will return */
	u64			last_idx;
	/* taken around updates of buf, buf_entries and last_idx */
	spinlock_t		lock;
};
/* one struct dtl per possible cpu, initialised by dtl_init() */
static DEFINE_PER_CPU(struct dtl, cpu_dtl);
30 
/*
 * Mask of dispatch events to log.  Exposed through the "dtl_event_mask"
 * debugfs file and applied to the lppaca dtl_enable_mask by dtl_start().
 */
static u8 dtl_event_mask = DTL_LOG_ALL;


/*
 * Size of per-cpu log buffers, in entries. Firmware requires that the
 * buffer does not cross a 4k boundary.  The value is exposed read-only
 * through the "dtl_buf_entries" debugfs file.
 */
static int dtl_buf_entries = N_DISPATCH_LOG;
39 
40 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
41 
/*
 * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
 * reading from the dispatch trace log.  If other code wants to consume
 * DTL entries, it can set this pointer to a function that will get
 * called once for each DTL entry that gets processed.
 *
 * scan_dispatch_log() calls it with the entry and the entry's log index.
 * In this file dtl_start() points it at consume_dtle() and dtl_stop()
 * resets it to NULL once dtl_count drops to zero.
 */
static void (*dtl_consumer)(struct dtl_entry *entry, u64 index);
49 
/*
 * Per-cpu ring that consume_dtle() copies dispatch trace entries into.
 */
struct dtl_ring {
	/* count of entries published so far; incremented after each copy */
	u64	write_index;
	/* next slot to fill; NULL means logging into the ring is disabled */
	struct dtl_entry *write_ptr;
	/* first entry of the ring */
	struct dtl_entry *buf;
	/* one past the last entry; write_ptr wraps to buf when it gets here */
	struct dtl_entry *buf_end;
};

/* one ring per cpu, set up by dtl_start() and torn down by dtl_stop() */
static DEFINE_PER_CPU(struct dtl_ring, dtl_rings);
58 
/*
 * Number of logs currently started: incremented by dtl_start(), decremented
 * by dtl_stop(), which clears dtl_consumer when the count reaches zero.
 */
static atomic_t dtl_count;
60 
/*
 * The cpu accounting code controls the DTL ring buffer, and we get
 * given entries as they are processed.
 *
 * Copies @dtle into the dtl_rings slot of the executing cpu and publishes
 * it by incrementing write_index.  @index is the position of @dtle in the
 * log being scanned and is only used for the overflow check.  dtl_start()
 * installs this function as dtl_consumer.
 */
static void consume_dtle(struct dtl_entry *dtle, u64 index)
{
	struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings);
	struct dtl_entry *wp = dtlr->write_ptr;
	struct lppaca *vpa = local_paca->lppaca_ptr;

	/* write_ptr is NULL unless dtl_start() has enabled this cpu's ring */
	if (!wp)
		return;

	/*
	 * Copy first, then re-check dtl_idx; the compiler barrier keeps the
	 * copy from being moved below that re-check.
	 */
	*wp = *dtle;
	barrier();

	/*
	 * check for hypervisor ring buffer overflow, ignore this entry if so.
	 * write_ptr is left untouched on this path, so the slot just written
	 * is reused for the next entry.
	 */
	if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx))
		return;

	/* advance to the next slot, wrapping at the end of the ring */
	++wp;
	if (wp == dtlr->buf_end)
		wp = dtlr->buf;
	dtlr->write_ptr = wp;

	/* incrementing write_index makes the new entry visible */
	smp_wmb();
	++dtlr->write_index;
}
90 
91 static int dtl_start(struct dtl *dtl)
92 {
93 	struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);
94 
95 	dtlr->buf = dtl->buf;
96 	dtlr->buf_end = dtl->buf + dtl->buf_entries;
97 	dtlr->write_index = 0;
98 
99 	/* setting write_ptr enables logging into our buffer */
100 	smp_wmb();
101 	dtlr->write_ptr = dtl->buf;
102 
103 	/* enable event logging */
104 	lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask;
105 
106 	dtl_consumer = consume_dtle;
107 	atomic_inc(&dtl_count);
108 	return 0;
109 }
110 
111 static void dtl_stop(struct dtl *dtl)
112 {
113 	struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);
114 
115 	dtlr->write_ptr = NULL;
116 	smp_wmb();
117 
118 	dtlr->buf = NULL;
119 
120 	/* restore dtl_enable_mask */
121 	lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT;
122 
123 	if (atomic_dec_and_test(&dtl_count))
124 		dtl_consumer = NULL;
125 }
126 
127 static u64 dtl_current_index(struct dtl *dtl)
128 {
129 	return per_cpu(dtl_rings, dtl->cpu).write_index;
130 }
131 
132 #else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
133 
134 static int dtl_start(struct dtl *dtl)
135 {
136 	unsigned long addr;
137 	int ret, hwcpu;
138 
139 	/* Register our dtl buffer with the hypervisor. The HV expects the
140 	 * buffer size to be passed in the second word of the buffer */
141 	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
142 
143 	hwcpu = get_hard_smp_processor_id(dtl->cpu);
144 	addr = __pa(dtl->buf);
145 	ret = register_dtl(hwcpu, addr);
146 	if (ret) {
147 		printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) "
148 		       "failed with %d\n", __func__, dtl->cpu, hwcpu, ret);
149 		return -EIO;
150 	}
151 
152 	/* set our initial buffer indices */
153 	lppaca_of(dtl->cpu).dtl_idx = 0;
154 
155 	/* ensure that our updates to the lppaca fields have occurred before
156 	 * we actually enable the logging */
157 	smp_wmb();
158 
159 	/* enable event logging */
160 	lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask;
161 
162 	return 0;
163 }
164 
165 static void dtl_stop(struct dtl *dtl)
166 {
167 	int hwcpu = get_hard_smp_processor_id(dtl->cpu);
168 
169 	lppaca_of(dtl->cpu).dtl_enable_mask = 0x0;
170 
171 	unregister_dtl(hwcpu);
172 }
173 
174 static u64 dtl_current_index(struct dtl *dtl)
175 {
176 	return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx);
177 }
178 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
179 
180 static int dtl_enable(struct dtl *dtl)
181 {
182 	long int n_entries;
183 	long int rc;
184 	struct dtl_entry *buf = NULL;
185 
186 	if (!dtl_cache)
187 		return -ENOMEM;
188 
189 	/* only allow one reader */
190 	if (dtl->buf)
191 		return -EBUSY;
192 
193 	/* ensure there are no other conflicting dtl users */
194 	if (!down_read_trylock(&dtl_access_lock))
195 		return -EBUSY;
196 
197 	n_entries = dtl_buf_entries;
198 	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
199 	if (!buf) {
200 		printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
201 				__func__, dtl->cpu);
202 		up_read(&dtl_access_lock);
203 		return -ENOMEM;
204 	}
205 
206 	spin_lock(&dtl->lock);
207 	rc = -EBUSY;
208 	if (!dtl->buf) {
209 		/* store the original allocation size for use during read */
210 		dtl->buf_entries = n_entries;
211 		dtl->buf = buf;
212 		dtl->last_idx = 0;
213 		rc = dtl_start(dtl);
214 		if (rc)
215 			dtl->buf = NULL;
216 	}
217 	spin_unlock(&dtl->lock);
218 
219 	if (rc) {
220 		up_read(&dtl_access_lock);
221 		kmem_cache_free(dtl_cache, buf);
222 	}
223 
224 	return rc;
225 }
226 
227 static void dtl_disable(struct dtl *dtl)
228 {
229 	spin_lock(&dtl->lock);
230 	dtl_stop(dtl);
231 	kmem_cache_free(dtl_cache, dtl->buf);
232 	dtl->buf = NULL;
233 	dtl->buf_entries = 0;
234 	spin_unlock(&dtl->lock);
235 	up_read(&dtl_access_lock);
236 }
237 
238 /* file interface */
239 
240 static int dtl_file_open(struct inode *inode, struct file *filp)
241 {
242 	struct dtl *dtl = inode->i_private;
243 	int rc;
244 
245 	rc = dtl_enable(dtl);
246 	if (rc)
247 		return rc;
248 
249 	filp->private_data = dtl;
250 	return 0;
251 }
252 
253 static int dtl_file_release(struct inode *inode, struct file *filp)
254 {
255 	struct dtl *dtl = inode->i_private;
256 	dtl_disable(dtl);
257 	return 0;
258 }
259 
260 static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len,
261 		loff_t *pos)
262 {
263 	long int rc, n_read, n_req, read_size;
264 	struct dtl *dtl;
265 	u64 cur_idx, last_idx, i;
266 
267 	if ((len % sizeof(struct dtl_entry)) != 0)
268 		return -EINVAL;
269 
270 	dtl = filp->private_data;
271 
272 	/* requested number of entries to read */
273 	n_req = len / sizeof(struct dtl_entry);
274 
275 	/* actual number of entries read */
276 	n_read = 0;
277 
278 	spin_lock(&dtl->lock);
279 
280 	cur_idx = dtl_current_index(dtl);
281 	last_idx = dtl->last_idx;
282 
283 	if (last_idx + dtl->buf_entries <= cur_idx)
284 		last_idx = cur_idx - dtl->buf_entries + 1;
285 
286 	if (last_idx + n_req > cur_idx)
287 		n_req = cur_idx - last_idx;
288 
289 	if (n_req > 0)
290 		dtl->last_idx = last_idx + n_req;
291 
292 	spin_unlock(&dtl->lock);
293 
294 	if (n_req <= 0)
295 		return 0;
296 
297 	i = last_idx % dtl->buf_entries;
298 
299 	/* read the tail of the buffer if we've wrapped */
300 	if (i + n_req > dtl->buf_entries) {
301 		read_size = dtl->buf_entries - i;
302 
303 		rc = copy_to_user(buf, &dtl->buf[i],
304 				read_size * sizeof(struct dtl_entry));
305 		if (rc)
306 			return -EFAULT;
307 
308 		i = 0;
309 		n_req -= read_size;
310 		n_read += read_size;
311 		buf += read_size * sizeof(struct dtl_entry);
312 	}
313 
314 	/* .. and now the head */
315 	rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry));
316 	if (rc)
317 		return -EFAULT;
318 
319 	n_read += n_req;
320 
321 	return n_read * sizeof(struct dtl_entry);
322 }
323 
324 static const struct file_operations dtl_fops = {
325 	.open		= dtl_file_open,
326 	.release	= dtl_file_release,
327 	.read		= dtl_file_read,
328 	.llseek		= no_llseek,
329 };
330 
/* debugfs directory "dtl", created by dtl_init(); parent of all dtl files */
static struct dentry *dtl_dir;
332 
333 static void dtl_setup_file(struct dtl *dtl)
334 {
335 	char name[10];
336 
337 	sprintf(name, "cpu-%d", dtl->cpu);
338 
339 	debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
340 }
341 
342 static int dtl_init(void)
343 {
344 	int i;
345 
346 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
347 		return -ENODEV;
348 
349 	/* set up common debugfs structure */
350 
351 	dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);
352 
353 	debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
354 	debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
355 
356 	/* set up the per-cpu log structures */
357 	for_each_possible_cpu(i) {
358 		struct dtl *dtl = &per_cpu(cpu_dtl, i);
359 		spin_lock_init(&dtl->lock);
360 		dtl->cpu = i;
361 
362 		dtl_setup_file(dtl);
363 	}
364 
365 	return 0;
366 }
367 machine_arch_initcall(pseries, dtl_init);
368 #endif /* CONFIG_DTL */
369 
370 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Scan the dispatch trace log and count up the stolen time.
 * Should be called with interrupts disabled.
 *
 * Walks the entries between the paca read index (dtl_ridx) and the index
 * published in the lppaca (dtl_idx), stopping early at the first entry whose
 * timebase is later than @stop_tb.  With CONFIG_DTL, every processed entry
 * is also passed to dtl_consumer when one is set.  The read index and
 * current entry pointer in the paca are updated before returning.
 *
 * Returns the sum of enqueue_to_dispatch_time and ready_to_enqueue_time over
 * the processed entries, or 0 when there is no current entry pointer or
 * nothing new to read.
 */
static notrace u64 scan_dispatch_log(u64 stop_tb)
{
	u64 i = local_paca->dtl_ridx;
	struct dtl_entry *dtl = local_paca->dtl_curr;
	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
	struct lppaca *vpa = local_paca->lppaca_ptr;
	u64 tb_delta;
	u64 stolen = 0;
	u64 dtb;

	/* dtl_curr is NULL: nothing to scan */
	if (!dtl)
		return 0;

	/* read index already equals the published index: no new entries */
	if (i == be64_to_cpu(vpa->dtl_idx))
		return 0;
	while (i < be64_to_cpu(vpa->dtl_idx)) {
		/* pull the fields out of the entry before validating it */
		dtb = be64_to_cpu(dtl->timebase);
		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
			be32_to_cpu(dtl->ready_to_enqueue_time);
		/* compiler barrier: keep the reads above ahead of the re-check */
		barrier();
		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
			/*
			 * buffer has overflowed: discard what was just read,
			 * jump to the entry N_DISPATCH_LOG behind dtl_idx and
			 * start over from there
			 */
			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
			continue;
		}
		/* entry is later than the requested stop point: leave it */
		if (dtb > stop_tb)
			break;
#ifdef CONFIG_DTL
		if (dtl_consumer)
			dtl_consumer(dtl, i);
#endif
		stolen += tb_delta;
		++i;
		++dtl;
		/* wrap back to the start of the log */
		if (dtl == dtl_end)
			dtl = local_paca->dispatch_log;
	}
	/* remember where to resume on the next call */
	local_paca->dtl_ridx = i;
	local_paca->dtl_curr = dtl;
	return stolen;
}
417 
418 /*
419  * Accumulate stolen time by scanning the dispatch trace log.
420  * Called on entry from user mode.
421  */
422 void notrace pseries_accumulate_stolen_time(void)
423 {
424 	u64 sst, ust;
425 	struct cpu_accounting_data *acct = &local_paca->accounting;
426 
427 	sst = scan_dispatch_log(acct->starttime_user);
428 	ust = scan_dispatch_log(acct->starttime);
429 	acct->stime -= sst;
430 	acct->utime -= ust;
431 	acct->steal_time += ust + sst;
432 }
433 
434 u64 pseries_calculate_stolen_time(u64 stop_tb)
435 {
436 	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
437 		return 0;
438 
439 	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
440 		return scan_dispatch_log(stop_tb);
441 
442 	return 0;
443 }
444 
445 #endif
446