// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Virtual Processor Dispatch Trace Log
 *
 * (C) Copyright IBM Corporation 2009
 *
 * Author: Jeremy Kerr <jk@ozlabs.org>
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <asm/smp.h>
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <asm/firmware.h>
#include <asm/dtl.h>
#include <asm/lppaca.h>
#include <asm/plpar_wrappers.h>
#include <asm/machdep.h>

#ifdef CONFIG_DTL
struct dtl {
	struct dtl_entry	*buf;
	int			cpu;
	int			buf_entries;
	u64			last_idx;
	spinlock_t		lock;
};
static DEFINE_PER_CPU(struct dtl, cpu_dtl);

static u8 dtl_event_mask = DTL_LOG_ALL;


/*
 * Size of per-cpu log buffers. Firmware requires that the buffer does
 * not cross a 4k boundary.
 */
static int dtl_buf_entries = N_DISPATCH_LOG;

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE

/*
 * When CONFIG_VIRT_CPU_ACCOUNTING_NATIVE = y, the cpu accounting code controls
 * reading from the dispatch trace log. If other code wants to consume
 * DTL entries, it can set this pointer to a function that will get
 * called once for each DTL entry that gets processed.
 */
static void (*dtl_consumer)(struct dtl_entry *entry, u64 index);

struct dtl_ring {
	u64	write_index;
	struct dtl_entry *write_ptr;
	struct dtl_entry *buf;
	struct dtl_entry *buf_end;
};

static DEFINE_PER_CPU(struct dtl_ring, dtl_rings);

static atomic_t dtl_count;

/*
 * The cpu accounting code controls the DTL ring buffer, and we get
 * given entries as they are processed.
 */
static void consume_dtle(struct dtl_entry *dtle, u64 index)
{
	struct dtl_ring *dtlr = this_cpu_ptr(&dtl_rings);
	struct dtl_entry *wp = dtlr->write_ptr;
	struct lppaca *vpa = local_paca->lppaca_ptr;

	if (!wp)
		return;

	*wp = *dtle;
	barrier();

	/* check for hypervisor ring buffer overflow, ignore this entry if so */
	if (index + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx))
		return;

	++wp;
	if (wp == dtlr->buf_end)
		wp = dtlr->buf;
	dtlr->write_ptr = wp;

	/* incrementing write_index makes the new entry visible */
	smp_wmb();
	++dtlr->write_index;
}

static int dtl_start(struct dtl *dtl)
{
	struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);

	dtlr->buf = dtl->buf;
	dtlr->buf_end = dtl->buf + dtl->buf_entries;
	dtlr->write_index = 0;

	/* setting write_ptr enables logging into our buffer */
	smp_wmb();
	dtlr->write_ptr = dtl->buf;

	/* enable event logging */
	lppaca_of(dtl->cpu).dtl_enable_mask |= dtl_event_mask;

	dtl_consumer = consume_dtle;
	atomic_inc(&dtl_count);
	return 0;
}

static void dtl_stop(struct dtl *dtl)
{
	struct dtl_ring *dtlr = &per_cpu(dtl_rings, dtl->cpu);

	dtlr->write_ptr = NULL;
	smp_wmb();

	dtlr->buf = NULL;

	/* restore dtl_enable_mask */
	lppaca_of(dtl->cpu).dtl_enable_mask = DTL_LOG_PREEMPT;

	if (atomic_dec_and_test(&dtl_count))
		dtl_consumer = NULL;
}

static u64 dtl_current_index(struct dtl *dtl)
{
	return per_cpu(dtl_rings, dtl->cpu).write_index;
}

#else /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

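/*
 * Without CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, nothing else consumes the
 * dispatch trace log, so the variants below register the per-cpu buffer
 * directly with the hypervisor (via register_dtl()) and read the log
 * index straight from the lppaca, instead of hooking a consumer into
 * the accounting code's ring buffer as above.
 */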
static int dtl_start(struct dtl *dtl)
{
	unsigned long addr;
	int ret, hwcpu;

	/* Register our dtl buffer with the hypervisor. The HV expects the
	 * buffer size to be passed in the second word of the buffer */
	((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);

	hwcpu = get_hard_smp_processor_id(dtl->cpu);
	addr = __pa(dtl->buf);
	ret = register_dtl(hwcpu, addr);
	if (ret) {
		printk(KERN_WARNING "%s: DTL registration for cpu %d (hw %d) "
		       "failed with %d\n", __func__, dtl->cpu, hwcpu, ret);
		return -EIO;
	}

	/* set our initial buffer indices */
	lppaca_of(dtl->cpu).dtl_idx = 0;

	/* ensure that our updates to the lppaca fields have occurred before
	 * we actually enable the logging */
	smp_wmb();

	/* enable event logging */
	lppaca_of(dtl->cpu).dtl_enable_mask = dtl_event_mask;

	return 0;
}

static void dtl_stop(struct dtl *dtl)
{
	int hwcpu = get_hard_smp_processor_id(dtl->cpu);

	lppaca_of(dtl->cpu).dtl_enable_mask = 0x0;

	unregister_dtl(hwcpu);
}

static u64 dtl_current_index(struct dtl *dtl)
{
	return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

static int dtl_enable(struct dtl *dtl)
{
	long int n_entries;
	long int rc;
	struct dtl_entry *buf = NULL;

	if (!dtl_cache)
		return -ENOMEM;

	/* only allow one reader */
	if (dtl->buf)
		return -EBUSY;

	/* ensure there are no other conflicting dtl users */
	if (!down_read_trylock(&dtl_access_lock))
		return -EBUSY;

	n_entries = dtl_buf_entries;
	buf = kmem_cache_alloc_node(dtl_cache, GFP_KERNEL, cpu_to_node(dtl->cpu));
	if (!buf) {
		printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n",
				__func__, dtl->cpu);
		up_read(&dtl_access_lock);
		return -ENOMEM;
	}

	spin_lock(&dtl->lock);
	rc = -EBUSY;
	if (!dtl->buf) {
		/* store the original allocation size for use during read */
		dtl->buf_entries = n_entries;
		dtl->buf = buf;
		dtl->last_idx = 0;
		rc = dtl_start(dtl);
		if (rc)
			dtl->buf = NULL;
	}
	spin_unlock(&dtl->lock);

	if (rc) {
		up_read(&dtl_access_lock);
		kmem_cache_free(dtl_cache, buf);
	}

	return rc;
}

static void dtl_disable(struct dtl *dtl)
{
	spin_lock(&dtl->lock);
	dtl_stop(dtl);
	kmem_cache_free(dtl_cache, dtl->buf);
	dtl->buf = NULL;
	dtl->buf_entries = 0;
	spin_unlock(&dtl->lock);
	up_read(&dtl_access_lock);
}

/* file interface */
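/*
 * Userspace consumes the log through the per-cpu debugfs files created
 * in dtl_init() below. A minimal reader would look like the sketch
 * here (not part of this file; it assumes debugfs is mounted at
 * /sys/kernel/debug and that a matching struct dtl_entry definition is
 * available to userspace):
 *
 *	int fd = open("/sys/kernel/debug/powerpc/dtl/cpu-0", O_RDONLY);
 *	struct dtl_entry entries[16];
 *	ssize_t n = read(fd, entries, sizeof(entries));
 *
 * Reads must be a multiple of sizeof(struct dtl_entry); opening the
 * file enables logging for that cpu, and closing it stops logging.
 */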

static int dtl_file_open(struct inode *inode, struct file *filp)
{
	struct dtl *dtl = inode->i_private;
	int rc;

	rc = dtl_enable(dtl);
	if (rc)
		return rc;

	filp->private_data = dtl;
	return 0;
}

static int dtl_file_release(struct inode *inode, struct file *filp)
{
	struct dtl *dtl = inode->i_private;
	dtl_disable(dtl);
	return 0;
}

static ssize_t dtl_file_read(struct file *filp, char __user *buf, size_t len,
		loff_t *pos)
{
	long int rc, n_read, n_req, read_size;
	struct dtl *dtl;
	u64 cur_idx, last_idx, i;

	if ((len % sizeof(struct dtl_entry)) != 0)
		return -EINVAL;

	dtl = filp->private_data;

	/* requested number of entries to read */
	n_req = len / sizeof(struct dtl_entry);

	/* actual number of entries read */
	n_read = 0;

	spin_lock(&dtl->lock);

	cur_idx = dtl_current_index(dtl);
	last_idx = dtl->last_idx;

	if (last_idx + dtl->buf_entries <= cur_idx)
		last_idx = cur_idx - dtl->buf_entries + 1;

	if (last_idx + n_req > cur_idx)
		n_req = cur_idx - last_idx;

	if (n_req > 0)
		dtl->last_idx = last_idx + n_req;

	spin_unlock(&dtl->lock);

	if (n_req <= 0)
		return 0;

	i = last_idx % dtl->buf_entries;

	/* read the tail of the buffer if we've wrapped */
	if (i + n_req > dtl->buf_entries) {
		read_size = dtl->buf_entries - i;

		rc = copy_to_user(buf, &dtl->buf[i],
				read_size * sizeof(struct dtl_entry));
		if (rc)
			return -EFAULT;

		i = 0;
		n_req -= read_size;
		n_read += read_size;
		buf += read_size * sizeof(struct dtl_entry);
	}

	/* .. and now the head */
	rc = copy_to_user(buf, &dtl->buf[i], n_req * sizeof(struct dtl_entry));
	if (rc)
		return -EFAULT;

	n_read += n_req;

	return n_read * sizeof(struct dtl_entry);
}

static const struct file_operations dtl_fops = {
	.open		= dtl_file_open,
	.release	= dtl_file_release,
	.read		= dtl_file_read,
	.llseek		= no_llseek,
};

static struct dentry *dtl_dir;

static void dtl_setup_file(struct dtl *dtl)
{
	char name[10];

	sprintf(name, "cpu-%d", dtl->cpu);

	debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
}

static int dtl_init(void)
{
	int i;

	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return -ENODEV;

	/* set up common debugfs structure */

	dtl_dir = debugfs_create_dir("dtl", arch_debugfs_dir);

	debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
	debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);

	/* set up the per-cpu log structures */
	for_each_possible_cpu(i) {
		struct dtl *dtl = &per_cpu(cpu_dtl, i);
		spin_lock_init(&dtl->lock);
		dtl->cpu = i;

		dtl_setup_file(dtl);
	}

	return 0;
}
machine_arch_initcall(pseries, dtl_init);
#endif /* CONFIG_DTL */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Scan the dispatch trace log and count up the stolen time.
 * Should be called with interrupts disabled.
 */
static notrace u64 scan_dispatch_log(u64 stop_tb)
{
	u64 i = local_paca->dtl_ridx;
	struct dtl_entry *dtl = local_paca->dtl_curr;
	struct dtl_entry *dtl_end = local_paca->dispatch_log_end;
	struct lppaca *vpa = local_paca->lppaca_ptr;
	u64 tb_delta;
	u64 stolen = 0;
	u64 dtb;

	if (!dtl)
		return 0;

	if (i == be64_to_cpu(vpa->dtl_idx))
		return 0;
	while (i < be64_to_cpu(vpa->dtl_idx)) {
		dtb = be64_to_cpu(dtl->timebase);
		tb_delta = be32_to_cpu(dtl->enqueue_to_dispatch_time) +
			be32_to_cpu(dtl->ready_to_enqueue_time);
		barrier();
		if (i + N_DISPATCH_LOG < be64_to_cpu(vpa->dtl_idx)) {
			/* buffer has overflowed */
			i = be64_to_cpu(vpa->dtl_idx) - N_DISPATCH_LOG;
			dtl = local_paca->dispatch_log + (i % N_DISPATCH_LOG);
			continue;
		}
		if (dtb > stop_tb)
			break;
#ifdef CONFIG_DTL
		if (dtl_consumer)
			dtl_consumer(dtl, i);
#endif
		stolen += tb_delta;
		++i;
		++dtl;
		if (dtl == dtl_end)
			dtl = local_paca->dispatch_log;
	}
	local_paca->dtl_ridx = i;
	local_paca->dtl_curr = dtl;
	return stolen;
}

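/*
 * The two entry points below are thin wrappers around
 * scan_dispatch_log(): pseries_accumulate_stolen_time() folds the
 * stolen time found in the log into the vtime accounting data, while
 * pseries_calculate_stolen_time() only bothers scanning when the local
 * read index (dtl_ridx) has fallen behind the hypervisor's dtl_idx.
 */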
/*
 * Accumulate stolen time by scanning the dispatch trace log.
 * Called on entry from user mode.
 */
void notrace pseries_accumulate_stolen_time(void)
{
	u64 sst, ust;
	struct cpu_accounting_data *acct = &local_paca->accounting;

	sst = scan_dispatch_log(acct->starttime_user);
	ust = scan_dispatch_log(acct->starttime);
	acct->stime -= sst;
	acct->utime -= ust;
	acct->steal_time += ust + sst;
}

u64 pseries_calculate_stolen_time(u64 stop_tb)
{
	if (!firmware_has_feature(FW_FEATURE_SPLPAR))
		return 0;

	if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx))
		return scan_dispatch_log(stop_tb);

	return 0;
}

#endif