/*
 * Copyright © 2015-2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *   Robert Bragg <robert@sixbynine.org>
 */


/**
 * DOC: i915 Perf Overview
 *
 * Gen graphics supports a large number of performance counters that can help
 * driver and application developers understand and optimize their use of the
 * GPU.
 *
 * This i915 perf interface enables userspace to configure and open a file
 * descriptor representing a stream of GPU metrics which can then be read() as
 * a stream of sample records.
 *
 * The interface is particularly suited to exposing buffered metrics that are
 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
 *
 * Streams representing a single context are accessible to applications with a
 * corresponding drm file descriptor, such that OpenGL can use the interface
 * without special privileges. Access to system-wide metrics requires root
 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
 * sysctl option.
 *
 */
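
/*
 * For orientation, a minimal userspace sketch of opening a periodic,
 * system-wide OA stream on a DRM fd (illustrative only: error handling is
 * omitted, metrics_set_id stands in for an ID advertised via sysfs, and the
 * OA format shown is just one Gen8+ example):
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(properties) / (2 * sizeof(uint64_t)),
 *		.properties_ptr = (uintptr_t)properties,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);
 *
 * The returned fd can then be read() or poll()ed for the sample records
 * described in this file.
 */
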
/**
 * DOC: i915 Perf History and Comparison with Core Perf
 *
 * The interface was initially inspired by the core Perf infrastructure but
 * some notable differences are:
 *
 * i915 perf file descriptors represent a "stream" instead of an "event"; where
 * a perf event primarily corresponds to a single 64bit value, while a stream
 * might sample sets of tightly-coupled counters, depending on the
 * configuration. For example the Gen OA unit isn't designed to support
 * orthogonal configurations of individual counters; it's configured for a set
 * of related counters. Samples for an i915 perf stream capturing OA metrics
 * will include a set of counter values packed in a compact HW specific format.
 * The OA unit supports a number of different packing formats which can be
 * selected by the user opening the stream. Perf has support for grouping
 * events, but each event in the group is configured, validated and
 * authenticated individually with separate system calls.
 *
 * i915 perf stream configurations are provided as an array of u64 (key,value)
 * pairs, instead of a fixed struct with multiple miscellaneous config members,
 * interleaved with event-type specific members.
 *
 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
 * The supported metrics are being written to memory by the GPU unsynchronized
 * with the CPU, using HW specific packing formats for counter sets. Sometimes
 * the constraints on HW configuration require reports to be filtered before it
 * would be acceptable to expose them to unprivileged applications - to hide
 * the metrics of other processes/contexts. For these use cases a read() based
 * interface is a good fit, and provides an opportunity to filter data as it
 * gets copied from the GPU mapped buffers to userspace buffers.
 *
 *
 * Issues hit with first prototype based on Core Perf
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * The first prototype of this driver was based on the core perf
 * infrastructure, and while we did make that mostly work, with some changes to
 * perf, we found we were breaking or working around too many assumptions baked
 * into perf's currently cpu centric design.
 *
 * In the end we didn't see a clear benefit to making perf's implementation and
 * interface more complex by changing design assumptions while we knew we still
 * wouldn't be able to use any existing perf based userspace tools.
 *
 * Also considering the Gen specific nature of the Observability hardware and
 * how userspace will sometimes need to combine i915 perf OA metrics with
 * side-band OA data captured via MI_REPORT_PERF_COUNT commands; we're
 * expecting the interface to be used by a platform specific userspace such as
 * OpenGL or tools. This is to say; we aren't inherently missing out on having
 * a standard vendor/architecture agnostic interface by not using perf.
 *
 *
 * For posterity, in case we might re-visit trying to adapt core perf to be
 * better suited to exposing i915 metrics these were the main pain points we
 * hit:
 *
 * - The perf based OA PMU driver broke some significant design assumptions:
 *
 *   Existing perf pmus are used for profiling work on a cpu and we were
 *   introducing the idea of _IS_DEVICE pmus with different security
 *   implications, the need to fake cpu-related data (such as user/kernel
 *   registers) to fit with perf's current design, and adding _DEVICE records
 *   as a way to forward device-specific status records.
 *
 *   The OA unit writes reports of counters into a circular buffer, without
 *   involvement from the CPU, making our PMU driver the first of its kind.
 *
 *   Given the way we were periodically forwarding data from the GPU-mapped OA
 *   buffer to perf's buffer, those bursts of sample writes looked to perf like
 *   we were sampling too fast and so we had to subvert its throttling checks.
 *
 *   Perf supports groups of counters and allows those to be read via
 *   transactions internally but transactions currently seem designed to be
 *   explicitly initiated from the cpu (say in response to a userspace read())
 *   and while we could pull a report out of the OA buffer we can't
 *   trigger a report from the cpu on demand.
 *
 *   Related to being report based; the OA counters are configured in HW as a
 *   set while perf generally expects counter configurations to be orthogonal.
 *   Although counters can be associated with a group leader as they are
 *   opened, there's no clear precedent for being able to provide group-wide
 *   configuration attributes (for example we want to let userspace choose the
 *   OA unit report format used to capture all counters in a set, or specify a
 *   GPU context to filter metrics on). We avoided using perf's grouping
 *   feature and forwarded OA reports to userspace via perf's 'raw' sample
 *   field. This suited our userspace well considering how coupled the counters
 *   are when dealing with normalizing. It would be inconvenient to split
 *   counters up into separate events, only to require userspace to recombine
 *   them. For Mesa it's also convenient to be forwarded raw, periodic reports
 *   for combining with the side-band raw reports it captures using
 *   MI_REPORT_PERF_COUNT commands.
 *
 * - As a side note on perf's grouping feature; there was also some concern
 *   that using PERF_FORMAT_GROUP as a way to pack together counter values
 *   would quite drastically inflate our sample sizes, which would likely
 *   lower the effective sampling resolutions we could use when the available
 *   memory bandwidth is limited.
 *
 *   With the OA unit's report formats, counters are packed together as 32
 *   or 40bit values, with the largest report size being 256 bytes.
 *
 *   PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
 *   documented ordering to the values, implying PERF_FORMAT_ID must also be
 *   used to add a 64bit ID before each value; giving 16 bytes per counter.
 *
 * - Related to counter orthogonality; we can't time share the OA unit, while
 *   event scheduling is a central design idea within perf for allowing
 *   userspace to open + enable more events than can be configured in HW at any
 *   one time. The OA unit is not designed to allow re-configuration while in
 *   use. We can't reconfigure the OA unit without losing internal OA unit
 *   state which we can't access explicitly to save and restore. Reconfiguring
 *   the OA unit is also relatively slow, involving ~100 register writes. From
 *   userspace Mesa also depends on a stable OA configuration when emitting
 *   MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
 *   disabled while there are outstanding MI_RPC commands lest we hang the
 *   command streamer.
 *
 * - The contents of sample records aren't extensible by device drivers (i.e.
 *   the sample_type bits). As an example; Sourab Gupta had been looking to
 *   attach GPU timestamps to our OA samples. We were shoehorning OA reports
 *   into sample records by using the 'raw' field, but it's tricky to pack more
 *   than one thing into this field because events/core.c currently only lets a
 *   pmu give a single raw data pointer plus len which will be copied into the
 *   ring buffer. To include more than the OA report we'd have to copy the
 *   report into an intermediate larger buffer. I'd been considering allowing a
 *   vector of data+len values to be specified for copying the raw data, but
 *   it felt like a kludge to be using the raw field for this purpose.
 *
 * - It felt like our perf based PMU was making some technical compromises
 *   just for the sake of using perf:
 *
 *   perf_event_open() requires events to either relate to a pid or a specific
 *   cpu core, while our device pmu related to neither.
 *   Events opened with a pid will be automatically enabled/disabled according
 *   to the scheduling of that process - so not appropriate for us. When an
 *   event is related to a cpu id, perf ensures pmu methods will be invoked via
 *   an inter-processor interrupt on that core. To avoid invasive changes our
 *   userspace opened OA perf events for a specific cpu. This was workable but
 *   it meant the majority of the OA driver ran in atomic context, including
 *   all OA report forwarding, which wasn't really necessary in our case and
 *   made our locking requirements somewhat more complex as we handled the
 *   interaction with the rest of the i915 driver.
 */

#include <linux/anon_inodes.h>
#include <linux/sizes.h>
#include <linux/uuid.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_lrc_reg.h"

#include "i915_drv.h"
#include "i915_perf.h"
#include "oa/i915_oa_hsw.h"
#include "oa/i915_oa_bdw.h"
#include "oa/i915_oa_chv.h"
#include "oa/i915_oa_sklgt2.h"
#include "oa/i915_oa_sklgt3.h"
#include "oa/i915_oa_sklgt4.h"
#include "oa/i915_oa_bxt.h"
#include "oa/i915_oa_kblgt2.h"
#include "oa/i915_oa_kblgt3.h"
#include "oa/i915_oa_glk.h"
#include "oa/i915_oa_cflgt2.h"
#include "oa/i915_oa_cflgt3.h"
#include "oa/i915_oa_cnl.h"
#include "oa/i915_oa_icl.h"

/* HW requires this to be a power of two, between 128k and 16M, though driver
 * is currently generally designed assuming the largest 16M size is used such
 * that the overflow cases are unlikely in normal operation.
 */
#define OA_BUFFER_SIZE		SZ_16M

#define OA_TAKEN(tail, head)	((tail - head) & (OA_BUFFER_SIZE - 1))

/**
 * DOC: OA Tail Pointer Race
 *
 * There's a HW race condition between OA unit tail pointer register updates and
 * writes to memory whereby the tail pointer can sometimes get ahead of what's
 * been written out to the OA buffer so far (in terms of what's visible to the
 * CPU).
 *
 * Although this can be observed explicitly while copying reports to userspace
 * by checking for a zeroed report-id field in tail reports, we want to account
 * for this earlier, as part of the oa_buffer_check_unlocked() call, to avoid
 * lots of redundant read() attempts.
 *
 * In effect we define a tail pointer for reading that lags the real tail
 * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
 * time for the corresponding reports to become visible to the CPU.
 *
 * To manage this we actually track two tail pointers:
 *  1) An 'aging' tail with an associated timestamp that is tracked until we
 *     can trust the corresponding data is visible to the CPU; at which point
 *     it is considered 'aged'.
 *  2) An 'aged' tail that can be used for read()ing.
 *
 * The two separate pointers let us decouple read()s from tail pointer aging.
 *
 * The tail pointers are checked and updated at a limited rate within a hrtimer
 * callback (the same callback that is used for delivering EPOLLIN events)
 *
 * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
 * indicates that an updated tail pointer is needed.
 *
 * Most of the implementation details for this workaround are in
 * oa_buffer_check_unlocked() and _append_oa_reports()
 *
 * Note for posterity: previously the driver used to define an effective tail
 * pointer that lagged the real pointer by a 'tail margin' measured in bytes
 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency.
 * This was flawed considering that the OA unit may also automatically generate
 * non-periodic reports (such as on context switch) or the OA unit may be
 * enabled without any periodic sampling.
 */
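
/*
 * Worked example (illustrative only): with OA_BUFFER_SIZE = 16M, OA_TAKEN()
 * computes how many bytes of reports sit between a head and tail offset,
 * correctly handling wrap-around of the circular buffer:
 *
 *	head = 16M - 256	(one unread 256 byte report before the end)
 *	tail = 512		(the HW has wrapped and written two more
 *				 256 byte reports at the start)
 *
 *	OA_TAKEN(tail, head) = (512 - (16M - 256)) & (16M - 1) = 768
 *
 * i.e. three 256 byte reports are available even though tail < head
 * numerically. The same arithmetic underpins the aged/aging tail checks
 * described above.
 */
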
#define OA_TAIL_MARGIN_NSEC	100000ULL
#define INVALID_TAIL_PTR	0xffffffff

/* frequency for checking whether the OA unit has written new reports to the
 * circular OA buffer...
 */
#define POLL_FREQUENCY 200
#define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)

/* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
static u32 i915_perf_stream_paranoid = true;

/* The maximum exponent the hardware accepts is 63 (essentially it selects one
 * of the 64bit timestamp bits to trigger reports from) but there's currently
 * no known use case for sampling as infrequently as once per 47 thousand years.
 *
 * Since the timestamps included in OA reports are only 32bits it seems
 * reasonable to limit the OA exponent where it's still possible to account for
 * overflow in OA report timestamps.
 */
#define OA_EXPONENT_MAX 31

#define INVALID_CTX_ID 0xffffffff

/* On Gen8+ automatically triggered OA reports include a 'reason' field... */
#define OAREPORT_REASON_MASK           0x3f
#define OAREPORT_REASON_SHIFT          19
#define OAREPORT_REASON_TIMER          (1<<0)
#define OAREPORT_REASON_CTX_SWITCH     (1<<3)
#define OAREPORT_REASON_CLK_RATIO      (1<<5)


/* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate
 *
 * The highest sampling frequency we can theoretically program the OA unit
 * with is always half the timestamp frequency: E.g. 6.25MHz for Haswell.
 *
 * Initialized just before we register the sysctl parameter.
 */
static int oa_sample_rate_hard_limit;

/* Theoretically we can program the OA unit to sample every 160ns but don't
 * allow that by default unless root...
 *
 * The default threshold of 100000Hz is based on perf's similar
 * kernel.perf_event_max_sample_rate sysctl parameter.
 */
static u32 i915_oa_max_sample_rate = 100000;
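
/*
 * For reference, the sampling period selected by an OA exponent follows from
 * the timer toggling one of the timestamp bits, i.e. roughly:
 *
 *	period_ns = (2 << exponent) * NSEC_PER_SEC / timestamp_frequency_hz
 *
 * A sketch, assuming Haswell's 12.5MHz timestamp frequency (the driver's
 * actual conversion helper lives elsewhere in this file):
 *
 *	exponent 0                    -> 2 / 12.5MHz    = 160ns (as noted above)
 *	exponent OA_EXPONENT_MAX (31) -> 2^32 / 12.5MHz ~= 343s
 *
 * which is also why exponents above 31 aren't useful while OA report
 * timestamps are only 32 bits wide.
 */
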
/* XXX: beware if future OA HW adds new report formats that the current
 * code assumes all reports have a power-of-two size and ~(size - 1) can
 * be used as a mask to align the OA tail pointer.
 */
static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A13]	    = { 0, 64 },
	[I915_OA_FORMAT_A29]	    = { 1, 128 },
	[I915_OA_FORMAT_A13_B8_C8]  = { 2, 128 },
	/* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */
	[I915_OA_FORMAT_B4_C8]	    = { 4, 64 },
	[I915_OA_FORMAT_A45_B8_C8]  = { 5, 256 },
	[I915_OA_FORMAT_B4_C8_A16]  = { 6, 128 },
	[I915_OA_FORMAT_C4_B8]	    = { 7, 64 },
};

static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
	[I915_OA_FORMAT_A12]		    = { 0, 64 },
	[I915_OA_FORMAT_A12_B8_C8]	    = { 2, 128 },
	[I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
	[I915_OA_FORMAT_C4_B8]		    = { 7, 64 },
};

#define SAMPLE_OA_REPORT      (1<<0)

/**
 * struct perf_open_properties - for validated properties given to open a stream
 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags
 * @single_context: Whether a single or all gpu contexts should be monitored
 * @ctx_handle: A gem ctx handle for use with @single_context
 * @metrics_set: An ID for an OA unit metric set advertised via sysfs
 * @oa_format: An OA unit HW report format
 * @oa_periodic: Whether to enable periodic OA unit sampling
 * @oa_period_exponent: The OA unit sampling period is derived from this
 *
 * As read_properties_unlocked() enumerates and validates the properties given
 * to open a stream of metrics the configuration is built up in the structure
 * which starts out zero initialized.
 */
struct perf_open_properties {
	u32 sample_flags;

	u64 single_context:1;
	u64 ctx_handle;

	/* OA sampling state */
	int metrics_set;
	int oa_format;
	bool oa_periodic;
	int oa_period_exponent;
};

static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer);
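
/*
 * For illustration only: a userspace property list such as the one in the
 * overview sketch above,
 *
 *	DRM_I915_PERF_PROP_SAMPLE_OA,		1,
 *	DRM_I915_PERF_PROP_OA_METRICS_SET,	metrics_set_id,
 *	DRM_I915_PERF_PROP_OA_FORMAT,		I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *	DRM_I915_PERF_PROP_OA_EXPONENT,		16,
 *
 * would be validated by read_properties_unlocked() into roughly:
 *
 *	props->sample_flags = SAMPLE_OA_REPORT;
 *	props->metrics_set = metrics_set_id;
 *	props->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;
 *	props->oa_periodic = true;
 *	props->oa_period_exponent = 16;
 *
 * (metrics_set_id is a stand-in for an ID read from sysfs.)
 */
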
static void free_oa_config(struct drm_i915_private *dev_priv,
			   struct i915_oa_config *oa_config)
{
	if (!PTR_ERR(oa_config->flex_regs))
		kfree(oa_config->flex_regs);
	if (!PTR_ERR(oa_config->b_counter_regs))
		kfree(oa_config->b_counter_regs);
	if (!PTR_ERR(oa_config->mux_regs))
		kfree(oa_config->mux_regs);
	kfree(oa_config);
}

static void put_oa_config(struct drm_i915_private *dev_priv,
			  struct i915_oa_config *oa_config)
{
	if (!atomic_dec_and_test(&oa_config->ref_count))
		return;

	free_oa_config(dev_priv, oa_config);
}

static int get_oa_config(struct drm_i915_private *dev_priv,
			 int metrics_set,
			 struct i915_oa_config **out_config)
{
	int ret;

	if (metrics_set == 1) {
		*out_config = &dev_priv->perf.test_config;
		atomic_inc(&dev_priv->perf.test_config.ref_count);
		return 0;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		return ret;

	*out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set);
	if (!*out_config)
		ret = -EINVAL;
	else
		atomic_inc(&(*out_config)->ref_count);

	mutex_unlock(&dev_priv->perf.metrics_lock);

	return ret;
}

static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK;
}

static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus1 = I915_READ(GEN7_OASTATUS1);

	return oastatus1 & GEN7_OASTATUS1_TAIL_MASK;
}

/**
 * oa_buffer_check_unlocked - check for data and update tail ptr state
 * @stream: i915 stream instance
 *
 * This is either called via fops (for blocking reads in user ctx) or the poll
 * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check
 * if there is data available for userspace to read.
 *
 * This function is central to providing a workaround for the OA unit tail
 * pointer having a race with respect to what data is visible to the CPU.
 * It is responsible for reading tail pointers from the hardware and giving
 * the pointers time to 'age' before they are made available for reading.
 * (See description of OA_TAIL_MARGIN_NSEC above for further details.)
 *
 * Besides returning true when there is data available to read() this function
 * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp
 * and .aged_tail_idx state used for reading.
 *
 * Note: It's safe to read OA config state here unlocked, assuming that this is
 * only called while the stream is enabled, while the global OA configuration
 * can't be modified.
 *
 * Returns: %true if the OA buffer contains data, else %false
 */
static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = stream->oa_buffer.format_size;
	unsigned long flags;
	unsigned int aged_idx;
	u32 head, hw_tail, aged_tail, aging_tail;
	u64 now;

	/* We have to consider the (unlikely) possibility that read() errors
	 * could result in an OA buffer reset which might reset the head,
	 * tails[] and aged_tail state.
	 */
	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	/* NB: The head we observe here might effectively be a little out of
	 * date (between head and tails[aged_idx].offset if there is currently
	 * a read() in progress).
	 */
	head = stream->oa_buffer.head;

	aged_idx = stream->oa_buffer.aged_tail_idx;
	aged_tail = stream->oa_buffer.tails[aged_idx].offset;
	aging_tail = stream->oa_buffer.tails[!aged_idx].offset;

	hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream);

	/* The tail pointer increases in 64 byte increments,
	 * not in report_size steps...
	 */
	hw_tail &= ~(report_size - 1);

	now = ktime_get_mono_fast_ns();

	/* Update the aged tail
	 *
	 * Flip the tail pointer available for read()s once the aging tail is
	 * old enough to trust that the corresponding data will be visible to
	 * the CPU...
	 *
	 * Do this before updating the aging pointer in case we may be able to
	 * immediately start aging a new pointer too (if new data has become
	 * available) without needing to wait for a later hrtimer callback.
	 */
	if (aging_tail != INVALID_TAIL_PTR &&
	    ((now - stream->oa_buffer.aging_timestamp) >
	     OA_TAIL_MARGIN_NSEC)) {

		aged_idx ^= 1;
		stream->oa_buffer.aged_tail_idx = aged_idx;

		aged_tail = aging_tail;

		/* Mark that we need a new pointer to start aging... */
		stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR;
		aging_tail = INVALID_TAIL_PTR;
	}

	/* Update the aging tail
	 *
	 * We throttle aging tail updates until we have a new tail that
	 * represents >= one report more data than is already available for
	 * reading.
	 * This ensures there will be enough data for a successful read once
	 * this new pointer has aged and ensures we will give the new pointer
	 * time to age.
	 */
	if (aging_tail == INVALID_TAIL_PTR &&
	    (aged_tail == INVALID_TAIL_PTR ||
	     OA_TAKEN(hw_tail, aged_tail) >= report_size)) {
		struct i915_vma *vma = stream->oa_buffer.vma;
		u32 gtt_offset = i915_ggtt_offset(vma);

		/* Be paranoid and do a bounds check on the pointer read back
		 * from hardware, just in case some spurious hardware condition
		 * could put the tail out of bounds...
		 */
		if (hw_tail >= gtt_offset &&
		    hw_tail < (gtt_offset + OA_BUFFER_SIZE)) {
			stream->oa_buffer.tails[!aged_idx].offset =
				aging_tail = hw_tail;
			stream->oa_buffer.aging_timestamp = now;
		} else {
			DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n",
				  hw_tail);
		}
	}

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	return aged_tail == INVALID_TAIL_PTR ?
		false : OA_TAKEN(aged_tail, head) >= report_size;
}

/**
 * append_oa_status - Appends a status record to a userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @type: The kind of status to report to userspace
 *
 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`)
 * into the userspace read() buffer.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_status(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    enum drm_i915_perf_record_type type)
{
	struct drm_i915_perf_record_header header = { type, 0, sizeof(header) };

	if ((count - *offset) < header.size)
		return -ENOSPC;

	if (copy_to_user(buf + *offset, &header, sizeof(header)))
		return -EFAULT;

	(*offset) += header.size;

	return 0;
}

/**
 * append_oa_sample - Copies single OA report into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 * @report: A single OA report to (optionally) include as part of the sample
 *
 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*`
 * properties when opening a stream, tracked as `stream->sample_flags`. This
 * function copies the requested components of a single sample to the given
 * read() @buf.
 *
 * The @buf @offset will only be updated on success.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int append_oa_sample(struct i915_perf_stream *stream,
			    char __user *buf,
			    size_t count,
			    size_t *offset,
			    const u8 *report)
{
	int report_size = stream->oa_buffer.format_size;
	struct drm_i915_perf_record_header header;
	u32 sample_flags = stream->sample_flags;

	header.type = DRM_I915_PERF_RECORD_SAMPLE;
	header.pad = 0;
	header.size = stream->sample_size;

	if ((count - *offset) < header.size)
		return -ENOSPC;

	buf += *offset;
	if (copy_to_user(buf, &header, sizeof(header)))
		return -EFAULT;
	buf += sizeof(header);

	if (sample_flags & SAMPLE_OA_REPORT) {
		if (copy_to_user(buf, report, report_size))
			return -EFAULT;
	}

	(*offset) += header.size;

	return 0;
}
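
/*
 * For reference, a minimal userspace sketch (illustrative only, error
 * handling omitted) of consuming the record stream produced by the append_*
 * helpers above, where fd is a stream file descriptor returned by
 * DRM_IOCTL_I915_PERF_OPEN. A DRM_I915_PERF_RECORD_SAMPLE record is a
 * drm_i915_perf_record_header immediately followed by the raw OA report when
 * DRM_I915_PERF_PROP_SAMPLE_OA was requested; the _LOST records carry no
 * payload:
 *
 *	char buf[4096];
 *	ssize_t len = read(fd, buf, sizeof(buf));
 *	ssize_t pos = 0;
 *	int n_samples = 0, n_lost = 0;
 *
 *	while (pos < len) {
 *		const struct drm_i915_perf_record_header *header =
 *			(const void *)(buf + pos);
 *
 *		if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
 *			n_samples++;
 *		else if (header->type == DRM_I915_PERF_RECORD_OA_REPORT_LOST ||
 *			 header->type == DRM_I915_PERF_RECORD_OA_BUFFER_LOST)
 *			n_lost++;
 *
 *		pos += header->size;
 *	}
 */
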
/**
 * gen8_append_oa_reports - Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen8_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	aged_tail_idx = stream->oa_buffer.aged_tail_idx;
	tail = stream->oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/*
	 * NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/*
	 * An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;
		u32 ctx_id;
		u32 reason;

		/*
		 * All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/*
		 * The reason field includes flags identifying what
		 * triggered this specific report (mostly timer
		 * triggered or e.g. due to a context switch).
		 *
		 * This field is never expected to be zero so we can
		 * check that the report isn't invalid before copying
		 * it to userspace...
		 */
		reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
			  OAREPORT_REASON_MASK);
		if (reason == 0) {
			if (__ratelimit(&dev_priv->perf.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ctx_id = report32[2] & stream->specific_ctx_id_mask;

		/*
		 * Squash whatever is in the CTX_ID field if it's marked as
		 * invalid to be sure we avoid false-positive, single-context
		 * filtering below...
		 *
		 * Note: that we don't clear the valid_ctx_bit so userspace can
		 * understand that the ID has been squashed by the kernel.
		 */
		if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit))
			ctx_id = report32[2] = INVALID_CTX_ID;

		/*
		 * NB: For Gen 8 the OA unit no longer supports clock gating
		 * off for a specific context and the kernel can't securely
		 * stop the counters from updating as system-wide / global
		 * values.
		 *
		 * Automatic reports now include a context ID so reports can be
		 * filtered on the cpu but it's not worth trying to
		 * automatically subtract/hide counter progress for other
		 * contexts while filtering since we can't stop userspace
		 * issuing MI_REPORT_PERF_COUNT commands which would still
		 * provide a side-band view of the real values.
		 *
		 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
		 * to normalize counters for a single filtered context then it
		 * needs to be forwarded bookend context-switch reports so that
		 * it can track switches in between MI_REPORT_PERF_COUNT
		 * commands and can itself subtract/ignore the progress of
		 * counters associated with other contexts. Note that the
		 * hardware automatically triggers reports when switching to a
		 * new context which are tagged with the ID of the newly active
		 * context. To avoid the complexity (and likely fragility) of
		 * reading ahead while parsing reports to try and minimize
		 * forwarding redundant context switch reports (i.e. between
		 * other, unrelated contexts) we simply elect to forward them
		 * all.
		 *
		 * We don't rely solely on the reason field to identify context
		 * switches since it's not uncommon for periodic samples to
		 * identify a switch before any 'context switch' report.
		 */
		if (!dev_priv->perf.exclusive_stream->ctx ||
		    stream->specific_ctx_id == ctx_id ||
		    stream->oa_buffer.last_ctx_id == stream->specific_ctx_id ||
		    reason & OAREPORT_REASON_CTX_SWITCH) {

			/*
			 * While filtering for a single context we avoid
			 * leaking the IDs of other contexts.
			 */
			if (dev_priv->perf.exclusive_stream->ctx &&
			    stream->specific_ctx_id != ctx_id) {
				report32[2] = INVALID_CTX_ID;
			}

			ret = append_oa_sample(stream, buf, count, offset,
					       report);
			if (ret)
				break;

			stream->oa_buffer.last_ctx_id = ctx_id;
		}

		/*
		 * The above reason field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/*
		 * We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK);
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen8_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks OA unit status registers and if necessary appends corresponding
 * status records for userspace (such as for a buffer full condition) and then
 * initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * NB: some data may be successfully copied to the userspace buffer
 * even if an error is returned, and this is reflected in the
 * updated @offset.
 *
 * Returns: zero on success or a negative error code
 */
static int gen8_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus;
	int ret;

	if (WARN_ON(!stream->oa_buffer.vaddr))
		return -EIO;

	oastatus = I915_READ(GEN8_OASTATUS);

	/*
	 * We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * Although theoretically we could handle this more gracefully
	 * sometimes, some Gens don't correctly suppress certain
	 * automatically triggered reports in this condition and so we
	 * have to assume that old reports are now being trampled
	 * over.
	 *
	 * Considering how we don't currently give userspace control
	 * over the OA buffer size and always configure a large 16MB
	 * buffer, then a buffer overflow does anyway likely indicate
	 * that something has gone quite badly wrong.
	 */
	if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  stream->period_exponent);

		dev_priv->perf.ops.oa_disable(stream);
		dev_priv->perf.ops.oa_enable(stream);

		/*
		 * Note: .oa_enable() is expected to re-init the oabuffer and
		 * reset GEN8_OASTATUS for us
		 */
		oastatus = I915_READ(GEN8_OASTATUS);
	}

	if (oastatus & GEN8_OASTATUS_REPORT_LOST) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		I915_WRITE(GEN8_OASTATUS,
			   oastatus & ~GEN8_OASTATUS_REPORT_LOST);
	}

	return gen8_append_oa_reports(stream, buf, count, offset);
}

/**
 * gen7_append_oa_reports - Copies all buffered OA reports into userspace read() buffer.
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Notably any error condition resulting in a short read (-%ENOSPC or
 * -%EFAULT) will be returned even though one or more records may
 * have been successfully copied. In this case it's up to the caller
 * to decide if the error should be squashed before returning to
 * userspace.
 *
 * Note: reports are consumed from the head, and appended to the
 * tail, so the tail chases the head?... If you think that's mad
 * and back-to-front you're not alone, but this follows the
 * Gen PRM naming convention.
 *
 * Returns: 0 on success, negative error code on failure.
 */
static int gen7_append_oa_reports(struct i915_perf_stream *stream,
				  char __user *buf,
				  size_t count,
				  size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	int report_size = stream->oa_buffer.format_size;
	u8 *oa_buf_base = stream->oa_buffer.vaddr;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	u32 mask = (OA_BUFFER_SIZE - 1);
	size_t start_offset = *offset;
	unsigned long flags;
	unsigned int aged_tail_idx;
	u32 head, tail;
	u32 taken;
	int ret = 0;

	if (WARN_ON(!stream->enabled))
		return -EIO;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	head = stream->oa_buffer.head;
	aged_tail_idx = stream->oa_buffer.aged_tail_idx;
	tail = stream->oa_buffer.tails[aged_tail_idx].offset;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/* An invalid tail pointer here means we're still waiting for the poll
	 * hrtimer callback to give us a pointer
	 */
	if (tail == INVALID_TAIL_PTR)
		return -EAGAIN;

	/* NB: oa_buffer.head/tail include the gtt_offset which we don't want
	 * while indexing relative to oa_buf_base.
	 */
	head -= gtt_offset;
	tail -= gtt_offset;

	/* An out of bounds or misaligned head or tail pointer implies a driver
	 * bug since we validate + align the tail pointers we read from the
	 * hardware and we are in full control of the head pointer which should
	 * only be incremented by multiples of the report size (notably also
	 * all a power of two).
	 */
	if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
		      tail > OA_BUFFER_SIZE || tail % report_size,
		      "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
		      head, tail))
		return -EIO;


	for (/* none */;
	     (taken = OA_TAKEN(tail, head));
	     head = (head + report_size) & mask) {
		u8 *report = oa_buf_base + head;
		u32 *report32 = (void *)report;

		/* All the report sizes factor neatly into the buffer
		 * size so we never expect to see a report split
		 * between the beginning and end of the buffer.
		 *
		 * Given the initial alignment check a misalignment
		 * here would imply a driver bug that would result
		 * in an overrun.
		 */
		if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
			DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
			break;
		}

		/* The report-ID field for periodic samples includes
		 * some undocumented flags related to what triggered
		 * the report and is never expected to be zero so we
		 * can check that the report isn't invalid before
		 * copying it to userspace...
		 */
		if (report32[0] == 0) {
			if (__ratelimit(&dev_priv->perf.spurious_report_rs))
				DRM_NOTE("Skipping spurious, invalid OA report\n");
			continue;
		}

		ret = append_oa_sample(stream, buf, count, offset, report);
		if (ret)
			break;

		/* The above report-id field sanity check is based on
		 * the assumption that the OA buffer is initially
		 * zeroed and we reset the field after copying so the
		 * check is still meaningful once old reports start
		 * being overwritten.
		 */
		report32[0] = 0;
	}

	if (start_offset != *offset) {
		spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

		/* We removed the gtt_offset for the copy loop above, indexing
		 * relative to oa_buf_base so put back here...
		 */
		head += gtt_offset;

		I915_WRITE(GEN7_OASTATUS2,
			   ((head & GEN7_OASTATUS2_HEAD_MASK) |
			    GEN7_OASTATUS2_MEM_SELECT_GGTT));
		stream->oa_buffer.head = head;

		spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
	}

	return ret;
}

/**
 * gen7_oa_read - copy status records then buffered OA reports
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Checks Gen 7 specific OA unit status registers and if necessary appends
 * corresponding status records for userspace (such as for a buffer full
 * condition) and then initiates appending any buffered OA reports.
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int gen7_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 oastatus1;
	int ret;

	if (WARN_ON(!stream->oa_buffer.vaddr))
		return -EIO;

	oastatus1 = I915_READ(GEN7_OASTATUS1);

	/* XXX: On Haswell we don't have a safe way to clear oastatus1
	 * bits while the OA unit is enabled (while the tail pointer
	 * may be updated asynchronously) so we ignore status bits
	 * that have already been reported to userspace.
	 */
	oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1;

	/* We treat OABUFFER_OVERFLOW as a significant error:
	 *
	 * - The status can be interpreted to mean that the buffer is
	 *   currently full (with a higher precedence than OA_TAKEN()
	 *   which will start to report a near-empty buffer after an
	 *   overflow) but it's awkward that we can't clear the status
	 *   on Haswell, so without a reset we won't be able to catch
	 *   the state again.
	 *
	 * - Since it also implies the HW has started overwriting old
	 *   reports it may also affect our sanity checks for invalid
	 *   reports when copying to userspace that assume new reports
	 *   are being written to cleared memory.
	 *
	 * - In the future we may want to introduce a flight recorder
	 *   mode where the driver will automatically maintain a safe
	 *   guard band between head/tail, avoiding this overflow
	 *   condition, but we avoid the added driver complexity for
	 *   now.
	 */
	if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_BUFFER_LOST);
		if (ret)
			return ret;

		DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n",
			  stream->period_exponent);

		dev_priv->perf.ops.oa_disable(stream);
		dev_priv->perf.ops.oa_enable(stream);

		oastatus1 = I915_READ(GEN7_OASTATUS1);
	}

	if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) {
		ret = append_oa_status(stream, buf, count, offset,
				       DRM_I915_PERF_RECORD_OA_REPORT_LOST);
		if (ret)
			return ret;
		dev_priv->perf.gen7_latched_oastatus1 |=
			GEN7_OASTATUS1_REPORT_LOST;
	}

	return gen7_append_oa_reports(stream, buf, count, offset);
}

/**
 * i915_oa_wait_unlocked - handles blocking IO until OA data available
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Called when userspace tries to read() from a blocking stream FD opened
 * for OA metrics. It waits until the hrtimer callback finds a non-empty
 * OA buffer and wakes us.
 *
 * Note: it's acceptable to have this return with some false positives
 * since any subsequent read handling will return -EAGAIN if there isn't
 * really data ready for userspace yet.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_wait_unlocked(struct i915_perf_stream *stream)
{
	/* We would wait indefinitely if periodic sampling is not enabled */
	if (!stream->periodic)
		return -EIO;

	return wait_event_interruptible(stream->poll_wq,
					oa_buffer_check_unlocked(stream));
}

/**
 * i915_oa_poll_wait - call poll_wait() for an OA stream poll()
 * @stream: An i915-perf stream opened for OA metrics
 * @file: An i915 perf stream file
 * @wait: poll() state table
 *
 * For handling userspace polling on an i915 perf stream opened for OA metrics,
 * this starts a poll_wait with the wait queue that our hrtimer callback wakes
 * when it sees data ready to read in the circular OA buffer.
 */
static void i915_oa_poll_wait(struct i915_perf_stream *stream,
			      struct file *file,
			      poll_table *wait)
{
	poll_wait(file, &stream->poll_wq, wait);
}

/**
 * i915_oa_read - just calls through to &i915_oa_ops->read
 * @stream: An i915-perf stream opened for OA metrics
 * @buf: destination buffer given by userspace
 * @count: the number of bytes userspace wants to read
 * @offset: (inout): the current position for writing into @buf
 *
 * Updates @offset according to the number of bytes successfully copied into
 * the userspace buffer.
 *
 * Returns: zero on success or a negative error code
 */
static int i915_oa_read(struct i915_perf_stream *stream,
			char __user *buf,
			size_t count,
			size_t *offset)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	return dev_priv->perf.ops.read(stream, buf, count, offset);
}

static struct intel_context *oa_pin_context(struct i915_perf_stream *stream)
{
	struct i915_gem_engines_iter it;
	struct i915_gem_context *ctx = stream->ctx;
	struct intel_context *ce;
	int err;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (ce->engine->class != RENDER_CLASS)
			continue;

		/*
		 * As the ID is the gtt offset of the context's vma we
		 * pin the vma to ensure the ID remains fixed.
		 */
		err = intel_context_pin(ce);
		if (err == 0) {
			stream->pinned_ctx = ce;
			break;
		}
	}
	i915_gem_context_unlock_engines(ctx);

	return stream->pinned_ctx;
}

/**
 * oa_get_render_ctx_id - determine and hold ctx hw id
 * @stream: An i915-perf stream opened for OA metrics
 *
 * Determine the render context hw id, and ensure it remains fixed for the
 * lifetime of the stream. This ensures that we don't have to worry about
 * updating the context ID in OACONTROL on the fly.
 *
 * Returns: zero on success or a negative error code
 */
static int oa_get_render_ctx_id(struct i915_perf_stream *stream)
{
	struct drm_i915_private *i915 = stream->dev_priv;
	struct intel_context *ce;

	ce = oa_pin_context(stream);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	switch (INTEL_GEN(i915)) {
	case 7: {
		/*
		 * On Haswell we don't do any post processing of the reports
		 * and don't need to use the mask.
		 */
		stream->specific_ctx_id = i915_ggtt_offset(ce->state);
		stream->specific_ctx_id_mask = 0;
		break;
	}

	case 8:
	case 9:
	case 10:
		if (USES_GUC_SUBMISSION(i915)) {
			/*
			 * When using GuC, the context descriptor we write in
			 * i915 is read by GuC and rewritten before it's
			 * actually written into the hardware. The LRCA is
			 * what is put into the context id field of the
			 * context descriptor by GuC. Because it's aligned to
			 * a page, the lower 12bits are always at 0 and
			 * dropped by GuC. They won't be part of the context
			 * ID in the OA reports, so squash those lower bits.
			 */
			stream->specific_ctx_id =
				lower_32_bits(ce->lrc_desc) >> 12;

			/*
			 * GuC uses the top bit to signal proxy submission, so
			 * ignore that bit.
			 */
			stream->specific_ctx_id_mask =
				(1U << (GEN8_CTX_ID_WIDTH - 1)) - 1;
		} else {
			stream->specific_ctx_id_mask =
				(1U << GEN8_CTX_ID_WIDTH) - 1;
			stream->specific_ctx_id = stream->specific_ctx_id_mask;
		}
		break;

	case 11:
	case 12: {
		stream->specific_ctx_id_mask =
			((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32);
		stream->specific_ctx_id = stream->specific_ctx_id_mask;
		break;
	}

	default:
		MISSING_CASE(INTEL_GEN(i915));
	}

	ce->tag = stream->specific_ctx_id_mask;

	DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n",
			 stream->specific_ctx_id,
			 stream->specific_ctx_id_mask);

	return 0;
}

/**
 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold
 * @stream: An i915-perf stream opened for OA metrics
 *
 * If anything was done in oa_get_render_ctx_id() to ensure the context HW ID
 * would remain valid for the lifetime of the stream, that is undone here.
 */
static void oa_put_render_ctx_id(struct i915_perf_stream *stream)
{
	struct intel_context *ce;

	ce = fetch_and_zero(&stream->pinned_ctx);
	if (ce) {
		ce->tag = 0; /* recomputed on next submission after parking */
		intel_context_unpin(ce);
	}

	stream->specific_ctx_id = INVALID_CTX_ID;
	stream->specific_ctx_id_mask = 0;
}

static void
free_oa_buffer(struct i915_perf_stream *stream)
{
	i915_vma_unpin_and_release(&stream->oa_buffer.vma,
				   I915_VMA_RELEASE_MAP);

	stream->oa_buffer.vaddr = NULL;
}

static void i915_oa_stream_destroy(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	BUG_ON(stream != dev_priv->perf.exclusive_stream);

	/*
	 * Unset exclusive_stream first, it will be checked while disabling
	 * the metric set on gen8+.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	dev_priv->perf.exclusive_stream = NULL;
	dev_priv->perf.ops.disable_metric_set(stream);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	free_oa_buffer(stream);

	intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL);
	intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref);

	if (stream->ctx)
		oa_put_render_ctx_id(stream);

	put_oa_config(dev_priv, stream->oa_config);

	if (dev_priv->perf.spurious_report_rs.missed) {
		DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n",
			 dev_priv->perf.spurious_report_rs.missed);
	}
}

static void gen7_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	/* Pre-DevBDW: OABUFFER must be set with counters off,
	 * before OASTATUS1, but after OASTATUS2
	 */
	I915_WRITE(GEN7_OASTATUS2,
		   gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */
	stream->oa_buffer.head = gtt_offset;

	I915_WRITE(GEN7_OABUFFER, gtt_offset);

	I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */

	/* Mark that we need updated tail pointers to read from...
	 */
	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/* On Haswell we have to track which OASTATUS1 flags we've
	 * already seen since they can't be cleared while periodic
	 * sampling is enabled.
	 */
	dev_priv->perf.gen7_latched_oastatus1 = 0;

	/* NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen7_append_oa_reports() that looks at the
	 * report-id field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/* Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	stream->pollin = false;
}

static void gen8_init_oa_buffer(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
	unsigned long flags;

	spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);

	I915_WRITE(GEN8_OASTATUS, 0);
	I915_WRITE(GEN8_OAHEADPTR, gtt_offset);
	stream->oa_buffer.head = gtt_offset;

	I915_WRITE(GEN8_OABUFFER_UDW, 0);

	/*
	 * PRM says:
	 *
	 *  "This MMIO must be set before the OATAILPTR
	 *  register and after the OAHEADPTR register. This is
	 *  to enable proper functionality of the overflow
	 *  bit."
	 */
	I915_WRITE(GEN8_OABUFFER, gtt_offset |
		   OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT);
	I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK);

	/* Mark that we need updated tail pointers to read from... */
	stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
	stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;

	/*
	 * Reset state used to recognise context switches, affecting which
	 * reports we will forward to userspace while filtering for a single
	 * context.
	 */
	stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;

	spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);

	/*
	 * NB: although the OA buffer will initially be allocated
	 * zeroed via shmfs (and so this memset is redundant when
	 * first allocating), we may re-init the OA buffer, either
	 * when re-enabling a stream or in error/reset paths.
	 *
	 * The reason we clear the buffer for each re-init is for the
	 * sanity check in gen8_append_oa_reports() that looks at the
	 * reason field to make sure it's non-zero which relies on
	 * the assumption that new reports are being written to zeroed
	 * memory...
	 */
	memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);

	/*
	 * Maybe make ->pollin per-stream state if we support multiple
	 * concurrent streams in the future.
	 */
	stream->pollin = false;
}

static int alloc_oa_buffer(struct i915_perf_stream *stream)
{
	struct drm_i915_gem_object *bo;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct i915_vma *vma;
	int ret;

	if (WARN_ON(stream->oa_buffer.vma))
		return -ENODEV;

	BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
	BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);

	bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
	if (IS_ERR(bo)) {
		DRM_ERROR("Failed to allocate OA buffer\n");
		return PTR_ERR(bo);
	}

	i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);

	/* PreHSW required 512K alignment, HSW requires 16M */
	vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err_unref;
	}
	stream->oa_buffer.vma = vma;

	stream->oa_buffer.vaddr =
		i915_gem_object_pin_map(bo, I915_MAP_WB);
	if (IS_ERR(stream->oa_buffer.vaddr)) {
		ret = PTR_ERR(stream->oa_buffer.vaddr);
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
			 i915_ggtt_offset(stream->oa_buffer.vma),
			 stream->oa_buffer.vaddr);

	return 0;

err_unpin:
	__i915_vma_unpin(vma);

err_unref:
	i915_gem_object_put(bo);

	stream->oa_buffer.vaddr = NULL;
	stream->oa_buffer.vma = NULL;

	return ret;
}

static void config_oa_regs(struct drm_i915_private *dev_priv,
			   const struct i915_oa_reg *regs,
			   u32 n_regs)
{
	u32 i;

	for (i = 0; i < n_regs; i++) {
		const struct i915_oa_reg *reg = regs + i;

		I915_WRITE(reg->addr, reg->value);
	}
}

static void delay_after_mux(void)
{
	/*
	 * It apparently takes a fairly long time for a new MUX
	 * configuration to be applied after these register writes.
	 * This delay duration was derived empirically based on the
	 * render_basic config but hopefully it covers the maximum
	 * configuration latency.
	 *
	 * As a fallback, the checks in _append_oa_reports() to skip
	 * invalid OA reports do also seem to work to discard reports
	 * generated before this config has completed - albeit not
	 * silently.
	 *
	 * Unfortunately this is essentially a magic number, since we
	 * don't currently know of a reliable mechanism for predicting
	 * how long the MUX config will take to apply and besides
	 * seeing invalid reports we don't know of a reliable way to
	 * explicitly check that the MUX config has landed.
	 *
	 * It's even possible we've mischaracterized the underlying
	 * problem - it just seems like the simplest explanation why
	 * a delay at this location would mitigate any invalid reports.
	 */
	usleep_range(15000, 20000);
}

static int hsw_enable_metric_set(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	const struct i915_oa_config *oa_config = stream->oa_config;

	/*
	 * PRM:
	 *
	 * OA unit is using “crclk” for its functionality. When trunk
	 * level clock gating takes place, OA clock would be gated,
	 * unable to count the events from non-render clock domain.
	 * Render clock gating must be disabled when OA is enabled to
	 * count the events from non-render domain.
Unit level clock 1589 * gating for RCS should also be disabled. 1590 */ 1591 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & 1592 ~GEN7_DOP_CLOCK_GATE_ENABLE)); 1593 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | 1594 GEN6_CSUNIT_CLOCK_GATE_DISABLE)); 1595 1596 config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); 1597 delay_after_mux(); 1598 1599 config_oa_regs(dev_priv, oa_config->b_counter_regs, 1600 oa_config->b_counter_regs_len); 1601 1602 return 0; 1603 } 1604 1605 static void hsw_disable_metric_set(struct i915_perf_stream *stream) 1606 { 1607 struct drm_i915_private *dev_priv = stream->dev_priv; 1608 1609 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & 1610 ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); 1611 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | 1612 GEN7_DOP_CLOCK_GATE_ENABLE)); 1613 1614 I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & 1615 ~GT_NOA_ENABLE)); 1616 } 1617 1618 static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, 1619 i915_reg_t reg) 1620 { 1621 u32 mmio = i915_mmio_reg_offset(reg); 1622 int i; 1623 1624 /* 1625 * This arbitrary default will select the 'EU FPU0 Pipeline 1626 * Active' event. In the future it's anticipated that there 1627 * will be an explicit 'No Event' we can select, but not yet... 1628 */ 1629 if (!oa_config) 1630 return 0; 1631 1632 for (i = 0; i < oa_config->flex_regs_len; i++) { 1633 if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio) 1634 return oa_config->flex_regs[i].value; 1635 } 1636 1637 return 0; 1638 } 1639 /* 1640 * NB: It must always remain pointer safe to run this even if the OA unit 1641 * has been disabled. 1642 * 1643 * It's fine to put out-of-date values into these per-context registers 1644 * in the case that the OA unit has been disabled. 1645 */ 1646 static void 1647 gen8_update_reg_state_unlocked(const struct intel_context *ce, 1648 const struct i915_perf_stream *stream) 1649 { 1650 struct drm_i915_private *i915 = ce->engine->i915; 1651 u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; 1652 u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; 1653 /* The MMIO offsets for Flex EU registers aren't contiguous */ 1654 i915_reg_t flex_regs[] = { 1655 EU_PERF_CNTL0, 1656 EU_PERF_CNTL1, 1657 EU_PERF_CNTL2, 1658 EU_PERF_CNTL3, 1659 EU_PERF_CNTL4, 1660 EU_PERF_CNTL5, 1661 EU_PERF_CNTL6, 1662 }; 1663 u32 *reg_state = ce->lrc_reg_state; 1664 int i; 1665 1666 reg_state[ctx_oactxctrl + 1] = 1667 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | 1668 (stream->periodic ? 
GEN8_OA_TIMER_ENABLE : 0) | 1669 GEN8_OA_COUNTER_RESUME; 1670 1671 for (i = 0; i < ARRAY_SIZE(flex_regs); i++) 1672 reg_state[ctx_flexeu0 + i * 2 + 1] = 1673 oa_config_flex_reg(stream->oa_config, flex_regs[i]); 1674 1675 reg_state[CTX_R_PWR_CLK_STATE] = intel_sseu_make_rpcs(i915, &ce->sseu); 1676 } 1677 1678 struct flex { 1679 i915_reg_t reg; 1680 u32 offset; 1681 u32 value; 1682 }; 1683 1684 static int 1685 gen8_store_flex(struct i915_request *rq, 1686 struct intel_context *ce, 1687 const struct flex *flex, unsigned int count) 1688 { 1689 u32 offset; 1690 u32 *cs; 1691 1692 cs = intel_ring_begin(rq, 4 * count); 1693 if (IS_ERR(cs)) 1694 return PTR_ERR(cs); 1695 1696 offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; 1697 do { 1698 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1699 *cs++ = offset + flex->offset * sizeof(u32); 1700 *cs++ = 0; 1701 *cs++ = flex->value; 1702 } while (flex++, --count); 1703 1704 intel_ring_advance(rq, cs); 1705 1706 return 0; 1707 } 1708 1709 static int 1710 gen8_load_flex(struct i915_request *rq, 1711 struct intel_context *ce, 1712 const struct flex *flex, unsigned int count) 1713 { 1714 u32 *cs; 1715 1716 GEM_BUG_ON(!count || count > 63); 1717 1718 cs = intel_ring_begin(rq, 2 * count + 2); 1719 if (IS_ERR(cs)) 1720 return PTR_ERR(cs); 1721 1722 *cs++ = MI_LOAD_REGISTER_IMM(count); 1723 do { 1724 *cs++ = i915_mmio_reg_offset(flex->reg); 1725 *cs++ = flex->value; 1726 } while (flex++, --count); 1727 *cs++ = MI_NOOP; 1728 1729 intel_ring_advance(rq, cs); 1730 1731 return 0; 1732 } 1733 1734 static int gen8_modify_context(struct intel_context *ce, 1735 const struct flex *flex, unsigned int count) 1736 { 1737 struct i915_request *rq; 1738 int err; 1739 1740 lockdep_assert_held(&ce->pin_mutex); 1741 1742 rq = i915_request_create(ce->engine->kernel_context); 1743 if (IS_ERR(rq)) 1744 return PTR_ERR(rq); 1745 1746 /* Serialise with the remote context */ 1747 err = intel_context_prepare_remote_request(ce, rq); 1748 if (err == 0) 1749 err = gen8_store_flex(rq, ce, flex, count); 1750 1751 i915_request_add(rq); 1752 return err; 1753 } 1754 1755 static int gen8_modify_self(struct intel_context *ce, 1756 const struct flex *flex, unsigned int count) 1757 { 1758 struct i915_request *rq; 1759 int err; 1760 1761 rq = i915_request_create(ce); 1762 if (IS_ERR(rq)) 1763 return PTR_ERR(rq); 1764 1765 err = gen8_load_flex(rq, ce, flex, count); 1766 1767 i915_request_add(rq); 1768 return err; 1769 } 1770 1771 static int gen8_configure_context(struct i915_gem_context *ctx, 1772 struct flex *flex, unsigned int count) 1773 { 1774 struct i915_gem_engines_iter it; 1775 struct intel_context *ce; 1776 int err = 0; 1777 1778 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { 1779 GEM_BUG_ON(ce == ce->engine->kernel_context); 1780 1781 if (ce->engine->class != RENDER_CLASS) 1782 continue; 1783 1784 err = intel_context_lock_pinned(ce); 1785 if (err) 1786 break; 1787 1788 flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu); 1789 1790 /* Otherwise OA settings will be set upon first use */ 1791 if (intel_context_is_pinned(ce)) 1792 err = gen8_modify_context(ce, flex, count); 1793 1794 intel_context_unlock_pinned(ce); 1795 if (err) 1796 break; 1797 } 1798 i915_gem_context_unlock_engines(ctx); 1799 1800 return err; 1801 } 1802 1803 /* 1804 * Manages updating the per-context aspects of the OA stream 1805 * configuration across all contexts. 
1806 * 1807 * The awkward consideration here is that OACTXCONTROL controls the 1808 * exponent for periodic sampling which is primarily used for system 1809 * wide profiling where we'd like a consistent sampling period even in 1810 * the face of context switches. 1811 * 1812 * Our approach of updating the register state context (as opposed to 1813 * say using a workaround batch buffer) ensures that the hardware 1814 * won't automatically reload an out-of-date timer exponent even 1815 * transiently before a WA BB could be parsed. 1816 * 1817 * This function needs to: 1818 * - Ensure the currently running context's per-context OA state is 1819 * updated 1820 * - Ensure that all existing contexts will have the correct per-context 1821 * OA state if they are scheduled for use. 1822 * - Ensure any new contexts will be initialized with the correct 1823 * per-context OA state. 1824 * 1825 * Note: it's only the RCS/Render context that has any OA state. 1826 */ 1827 static int gen8_configure_all_contexts(struct i915_perf_stream *stream, 1828 const struct i915_oa_config *oa_config) 1829 { 1830 struct drm_i915_private *i915 = stream->dev_priv; 1831 /* The MMIO offsets for Flex EU registers aren't contiguous */ 1832 const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; 1833 #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N) + 1) 1834 struct flex regs[] = { 1835 { 1836 GEN8_R_PWR_CLK_STATE, 1837 CTX_R_PWR_CLK_STATE, 1838 }, 1839 { 1840 GEN8_OACTXCONTROL, 1841 i915->perf.ctx_oactxctrl_offset + 1, 1842 ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | 1843 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | 1844 GEN8_OA_COUNTER_RESUME) 1845 }, 1846 { EU_PERF_CNTL0, ctx_flexeuN(0) }, 1847 { EU_PERF_CNTL1, ctx_flexeuN(1) }, 1848 { EU_PERF_CNTL2, ctx_flexeuN(2) }, 1849 { EU_PERF_CNTL3, ctx_flexeuN(3) }, 1850 { EU_PERF_CNTL4, ctx_flexeuN(4) }, 1851 { EU_PERF_CNTL5, ctx_flexeuN(5) }, 1852 { EU_PERF_CNTL6, ctx_flexeuN(6) }, 1853 }; 1854 #undef ctx_flexeuN 1855 struct intel_engine_cs *engine; 1856 struct i915_gem_context *ctx, *cn; 1857 int i, err; 1858 1859 for (i = 2; i < ARRAY_SIZE(regs); i++) 1860 regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); 1861 1862 lockdep_assert_held(&i915->drm.struct_mutex); 1863 1864 /* 1865 * The OA register config is setup through the context image. This image 1866 * might be written to by the GPU on context switch (in particular on 1867 * lite-restore). This means we can't safely update a context's image, 1868 * if this context is scheduled/submitted to run on the GPU. 1869 * 1870 * We could emit the OA register config through the batch buffer but 1871 * this might leave small interval of time where the OA unit is 1872 * configured at an invalid sampling period. 1873 * 1874 * Note that since we emit all requests from a single ring, there 1875 * is still an implicit global barrier here that may cause a high 1876 * priority context to wait for an otherwise independent low priority 1877 * context. Contexts idle at the time of reconfiguration are not 1878 * trapped behind the barrier. 
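 *
 * (Here "lite-restore" loosely refers to the hardware resuming a context
 * that is still resident without going through a full state save/restore;
 * even in that case the hardware may write back into the context image.)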
1879 */ 1880 spin_lock(&i915->gem.contexts.lock); 1881 list_for_each_entry_safe(ctx, cn, &i915->gem.contexts.list, link) { 1882 if (ctx == i915->kernel_context) 1883 continue; 1884 1885 if (!kref_get_unless_zero(&ctx->ref)) 1886 continue; 1887 1888 spin_unlock(&i915->gem.contexts.lock); 1889 1890 err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); 1891 if (err) { 1892 i915_gem_context_put(ctx); 1893 return err; 1894 } 1895 1896 spin_lock(&i915->gem.contexts.lock); 1897 list_safe_reset_next(ctx, cn, link); 1898 i915_gem_context_put(ctx); 1899 } 1900 spin_unlock(&i915->gem.contexts.lock); 1901 1902 /* 1903 * After updating all other contexts, we need to modify ourselves. 1904 * If we don't modify the kernel_context, we do not get events while 1905 * idle. 1906 */ 1907 for_each_uabi_engine(engine, i915) { 1908 struct intel_context *ce = engine->kernel_context; 1909 1910 if (engine->class != RENDER_CLASS) 1911 continue; 1912 1913 regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); 1914 1915 err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs)); 1916 if (err) 1917 return err; 1918 } 1919 1920 return 0; 1921 } 1922 1923 static int gen8_enable_metric_set(struct i915_perf_stream *stream) 1924 { 1925 struct drm_i915_private *dev_priv = stream->dev_priv; 1926 const struct i915_oa_config *oa_config = stream->oa_config; 1927 int ret; 1928 1929 /* 1930 * We disable slice/unslice clock ratio change reports on SKL since 1931 * they are too noisy. The HW generates a lot of redundant reports 1932 * where the ratio hasn't really changed causing a lot of redundant 1933 * work to processes and increasing the chances we'll hit buffer 1934 * overruns. 1935 * 1936 * Although we don't currently use the 'disable overrun' OABUFFER 1937 * feature it's worth noting that clock ratio reports have to be 1938 * disabled before considering to use that feature since the HW doesn't 1939 * correctly block these reports. 1940 * 1941 * Currently none of the high-level metrics we have depend on knowing 1942 * this ratio to normalize. 1943 * 1944 * Note: This register is not power context saved and restored, but 1945 * that's OK considering that we disable RC6 while the OA unit is 1946 * enabled. 1947 * 1948 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to 1949 * be read back from automatically triggered reports, as part of the 1950 * RPT_ID field. 1951 */ 1952 if (IS_GEN_RANGE(dev_priv, 9, 11)) { 1953 I915_WRITE(GEN8_OA_DEBUG, 1954 _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | 1955 GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); 1956 } 1957 1958 /* 1959 * Update all contexts prior writing the mux configurations as we need 1960 * to make sure all slices/subslices are ON before writing to NOA 1961 * registers. 1962 */ 1963 ret = gen8_configure_all_contexts(stream, oa_config); 1964 if (ret) 1965 return ret; 1966 1967 config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); 1968 delay_after_mux(); 1969 1970 config_oa_regs(dev_priv, oa_config->b_counter_regs, 1971 oa_config->b_counter_regs_len); 1972 1973 return 0; 1974 } 1975 1976 static void gen8_disable_metric_set(struct i915_perf_stream *stream) 1977 { 1978 struct drm_i915_private *dev_priv = stream->dev_priv; 1979 1980 /* Reset all contexts' slices/subslices configurations. 
 */
	gen8_configure_all_contexts(stream, NULL);

	I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) &
				      ~GT_NOA_ENABLE));
}

static void gen10_disable_metric_set(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;

	/* Reset all contexts' slices/subslices configurations. */
	gen8_configure_all_contexts(stream, NULL);

	/* Make sure we disable noa to save power. */
	I915_WRITE(RPM_CONFIG1,
		   I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE);
}

static void gen7_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	struct i915_gem_context *ctx = stream->ctx;
	u32 ctx_id = stream->specific_ctx_id;
	bool periodic = stream->periodic;
	u32 period_exponent = stream->period_exponent;
	u32 report_format = stream->oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen7_init_oa_buffer(stream);

	I915_WRITE(GEN7_OACONTROL,
		   (ctx_id & GEN7_OACONTROL_CTX_MASK) |
		   (period_exponent <<
		    GEN7_OACONTROL_TIMER_PERIOD_SHIFT) |
		   (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) |
		   (report_format << GEN7_OACONTROL_FORMAT_SHIFT) |
		   (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) |
		   GEN7_OACONTROL_ENABLE);
}

static void gen8_oa_enable(struct i915_perf_stream *stream)
{
	struct drm_i915_private *dev_priv = stream->dev_priv;
	u32 report_format = stream->oa_buffer.format;

	/*
	 * Reset buf pointers so we don't forward reports from before now.
	 *
	 * Think carefully if considering trying to avoid this, since it
	 * also ensures status flags and the buffer itself are cleared
	 * in error paths, and we have checks for invalid reports based
	 * on the assumption that certain fields are written to zeroed
	 * memory which this helps maintain.
	 */
	gen8_init_oa_buffer(stream);

	/*
	 * Note: we don't rely on the hardware to perform single context
	 * filtering and instead filter on the cpu based on the context-id
	 * field of reports
	 */
	I915_WRITE(GEN8_OACONTROL, (report_format <<
				    GEN8_OA_REPORT_FORMAT_SHIFT) |
				   GEN8_OA_COUNTER_ENABLE);
}

/**
 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream
 * @stream: An i915 perf stream opened for OA metrics
 *
 * [Re]enables hardware periodic sampling according to the period configured
 * when opening the stream. This also starts a hrtimer that will periodically
 * check for data in the circular OA buffer for notifying userspace (e.g.
 * during a read() or poll()).
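 *
 * As a small illustration (not a complete example), userspace typically
 * reaches this path with a plain ioctl on the stream fd it got back from
 * the open ioctl:
 *
 *	ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);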
2063 */ 2064 static void i915_oa_stream_enable(struct i915_perf_stream *stream) 2065 { 2066 struct drm_i915_private *dev_priv = stream->dev_priv; 2067 2068 dev_priv->perf.ops.oa_enable(stream); 2069 2070 if (stream->periodic) 2071 hrtimer_start(&stream->poll_check_timer, 2072 ns_to_ktime(POLL_PERIOD), 2073 HRTIMER_MODE_REL_PINNED); 2074 } 2075 2076 static void gen7_oa_disable(struct i915_perf_stream *stream) 2077 { 2078 struct intel_uncore *uncore = &stream->dev_priv->uncore; 2079 2080 intel_uncore_write(uncore, GEN7_OACONTROL, 0); 2081 if (intel_wait_for_register(uncore, 2082 GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, 2083 50)) 2084 DRM_ERROR("wait for OA to be disabled timed out\n"); 2085 } 2086 2087 static void gen8_oa_disable(struct i915_perf_stream *stream) 2088 { 2089 struct intel_uncore *uncore = &stream->dev_priv->uncore; 2090 2091 intel_uncore_write(uncore, GEN8_OACONTROL, 0); 2092 if (intel_wait_for_register(uncore, 2093 GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, 2094 50)) 2095 DRM_ERROR("wait for OA to be disabled timed out\n"); 2096 } 2097 2098 /** 2099 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream 2100 * @stream: An i915 perf stream opened for OA metrics 2101 * 2102 * Stops the OA unit from periodically writing counter reports into the 2103 * circular OA buffer. This also stops the hrtimer that periodically checks for 2104 * data in the circular OA buffer, for notifying userspace. 2105 */ 2106 static void i915_oa_stream_disable(struct i915_perf_stream *stream) 2107 { 2108 struct drm_i915_private *dev_priv = stream->dev_priv; 2109 2110 dev_priv->perf.ops.oa_disable(stream); 2111 2112 if (stream->periodic) 2113 hrtimer_cancel(&stream->poll_check_timer); 2114 } 2115 2116 static const struct i915_perf_stream_ops i915_oa_stream_ops = { 2117 .destroy = i915_oa_stream_destroy, 2118 .enable = i915_oa_stream_enable, 2119 .disable = i915_oa_stream_disable, 2120 .wait_unlocked = i915_oa_wait_unlocked, 2121 .poll_wait = i915_oa_poll_wait, 2122 .read = i915_oa_read, 2123 }; 2124 2125 /** 2126 * i915_oa_stream_init - validate combined props for OA stream and init 2127 * @stream: An i915 perf stream 2128 * @param: The open parameters passed to `DRM_I915_PERF_OPEN` 2129 * @props: The property state that configures stream (individually validated) 2130 * 2131 * While read_properties_unlocked() validates properties in isolation it 2132 * doesn't ensure that the combination necessarily makes sense. 2133 * 2134 * At this point it has been determined that userspace wants a stream of 2135 * OA metrics, but still we need to further validate the combined 2136 * properties are OK. 2137 * 2138 * If the configuration makes sense then we can allocate memory for 2139 * a circular OA buffer and apply the requested metric set configuration. 2140 * 2141 * Returns: zero on success or a negative error code. 
2142 */ 2143 static int i915_oa_stream_init(struct i915_perf_stream *stream, 2144 struct drm_i915_perf_open_param *param, 2145 struct perf_open_properties *props) 2146 { 2147 struct drm_i915_private *dev_priv = stream->dev_priv; 2148 int format_size; 2149 int ret; 2150 2151 /* If the sysfs metrics/ directory wasn't registered for some 2152 * reason then don't let userspace try their luck with config 2153 * IDs 2154 */ 2155 if (!dev_priv->perf.metrics_kobj) { 2156 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 2157 return -EINVAL; 2158 } 2159 2160 if (!(props->sample_flags & SAMPLE_OA_REPORT)) { 2161 DRM_DEBUG("Only OA report sampling supported\n"); 2162 return -EINVAL; 2163 } 2164 2165 if (!dev_priv->perf.ops.enable_metric_set) { 2166 DRM_DEBUG("OA unit not supported\n"); 2167 return -ENODEV; 2168 } 2169 2170 /* To avoid the complexity of having to accurately filter 2171 * counter reports and marshal to the appropriate client 2172 * we currently only allow exclusive access 2173 */ 2174 if (dev_priv->perf.exclusive_stream) { 2175 DRM_DEBUG("OA unit already in use\n"); 2176 return -EBUSY; 2177 } 2178 2179 if (!props->oa_format) { 2180 DRM_DEBUG("OA report format not specified\n"); 2181 return -EINVAL; 2182 } 2183 2184 stream->sample_size = sizeof(struct drm_i915_perf_record_header); 2185 2186 format_size = dev_priv->perf.oa_formats[props->oa_format].size; 2187 2188 stream->sample_flags |= SAMPLE_OA_REPORT; 2189 stream->sample_size += format_size; 2190 2191 stream->oa_buffer.format_size = format_size; 2192 if (WARN_ON(stream->oa_buffer.format_size == 0)) 2193 return -EINVAL; 2194 2195 stream->oa_buffer.format = 2196 dev_priv->perf.oa_formats[props->oa_format].format; 2197 2198 stream->periodic = props->oa_periodic; 2199 if (stream->periodic) 2200 stream->period_exponent = props->oa_period_exponent; 2201 2202 if (stream->ctx) { 2203 ret = oa_get_render_ctx_id(stream); 2204 if (ret) { 2205 DRM_DEBUG("Invalid context id to filter with\n"); 2206 return ret; 2207 } 2208 } 2209 2210 ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); 2211 if (ret) { 2212 DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); 2213 goto err_config; 2214 } 2215 2216 /* PRM - observability performance counters: 2217 * 2218 * OACONTROL, performance counter enable, note: 2219 * 2220 * "When this bit is set, in order to have coherent counts, 2221 * RC6 power state and trunk clock gating must be disabled. 2222 * This can be achieved by programming MMIO registers as 2223 * 0xA094=0 and 0xA090[31]=1" 2224 * 2225 * In our case we are expecting that taking pm + FORCEWAKE 2226 * references will effectively disable RC6. 
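	 *
	 * Both references are taken immediately below and held for the
	 * lifetime of the stream (dropped again in the error paths here
	 * and when the stream is destroyed).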
2227 */ 2228 stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); 2229 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); 2230 2231 ret = alloc_oa_buffer(stream); 2232 if (ret) 2233 goto err_oa_buf_alloc; 2234 2235 ret = i915_mutex_lock_interruptible(&dev_priv->drm); 2236 if (ret) 2237 goto err_lock; 2238 2239 stream->ops = &i915_oa_stream_ops; 2240 dev_priv->perf.exclusive_stream = stream; 2241 2242 ret = dev_priv->perf.ops.enable_metric_set(stream); 2243 if (ret) { 2244 DRM_DEBUG("Unable to enable metric set\n"); 2245 goto err_enable; 2246 } 2247 2248 mutex_unlock(&dev_priv->drm.struct_mutex); 2249 2250 hrtimer_init(&stream->poll_check_timer, 2251 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2252 stream->poll_check_timer.function = oa_poll_check_timer_cb; 2253 init_waitqueue_head(&stream->poll_wq); 2254 spin_lock_init(&stream->oa_buffer.ptr_lock); 2255 2256 return 0; 2257 2258 err_enable: 2259 dev_priv->perf.exclusive_stream = NULL; 2260 dev_priv->perf.ops.disable_metric_set(stream); 2261 mutex_unlock(&dev_priv->drm.struct_mutex); 2262 2263 err_lock: 2264 free_oa_buffer(stream); 2265 2266 err_oa_buf_alloc: 2267 put_oa_config(dev_priv, stream->oa_config); 2268 2269 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 2270 intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); 2271 2272 err_config: 2273 if (stream->ctx) 2274 oa_put_render_ctx_id(stream); 2275 2276 return ret; 2277 } 2278 2279 void i915_oa_init_reg_state(const struct intel_context *ce, 2280 const struct intel_engine_cs *engine) 2281 { 2282 struct i915_perf_stream *stream; 2283 2284 /* perf.exclusive_stream serialised by gen8_configure_all_contexts() */ 2285 lockdep_assert_held(&ce->pin_mutex); 2286 2287 if (engine->class != RENDER_CLASS) 2288 return; 2289 2290 stream = engine->i915->perf.exclusive_stream; 2291 if (stream) 2292 gen8_update_reg_state_unlocked(ce, stream); 2293 } 2294 2295 /** 2296 * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation 2297 * @stream: An i915 perf stream 2298 * @file: An i915 perf stream file 2299 * @buf: destination buffer given by userspace 2300 * @count: the number of bytes userspace wants to read 2301 * @ppos: (inout) file seek position (unused) 2302 * 2303 * Besides wrapping &i915_perf_stream_ops->read this provides a common place to 2304 * ensure that if we've successfully copied any data then reporting that takes 2305 * precedence over any internal error status, so the data isn't lost. 2306 * 2307 * For example ret will be -ENOSPC whenever there is more buffered data than 2308 * can be copied to userspace, but that's only interesting if we weren't able 2309 * to copy some data because it implies the userspace buffer is too small to 2310 * receive a single record (and we never split records). 2311 * 2312 * Another case with ret == -EFAULT is more of a grey area since it would seem 2313 * like bad form for userspace to ask us to overrun its buffer, but the user 2314 * knows best: 2315 * 2316 * http://yarchive.net/comp/linux/partial_reads_writes.html 2317 * 2318 * Returns: The number of bytes copied or a negative error code on failure. 
2319 */ 2320 static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, 2321 struct file *file, 2322 char __user *buf, 2323 size_t count, 2324 loff_t *ppos) 2325 { 2326 /* Note we keep the offset (aka bytes read) separate from any 2327 * error status so that the final check for whether we return 2328 * the bytes read with a higher precedence than any error (see 2329 * comment below) doesn't need to be handled/duplicated in 2330 * stream->ops->read() implementations. 2331 */ 2332 size_t offset = 0; 2333 int ret = stream->ops->read(stream, buf, count, &offset); 2334 2335 return offset ?: (ret ?: -EAGAIN); 2336 } 2337 2338 /** 2339 * i915_perf_read - handles read() FOP for i915 perf stream FDs 2340 * @file: An i915 perf stream file 2341 * @buf: destination buffer given by userspace 2342 * @count: the number of bytes userspace wants to read 2343 * @ppos: (inout) file seek position (unused) 2344 * 2345 * The entry point for handling a read() on a stream file descriptor from 2346 * userspace. Most of the work is left to the i915_perf_read_locked() and 2347 * &i915_perf_stream_ops->read but to save having stream implementations (of 2348 * which we might have multiple later) we handle blocking read here. 2349 * 2350 * We can also consistently treat trying to read from a disabled stream 2351 * as an IO error so implementations can assume the stream is enabled 2352 * while reading. 2353 * 2354 * Returns: The number of bytes copied or a negative error code on failure. 2355 */ 2356 static ssize_t i915_perf_read(struct file *file, 2357 char __user *buf, 2358 size_t count, 2359 loff_t *ppos) 2360 { 2361 struct i915_perf_stream *stream = file->private_data; 2362 struct drm_i915_private *dev_priv = stream->dev_priv; 2363 ssize_t ret; 2364 2365 /* To ensure it's handled consistently we simply treat all reads of a 2366 * disabled stream as an error. In particular it might otherwise lead 2367 * to a deadlock for blocking file descriptors... 2368 */ 2369 if (!stream->enabled) 2370 return -EIO; 2371 2372 if (!(file->f_flags & O_NONBLOCK)) { 2373 /* There's the small chance of false positives from 2374 * stream->ops->wait_unlocked. 2375 * 2376 * E.g. with single context filtering since we only wait until 2377 * oabuffer has >= 1 report we don't immediately know whether 2378 * any reports really belong to the current context 2379 */ 2380 do { 2381 ret = stream->ops->wait_unlocked(stream); 2382 if (ret) 2383 return ret; 2384 2385 mutex_lock(&dev_priv->perf.lock); 2386 ret = i915_perf_read_locked(stream, file, 2387 buf, count, ppos); 2388 mutex_unlock(&dev_priv->perf.lock); 2389 } while (ret == -EAGAIN); 2390 } else { 2391 mutex_lock(&dev_priv->perf.lock); 2392 ret = i915_perf_read_locked(stream, file, buf, count, ppos); 2393 mutex_unlock(&dev_priv->perf.lock); 2394 } 2395 2396 /* We allow the poll checking to sometimes report false positive EPOLLIN 2397 * events where we might actually report EAGAIN on read() if there's 2398 * not really any data available. In this situation though we don't 2399 * want to enter a busy loop between poll() reporting a EPOLLIN event 2400 * and read() returning -EAGAIN. Clearing the oa.pollin state here 2401 * effectively ensures we back off until the next hrtimer callback 2402 * before reporting another EPOLLIN event. 2403 */ 2404 if (ret >= 0 || ret == -EAGAIN) { 2405 /* Maybe make ->pollin per-stream state if we support multiple 2406 * concurrent streams in the future. 
2407 */ 2408 stream->pollin = false; 2409 } 2410 2411 return ret; 2412 } 2413 2414 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) 2415 { 2416 struct i915_perf_stream *stream = 2417 container_of(hrtimer, typeof(*stream), poll_check_timer); 2418 2419 if (oa_buffer_check_unlocked(stream)) { 2420 stream->pollin = true; 2421 wake_up(&stream->poll_wq); 2422 } 2423 2424 hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); 2425 2426 return HRTIMER_RESTART; 2427 } 2428 2429 /** 2430 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream 2431 * @dev_priv: i915 device instance 2432 * @stream: An i915 perf stream 2433 * @file: An i915 perf stream file 2434 * @wait: poll() state table 2435 * 2436 * For handling userspace polling on an i915 perf stream, this calls through to 2437 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that 2438 * will be woken for new stream data. 2439 * 2440 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2441 * with any non-file-operation driver hooks. 2442 * 2443 * Returns: any poll events that are ready without sleeping 2444 */ 2445 static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, 2446 struct i915_perf_stream *stream, 2447 struct file *file, 2448 poll_table *wait) 2449 { 2450 __poll_t events = 0; 2451 2452 stream->ops->poll_wait(stream, file, wait); 2453 2454 /* Note: we don't explicitly check whether there's something to read 2455 * here since this path may be very hot depending on what else 2456 * userspace is polling, or on the timeout in use. We rely solely on 2457 * the hrtimer/oa_poll_check_timer_cb to notify us when there are 2458 * samples to read. 2459 */ 2460 if (stream->pollin) 2461 events |= EPOLLIN; 2462 2463 return events; 2464 } 2465 2466 /** 2467 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream 2468 * @file: An i915 perf stream file 2469 * @wait: poll() state table 2470 * 2471 * For handling userspace polling on an i915 perf stream, this ensures 2472 * poll_wait() gets called with a wait queue that will be woken for new stream 2473 * data. 2474 * 2475 * Note: Implementation deferred to i915_perf_poll_locked() 2476 * 2477 * Returns: any poll events that are ready without sleeping 2478 */ 2479 static __poll_t i915_perf_poll(struct file *file, poll_table *wait) 2480 { 2481 struct i915_perf_stream *stream = file->private_data; 2482 struct drm_i915_private *dev_priv = stream->dev_priv; 2483 __poll_t ret; 2484 2485 mutex_lock(&dev_priv->perf.lock); 2486 ret = i915_perf_poll_locked(dev_priv, stream, file, wait); 2487 mutex_unlock(&dev_priv->perf.lock); 2488 2489 return ret; 2490 } 2491 2492 /** 2493 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl 2494 * @stream: A disabled i915 perf stream 2495 * 2496 * [Re]enables the associated capture of data for this stream. 2497 * 2498 * If a stream was previously enabled then there's currently no intention 2499 * to provide userspace any guarantee about the preservation of previously 2500 * buffered data. 
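 *
 * Note: enabling a stream that is already enabled is a no-op (see the
 * stream->enabled check below), so userspace may issue the enable ioctl
 * unconditionally.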
 */
static void i915_perf_enable_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		return;

	/* Allow stream->ops->enable() to refer to this */
	stream->enabled = true;

	if (stream->ops->enable)
		stream->ops->enable(stream);
}

/**
 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl
 * @stream: An enabled i915 perf stream
 *
 * Disables the associated capture of data for this stream.
 *
 * The intention is that disabling and re-enabling a stream will ideally be
 * cheaper than destroying and re-opening a stream with the same configuration,
 * though there are no formal guarantees about what state or buffered data
 * must be retained between disabling and re-enabling a stream.
 *
 * Note: while a stream is disabled it's considered an error for userspace
 * to attempt to read from the stream (-EIO).
 */
static void i915_perf_disable_locked(struct i915_perf_stream *stream)
{
	if (!stream->enabled)
		return;

	/* Allow stream->ops->disable() to refer to this */
	stream->enabled = false;

	if (stream->ops->disable)
		stream->ops->disable(stream);
}

/**
 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs
 * @stream: An i915 perf stream
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
 */
static long i915_perf_ioctl_locked(struct i915_perf_stream *stream,
				   unsigned int cmd,
				   unsigned long arg)
{
	switch (cmd) {
	case I915_PERF_IOCTL_ENABLE:
		i915_perf_enable_locked(stream);
		return 0;
	case I915_PERF_IOCTL_DISABLE:
		i915_perf_disable_locked(stream);
		return 0;
	}

	return -EINVAL;
}

/**
 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs
 * @file: An i915 perf stream file
 * @cmd: the ioctl request
 * @arg: the ioctl data
 *
 * Implementation deferred to i915_perf_ioctl_locked().
 *
 * Returns: zero on success or a negative error code. Returns -EINVAL for
 * an unknown ioctl request.
 */
static long i915_perf_ioctl(struct file *file,
			    unsigned int cmd,
			    unsigned long arg)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;
	long ret;

	mutex_lock(&dev_priv->perf.lock);
	ret = i915_perf_ioctl_locked(stream, cmd, arg);
	mutex_unlock(&dev_priv->perf.lock);

	return ret;
}

/**
 * i915_perf_destroy_locked - destroy an i915 perf stream
 * @stream: An i915 perf stream
 *
 * Frees all resources associated with the given i915 perf @stream, disabling
 * any associated data capture in the process.
 *
 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize
 * with any non-file-operation driver hooks.
 */
static void i915_perf_destroy_locked(struct i915_perf_stream *stream)
{
	if (stream->enabled)
		i915_perf_disable_locked(stream);

	if (stream->ops->destroy)
		stream->ops->destroy(stream);

	list_del(&stream->link);

	if (stream->ctx)
		i915_gem_context_put(stream->ctx);

	kfree(stream);
}

/**
 * i915_perf_release - handles userspace close() of a stream file
 * @inode: anonymous inode associated with file
 * @file: An i915 perf stream file
 *
 * Cleans up any resources associated with an open i915 perf stream file.
 *
 * NB: close() can't really fail from the userspace point of view.
 *
 * Returns: zero on success or a negative error code.
 */
static int i915_perf_release(struct inode *inode, struct file *file)
{
	struct i915_perf_stream *stream = file->private_data;
	struct drm_i915_private *dev_priv = stream->dev_priv;

	mutex_lock(&dev_priv->perf.lock);
	i915_perf_destroy_locked(stream);
	mutex_unlock(&dev_priv->perf.lock);

	/* Release the reference the perf stream kept on the driver. */
	drm_dev_put(&dev_priv->drm);

	return 0;
}


static const struct file_operations fops = {
	.owner		= THIS_MODULE,
	.llseek		= no_llseek,
	.release	= i915_perf_release,
	.poll		= i915_perf_poll,
	.read		= i915_perf_read,
	.unlocked_ioctl	= i915_perf_ioctl,
	/* Our ioctls have no arguments, so it's safe to use the same function
	 * to handle 32-bit compatibility.
	 */
	.compat_ioctl	= i915_perf_ioctl,
};


/**
 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD
 * @dev_priv: i915 device instance
 * @param: The open parameters passed to `DRM_I915_PERF_OPEN`
 * @props: individually validated u64 property value pairs
 * @file: drm file
 *
 * See i915_perf_open_ioctl() for interface details.
 *
 * Implements further stream config validation and stream initialization on
 * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex
 * taken to serialize with any non-file-operation driver hooks.
 *
 * Note: at this point the @props have only been validated in isolation and
 * it's still necessary to validate that the combination of properties makes
 * sense.
 *
 * In the case where userspace is interested in OA unit metrics then further
 * config validation and stream initialization details will be handled by
 * i915_oa_stream_init(). The code here should only validate config state that
 * will be relevant to all stream types / backends.
 *
 * Returns: a newly opened i915 perf stream file descriptor or a negative
 * error code on failure.
2684 */ 2685 static int 2686 i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, 2687 struct drm_i915_perf_open_param *param, 2688 struct perf_open_properties *props, 2689 struct drm_file *file) 2690 { 2691 struct i915_gem_context *specific_ctx = NULL; 2692 struct i915_perf_stream *stream = NULL; 2693 unsigned long f_flags = 0; 2694 bool privileged_op = true; 2695 int stream_fd; 2696 int ret; 2697 2698 if (props->single_context) { 2699 u32 ctx_handle = props->ctx_handle; 2700 struct drm_i915_file_private *file_priv = file->driver_priv; 2701 2702 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); 2703 if (!specific_ctx) { 2704 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", 2705 ctx_handle); 2706 ret = -ENOENT; 2707 goto err; 2708 } 2709 } 2710 2711 /* 2712 * On Haswell the OA unit supports clock gating off for a specific 2713 * context and in this mode there's no visibility of metrics for the 2714 * rest of the system, which we consider acceptable for a 2715 * non-privileged client. 2716 * 2717 * For Gen8+ the OA unit no longer supports clock gating off for a 2718 * specific context and the kernel can't securely stop the counters 2719 * from updating as system-wide / global values. Even though we can 2720 * filter reports based on the included context ID we can't block 2721 * clients from seeing the raw / global counter values via 2722 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to 2723 * enable the OA unit by default. 2724 */ 2725 if (IS_HASWELL(dev_priv) && specific_ctx) 2726 privileged_op = false; 2727 2728 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option 2729 * we check a dev.i915.perf_stream_paranoid sysctl option 2730 * to determine if it's ok to access system wide OA counters 2731 * without CAP_SYS_ADMIN privileges. 2732 */ 2733 if (privileged_op && 2734 i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { 2735 DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); 2736 ret = -EACCES; 2737 goto err_ctx; 2738 } 2739 2740 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 2741 if (!stream) { 2742 ret = -ENOMEM; 2743 goto err_ctx; 2744 } 2745 2746 stream->dev_priv = dev_priv; 2747 stream->ctx = specific_ctx; 2748 2749 ret = i915_oa_stream_init(stream, param, props); 2750 if (ret) 2751 goto err_alloc; 2752 2753 /* we avoid simply assigning stream->sample_flags = props->sample_flags 2754 * to have _stream_init check the combination of sample flags more 2755 * thoroughly, but still this is the expected result at this point. 2756 */ 2757 if (WARN_ON(stream->sample_flags != props->sample_flags)) { 2758 ret = -ENODEV; 2759 goto err_flags; 2760 } 2761 2762 list_add(&stream->link, &dev_priv->perf.streams); 2763 2764 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) 2765 f_flags |= O_CLOEXEC; 2766 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) 2767 f_flags |= O_NONBLOCK; 2768 2769 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); 2770 if (stream_fd < 0) { 2771 ret = stream_fd; 2772 goto err_open; 2773 } 2774 2775 if (!(param->flags & I915_PERF_FLAG_DISABLED)) 2776 i915_perf_enable_locked(stream); 2777 2778 /* Take a reference on the driver that will be kept with stream_fd 2779 * until its release. 
2780 */ 2781 drm_dev_get(&dev_priv->drm); 2782 2783 return stream_fd; 2784 2785 err_open: 2786 list_del(&stream->link); 2787 err_flags: 2788 if (stream->ops->destroy) 2789 stream->ops->destroy(stream); 2790 err_alloc: 2791 kfree(stream); 2792 err_ctx: 2793 if (specific_ctx) 2794 i915_gem_context_put(specific_ctx); 2795 err: 2796 return ret; 2797 } 2798 2799 static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) 2800 { 2801 return div64_u64(1000000000ULL * (2ULL << exponent), 2802 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); 2803 } 2804 2805 /** 2806 * read_properties_unlocked - validate + copy userspace stream open properties 2807 * @dev_priv: i915 device instance 2808 * @uprops: The array of u64 key value pairs given by userspace 2809 * @n_props: The number of key value pairs expected in @uprops 2810 * @props: The stream configuration built up while validating properties 2811 * 2812 * Note this function only validates properties in isolation it doesn't 2813 * validate that the combination of properties makes sense or that all 2814 * properties necessary for a particular kind of stream have been set. 2815 * 2816 * Note that there currently aren't any ordering requirements for properties so 2817 * we shouldn't validate or assume anything about ordering here. This doesn't 2818 * rule out defining new properties with ordering requirements in the future. 2819 */ 2820 static int read_properties_unlocked(struct drm_i915_private *dev_priv, 2821 u64 __user *uprops, 2822 u32 n_props, 2823 struct perf_open_properties *props) 2824 { 2825 u64 __user *uprop = uprops; 2826 u32 i; 2827 2828 memset(props, 0, sizeof(struct perf_open_properties)); 2829 2830 if (!n_props) { 2831 DRM_DEBUG("No i915 perf properties given\n"); 2832 return -EINVAL; 2833 } 2834 2835 /* Considering that ID = 0 is reserved and assuming that we don't 2836 * (currently) expect any configurations to ever specify duplicate 2837 * values for a particular property ID then the last _PROP_MAX value is 2838 * one greater than the maximum number of properties we expect to get 2839 * from userspace. 
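	 *
	 * Put differently: being given DRM_I915_PERF_PROP_MAX or more pairs
	 * would necessarily mean at least one duplicated or unknown property
	 * ID, so we can reject such requests up front.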
2840 */ 2841 if (n_props >= DRM_I915_PERF_PROP_MAX) { 2842 DRM_DEBUG("More i915 perf properties specified than exist\n"); 2843 return -EINVAL; 2844 } 2845 2846 for (i = 0; i < n_props; i++) { 2847 u64 oa_period, oa_freq_hz; 2848 u64 id, value; 2849 int ret; 2850 2851 ret = get_user(id, uprop); 2852 if (ret) 2853 return ret; 2854 2855 ret = get_user(value, uprop + 1); 2856 if (ret) 2857 return ret; 2858 2859 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { 2860 DRM_DEBUG("Unknown i915 perf property ID\n"); 2861 return -EINVAL; 2862 } 2863 2864 switch ((enum drm_i915_perf_property_id)id) { 2865 case DRM_I915_PERF_PROP_CTX_HANDLE: 2866 props->single_context = 1; 2867 props->ctx_handle = value; 2868 break; 2869 case DRM_I915_PERF_PROP_SAMPLE_OA: 2870 if (value) 2871 props->sample_flags |= SAMPLE_OA_REPORT; 2872 break; 2873 case DRM_I915_PERF_PROP_OA_METRICS_SET: 2874 if (value == 0) { 2875 DRM_DEBUG("Unknown OA metric set ID\n"); 2876 return -EINVAL; 2877 } 2878 props->metrics_set = value; 2879 break; 2880 case DRM_I915_PERF_PROP_OA_FORMAT: 2881 if (value == 0 || value >= I915_OA_FORMAT_MAX) { 2882 DRM_DEBUG("Out-of-range OA report format %llu\n", 2883 value); 2884 return -EINVAL; 2885 } 2886 if (!dev_priv->perf.oa_formats[value].size) { 2887 DRM_DEBUG("Unsupported OA report format %llu\n", 2888 value); 2889 return -EINVAL; 2890 } 2891 props->oa_format = value; 2892 break; 2893 case DRM_I915_PERF_PROP_OA_EXPONENT: 2894 if (value > OA_EXPONENT_MAX) { 2895 DRM_DEBUG("OA timer exponent too high (> %u)\n", 2896 OA_EXPONENT_MAX); 2897 return -EINVAL; 2898 } 2899 2900 /* Theoretically we can program the OA unit to sample 2901 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns 2902 * for BXT. We don't allow such high sampling 2903 * frequencies by default unless root. 2904 */ 2905 2906 BUILD_BUG_ON(sizeof(oa_period) != 8); 2907 oa_period = oa_exponent_to_ns(dev_priv, value); 2908 2909 /* This check is primarily to ensure that oa_period <= 2910 * UINT32_MAX (before passing to do_div which only 2911 * accepts a u32 denominator), but we can also skip 2912 * checking anything < 1Hz which implicitly can't be 2913 * limited via an integer oa_max_sample_rate. 2914 */ 2915 if (oa_period <= NSEC_PER_SEC) { 2916 u64 tmp = NSEC_PER_SEC; 2917 do_div(tmp, oa_period); 2918 oa_freq_hz = tmp; 2919 } else 2920 oa_freq_hz = 0; 2921 2922 if (oa_freq_hz > i915_oa_max_sample_rate && 2923 !capable(CAP_SYS_ADMIN)) { 2924 DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n", 2925 i915_oa_max_sample_rate); 2926 return -EACCES; 2927 } 2928 2929 props->oa_periodic = true; 2930 props->oa_period_exponent = value; 2931 break; 2932 case DRM_I915_PERF_PROP_MAX: 2933 MISSING_CASE(id); 2934 return -EINVAL; 2935 } 2936 2937 uprop += 2; 2938 } 2939 2940 return 0; 2941 } 2942 2943 /** 2944 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD 2945 * @dev: drm device 2946 * @data: ioctl data copied from userspace (unvalidated) 2947 * @file: drm file 2948 * 2949 * Validates the stream open parameters given by userspace including flags 2950 * and an array of u64 key, value pair properties. 2951 * 2952 * Very little is assumed up front about the nature of the stream being 2953 * opened (for instance we don't assume it's for periodic OA unit metrics). An 2954 * i915-perf stream is expected to be a suitable interface for other forms of 2955 * buffered data written by the GPU besides periodic OA metrics. 
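 *
 * For orientation only, a minimal userspace sketch of opening an OA stream
 * might look roughly like the following, where metrics_set_id stands for an
 * ID enumerated from the sysfs metrics/ directory and the report format and
 * exponent are purely illustrative (both are gen and use-case dependent):
 *
 *	uint64_t properties[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = 4,
 *		.properties_ptr = (uint64_t)(uintptr_t)properties,
 *	};
 *	int stream_fd = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);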
2956 * 2957 * Note we copy the properties from userspace outside of the i915 perf 2958 * mutex to avoid an awkward lockdep with mmap_sem. 2959 * 2960 * Most of the implementation details are handled by 2961 * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock 2962 * mutex for serializing with any non-file-operation driver hooks. 2963 * 2964 * Return: A newly opened i915 Perf stream file descriptor or negative 2965 * error code on failure. 2966 */ 2967 int i915_perf_open_ioctl(struct drm_device *dev, void *data, 2968 struct drm_file *file) 2969 { 2970 struct drm_i915_private *dev_priv = dev->dev_private; 2971 struct drm_i915_perf_open_param *param = data; 2972 struct perf_open_properties props; 2973 u32 known_open_flags; 2974 int ret; 2975 2976 if (!dev_priv->perf.initialized) { 2977 DRM_DEBUG("i915 perf interface not available for this system\n"); 2978 return -ENOTSUPP; 2979 } 2980 2981 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | 2982 I915_PERF_FLAG_FD_NONBLOCK | 2983 I915_PERF_FLAG_DISABLED; 2984 if (param->flags & ~known_open_flags) { 2985 DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n"); 2986 return -EINVAL; 2987 } 2988 2989 ret = read_properties_unlocked(dev_priv, 2990 u64_to_user_ptr(param->properties_ptr), 2991 param->num_properties, 2992 &props); 2993 if (ret) 2994 return ret; 2995 2996 mutex_lock(&dev_priv->perf.lock); 2997 ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); 2998 mutex_unlock(&dev_priv->perf.lock); 2999 3000 return ret; 3001 } 3002 3003 /** 3004 * i915_perf_register - exposes i915-perf to userspace 3005 * @dev_priv: i915 device instance 3006 * 3007 * In particular OA metric sets are advertised under a sysfs metrics/ 3008 * directory allowing userspace to enumerate valid IDs that can be 3009 * used to open an i915-perf stream. 3010 */ 3011 void i915_perf_register(struct drm_i915_private *dev_priv) 3012 { 3013 int ret; 3014 3015 if (!dev_priv->perf.initialized) 3016 return; 3017 3018 /* To be sure we're synchronized with an attempted 3019 * i915_perf_open_ioctl(); considering that we register after 3020 * being exposed to userspace. 
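	 *
	 * (The open path checks perf.metrics_kobj while holding this same
	 * lock, so a racing open either observes the fully initialised
	 * metrics state or none of it.)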
3021 */ 3022 mutex_lock(&dev_priv->perf.lock); 3023 3024 dev_priv->perf.metrics_kobj = 3025 kobject_create_and_add("metrics", 3026 &dev_priv->drm.primary->kdev->kobj); 3027 if (!dev_priv->perf.metrics_kobj) 3028 goto exit; 3029 3030 sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr); 3031 3032 if (INTEL_GEN(dev_priv) >= 11) { 3033 i915_perf_load_test_config_icl(dev_priv); 3034 } else if (IS_CANNONLAKE(dev_priv)) { 3035 i915_perf_load_test_config_cnl(dev_priv); 3036 } else if (IS_COFFEELAKE(dev_priv)) { 3037 if (IS_CFL_GT2(dev_priv)) 3038 i915_perf_load_test_config_cflgt2(dev_priv); 3039 if (IS_CFL_GT3(dev_priv)) 3040 i915_perf_load_test_config_cflgt3(dev_priv); 3041 } else if (IS_GEMINILAKE(dev_priv)) { 3042 i915_perf_load_test_config_glk(dev_priv); 3043 } else if (IS_KABYLAKE(dev_priv)) { 3044 if (IS_KBL_GT2(dev_priv)) 3045 i915_perf_load_test_config_kblgt2(dev_priv); 3046 else if (IS_KBL_GT3(dev_priv)) 3047 i915_perf_load_test_config_kblgt3(dev_priv); 3048 } else if (IS_BROXTON(dev_priv)) { 3049 i915_perf_load_test_config_bxt(dev_priv); 3050 } else if (IS_SKYLAKE(dev_priv)) { 3051 if (IS_SKL_GT2(dev_priv)) 3052 i915_perf_load_test_config_sklgt2(dev_priv); 3053 else if (IS_SKL_GT3(dev_priv)) 3054 i915_perf_load_test_config_sklgt3(dev_priv); 3055 else if (IS_SKL_GT4(dev_priv)) 3056 i915_perf_load_test_config_sklgt4(dev_priv); 3057 } else if (IS_CHERRYVIEW(dev_priv)) { 3058 i915_perf_load_test_config_chv(dev_priv); 3059 } else if (IS_BROADWELL(dev_priv)) { 3060 i915_perf_load_test_config_bdw(dev_priv); 3061 } else if (IS_HASWELL(dev_priv)) { 3062 i915_perf_load_test_config_hsw(dev_priv); 3063 } 3064 3065 if (dev_priv->perf.test_config.id == 0) 3066 goto sysfs_error; 3067 3068 ret = sysfs_create_group(dev_priv->perf.metrics_kobj, 3069 &dev_priv->perf.test_config.sysfs_metric); 3070 if (ret) 3071 goto sysfs_error; 3072 3073 atomic_set(&dev_priv->perf.test_config.ref_count, 1); 3074 3075 goto exit; 3076 3077 sysfs_error: 3078 kobject_put(dev_priv->perf.metrics_kobj); 3079 dev_priv->perf.metrics_kobj = NULL; 3080 3081 exit: 3082 mutex_unlock(&dev_priv->perf.lock); 3083 } 3084 3085 /** 3086 * i915_perf_unregister - hide i915-perf from userspace 3087 * @dev_priv: i915 device instance 3088 * 3089 * i915-perf state cleanup is split up into an 'unregister' and 3090 * 'deinit' phase where the interface is first hidden from 3091 * userspace by i915_perf_unregister() before cleaning up 3092 * remaining state in i915_perf_fini(). 
 */
void i915_perf_unregister(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.metrics_kobj)
		return;

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &dev_priv->perf.test_config.sysfs_metric);

	kobject_put(dev_priv->perf.metrics_kobj);
	dev_priv->perf.metrics_kobj = NULL;
}

static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	static const i915_reg_t flex_eu_regs[] = {
		EU_PERF_CNTL0,
		EU_PERF_CNTL1,
		EU_PERF_CNTL2,
		EU_PERF_CNTL3,
		EU_PERF_CNTL4,
		EU_PERF_CNTL5,
		EU_PERF_CNTL6,
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) {
		if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr)
			return true;
	}
	return false;
}

static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) &&
		addr <= i915_mmio_reg_offset(OASTARTTRIG8)) ||
	       (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) &&
		addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) ||
	       (addr >= i915_mmio_reg_offset(OACEC0_0) &&
		addr <= i915_mmio_reg_offset(OACEC7_1));
}

static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) ||
	       (addr >= i915_mmio_reg_offset(MICRO_BP0_0) &&
		addr <= i915_mmio_reg_offset(NOA_WRITE)) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI));
}

static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) ||
	       (addr >= i915_mmio_reg_offset(RPM_CONFIG0) &&
		addr <= i915_mmio_reg_offset(NOA_CONFIG(8)));
}

static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen8_is_valid_mux_addr(dev_priv, addr) ||
	       addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) ||
	       (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) &&
		addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI));
}

static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       (addr >= 0x25100 && addr <= 0x2FF90) ||
	       (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) &&
		addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) ||
	       addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0);
}

static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr)
{
	return gen7_is_valid_mux_addr(dev_priv, addr) ||
	       (addr >= 0x182300 && addr <= 0x1823A4);
}

static u32 mask_reg_value(u32 reg, u32 val)
{
	/* HALF_SLICE_CHICKEN2 is programmed with the
	 * WaDisableSTUnitPowerOptimization workaround. Make sure the value
	 * programmed by userspace doesn't change this.
	 */
	if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg)
		val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE);

	/* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function
	 * indicated by its name and a bunch of selection fields used by OA
	 * configs.
	 */
	if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg)
		val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE);

	return val;
}

static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
					 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr),
					 u32 __user *regs,
					 u32 n_regs)
{
	struct i915_oa_reg *oa_regs;
	int err;
	u32 i;

	if (!n_regs)
		return NULL;

	if (!access_ok(regs, n_regs * sizeof(u32) * 2))
		return ERR_PTR(-EFAULT);

	/* No is_valid function means we're not allowing any register to be programmed. */
	GEM_BUG_ON(!is_valid);
	if (!is_valid)
		return ERR_PTR(-EINVAL);

	oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL);
	if (!oa_regs)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < n_regs; i++) {
		u32 addr, value;

		err = get_user(addr, regs);
		if (err)
			goto addr_err;

		if (!is_valid(dev_priv, addr)) {
			DRM_DEBUG("Invalid oa_reg address: %X\n", addr);
			err = -EINVAL;
			goto addr_err;
		}

		err = get_user(value, regs + 1);
		if (err)
			goto addr_err;

		oa_regs[i].addr = _MMIO(addr);
		oa_regs[i].value = mask_reg_value(addr, value);

		regs += 2;
	}

	return oa_regs;

addr_err:
	kfree(oa_regs);
	return ERR_PTR(err);
}

static ssize_t show_dynamic_id(struct device *dev,
			       struct device_attribute *attr,
			       char *buf)
{
	struct i915_oa_config *oa_config =
		container_of(attr, typeof(*oa_config), sysfs_metric_id);

	return sprintf(buf, "%d\n", oa_config->id);
}

static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv,
					 struct i915_oa_config *oa_config)
{
	sysfs_attr_init(&oa_config->sysfs_metric_id.attr);
	oa_config->sysfs_metric_id.attr.name = "id";
	oa_config->sysfs_metric_id.attr.mode = S_IRUGO;
	oa_config->sysfs_metric_id.show = show_dynamic_id;
	oa_config->sysfs_metric_id.store = NULL;

	oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr;
	oa_config->attrs[1] = NULL;

	oa_config->sysfs_metric.name = oa_config->uuid;
	oa_config->sysfs_metric.attrs = oa_config->attrs;

	return sysfs_create_group(dev_priv->perf.metrics_kobj,
				  &oa_config->sysfs_metric);
}

/**
 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
 * @dev: drm device
 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
 *        userspace (unvalidated)
 * @file: drm file
 *
 * Validates the submitted OA registers to be saved into a new OA config that
 * can then be used for programming the OA unit and its NOA network.
 *
 * Returns: A newly allocated config number to be used with the perf open ioctl
 * or a negative error code on failure.
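 *
 * As a rough sketch of the userspace side (using the field names of
 * struct drm_i915_perf_oa_config; the n_* counts, register arrays and the
 * example UUID are hypothetical placeholders):
 *
 *	struct drm_i915_perf_oa_config config = { 0 };
 *
 *	memcpy(config.uuid, "01234567-0123-0123-0123-0123456789ab", 36);
 *	config.n_mux_regs = n_mux;
 *	config.mux_regs_ptr = (uint64_t)(uintptr_t)mux_regs;
 *	config.n_boolean_regs = n_bool;
 *	config.boolean_regs_ptr = (uint64_t)(uintptr_t)bool_regs;
 *	config.n_flex_regs = n_flex;
 *	config.flex_regs_ptr = (uint64_t)(uintptr_t)flex_regs;
 *
 *	config_id = drmIoctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);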
3292 */ 3293 int i915_perf_add_config_ioctl(struct drm_device *dev, void *data, 3294 struct drm_file *file) 3295 { 3296 struct drm_i915_private *dev_priv = dev->dev_private; 3297 struct drm_i915_perf_oa_config *args = data; 3298 struct i915_oa_config *oa_config, *tmp; 3299 int err, id; 3300 3301 if (!dev_priv->perf.initialized) { 3302 DRM_DEBUG("i915 perf interface not available for this system\n"); 3303 return -ENOTSUPP; 3304 } 3305 3306 if (!dev_priv->perf.metrics_kobj) { 3307 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 3308 return -EINVAL; 3309 } 3310 3311 if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { 3312 DRM_DEBUG("Insufficient privileges to add i915 OA config\n"); 3313 return -EACCES; 3314 } 3315 3316 if ((!args->mux_regs_ptr || !args->n_mux_regs) && 3317 (!args->boolean_regs_ptr || !args->n_boolean_regs) && 3318 (!args->flex_regs_ptr || !args->n_flex_regs)) { 3319 DRM_DEBUG("No OA registers given\n"); 3320 return -EINVAL; 3321 } 3322 3323 oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); 3324 if (!oa_config) { 3325 DRM_DEBUG("Failed to allocate memory for the OA config\n"); 3326 return -ENOMEM; 3327 } 3328 3329 atomic_set(&oa_config->ref_count, 1); 3330 3331 if (!uuid_is_valid(args->uuid)) { 3332 DRM_DEBUG("Invalid uuid format for OA config\n"); 3333 err = -EINVAL; 3334 goto reg_err; 3335 } 3336 3337 /* Last character in oa_config->uuid will be 0 because oa_config is 3338 * kzalloc. 3339 */ 3340 memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid)); 3341 3342 oa_config->mux_regs_len = args->n_mux_regs; 3343 oa_config->mux_regs = 3344 alloc_oa_regs(dev_priv, 3345 dev_priv->perf.ops.is_valid_mux_reg, 3346 u64_to_user_ptr(args->mux_regs_ptr), 3347 args->n_mux_regs); 3348 3349 if (IS_ERR(oa_config->mux_regs)) { 3350 DRM_DEBUG("Failed to create OA config for mux_regs\n"); 3351 err = PTR_ERR(oa_config->mux_regs); 3352 goto reg_err; 3353 } 3354 3355 oa_config->b_counter_regs_len = args->n_boolean_regs; 3356 oa_config->b_counter_regs = 3357 alloc_oa_regs(dev_priv, 3358 dev_priv->perf.ops.is_valid_b_counter_reg, 3359 u64_to_user_ptr(args->boolean_regs_ptr), 3360 args->n_boolean_regs); 3361 3362 if (IS_ERR(oa_config->b_counter_regs)) { 3363 DRM_DEBUG("Failed to create OA config for b_counter_regs\n"); 3364 err = PTR_ERR(oa_config->b_counter_regs); 3365 goto reg_err; 3366 } 3367 3368 if (INTEL_GEN(dev_priv) < 8) { 3369 if (args->n_flex_regs != 0) { 3370 err = -EINVAL; 3371 goto reg_err; 3372 } 3373 } else { 3374 oa_config->flex_regs_len = args->n_flex_regs; 3375 oa_config->flex_regs = 3376 alloc_oa_regs(dev_priv, 3377 dev_priv->perf.ops.is_valid_flex_reg, 3378 u64_to_user_ptr(args->flex_regs_ptr), 3379 args->n_flex_regs); 3380 3381 if (IS_ERR(oa_config->flex_regs)) { 3382 DRM_DEBUG("Failed to create OA config for flex_regs\n"); 3383 err = PTR_ERR(oa_config->flex_regs); 3384 goto reg_err; 3385 } 3386 } 3387 3388 err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); 3389 if (err) 3390 goto reg_err; 3391 3392 /* We shouldn't have too many configs, so this iteration shouldn't be 3393 * too costly. 
	 */
	idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			DRM_DEBUG("OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
	if (err) {
		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	/* Config id 0 is invalid, id 1 is reserved for the kernel-stored test config. */
	oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		DRM_DEBUG("Failed to allocate an id for OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&dev_priv->perf.metrics_lock);

	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
	put_oa_config(dev_priv, oa_config);
	DRM_DEBUG("Failed to add new OA config\n");
	return err;
}

/**
 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
 * @dev: drm device
 * @data: ioctl data (pointer to u64 integer) copied from userspace
 * @file: drm file
 *
 * Configs can be removed while being used; they will stop appearing in sysfs
 * and their content will be freed when the stream using the config is closed.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		goto lock_err;

	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
	if (!oa_config) {
		DRM_DEBUG("Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto config_err;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &oa_config->sysfs_metric);

	idr_remove(&dev_priv->perf.metrics_idr, *arg);

	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	put_oa_config(dev_priv, oa_config);

config_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
lock_err:
	return ret;
}

static struct ctl_table oa_table[] = {
	{
	 .procname = "perf_stream_paranoid",
	 .data = &i915_perf_stream_paranoid,
	 .maxlen = sizeof(i915_perf_stream_paranoid),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = SYSCTL_ZERO,
	 .extra2 = SYSCTL_ONE,
	 },
	{
	 .procname = "oa_max_sample_rate",
	 .data = &i915_oa_max_sample_rate,
	 .maxlen = sizeof(i915_oa_max_sample_rate),
	 .mode = 0644,
	 .proc_handler = proc_dointvec_minmax,
	 .extra1 = SYSCTL_ZERO,
	 .extra2 = &oa_sample_rate_hard_limit,
	 },
	{}
};

static struct ctl_table i915_root[] = {
	{
	 .procname = "i915",
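	 /* This entry is parented under "dev" below, so the knobs in oa_table
	  * are reachable from userspace as dev.i915.perf_stream_paranoid and
	  * dev.i915.oa_max_sample_rate.
	  */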
	 .maxlen = 0,
	 .mode = 0555,
	 .child = oa_table,
	 },
	{}
};

static struct ctl_table dev_root[] = {
	{
	 .procname = "dev",
	 .maxlen = 0,
	 .mode = 0555,
	 .child = i915_root,
	 },
	{}
};

/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
 *
 * Note: i915-perf initialization is split into an 'init' and 'register'
 * phase with the i915_perf_register() exposing state to userspace.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (IS_HASWELL(dev_priv)) {
		dev_priv->perf.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		dev_priv->perf.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.ops.read = gen7_oa_read;
		dev_priv->perf.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa_formats = hsw_oa_formats;
	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		/* Note: although we could theoretically also support the
		 * legacy ringbuffer mode on BDW (and earlier iterations of
		 * this driver, before upstreaming, did this) it didn't seem
		 * worth the complexity to maintain now that BDW+ enables
		 * execlist mode by default.
		 */
		dev_priv->perf.oa_formats = gen8_plus_oa_formats;

		dev_priv->perf.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.ops.read = gen8_oa_read;
		dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		if (IS_GEN_RANGE(dev_priv, 8, 9)) {
			dev_priv->perf.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			dev_priv->perf.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			if (IS_CHERRYVIEW(dev_priv)) {
				dev_priv->perf.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set;

			if (IS_GEN(dev_priv, 8)) {
				dev_priv->perf.ctx_oactxctrl_offset = 0x120;
				dev_priv->perf.ctx_flexeu0_offset = 0x2ce;

				dev_priv->perf.gen8_valid_ctx_bit = BIT(25);
			} else {
				dev_priv->perf.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.ctx_flexeu0_offset = 0x3de;

				dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
			}
		} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
			dev_priv->perf.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.ops.is_valid_mux_reg =
				gen10_is_valid_mux_addr;
			dev_priv->perf.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set;

			if (IS_GEN(dev_priv, 10)) {
				dev_priv->perf.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.ctx_flexeu0_offset = 0x3de;
			} else {
				dev_priv->perf.ctx_oactxctrl_offset = 0x124;
				dev_priv->perf.ctx_flexeu0_offset = 0x78e;
			}

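			/* gen8_valid_ctx_bit selects the bit used to tell
			 * whether an OA report carries a valid context ID;
			 * its position varies across generations.
			 */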
			dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
		}
	}

	if (dev_priv->perf.ops.enable_metric_set) {
		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);

		oa_sample_rate_hard_limit = 1000 *
			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		/* We set up some ratelimit state to potentially throttle any
		 * _NOTES about spurious, invalid OA reports which we don't
		 * forward to userspace.
		 *
		 * We print a _NOTE about any throttling when closing the
		 * stream instead of waiting until driver _fini which no one
		 * would ever see.
		 *
		 * Using the same limiting factors as printk_ratelimit()
		 */
		ratelimit_state_init(&dev_priv->perf.spurious_report_rs,
				     5 * HZ, 10);
		/* Since we use a DRM_NOTE for spurious reports it would be
		 * inconsistent to let __ratelimit() automatically print a
		 * warning for throttling.
		 */
		ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
				    RATELIMIT_MSG_ON_RELEASE);

		dev_priv->perf.initialized = true;
	}
}

static int destroy_config(int id, void *p, void *data)
{
	struct drm_i915_private *dev_priv = data;
	struct i915_oa_config *oa_config = p;

	put_oa_config(dev_priv, oa_config);

	return 0;
}

/**
 * i915_perf_fini - Counterpart to i915_perf_init()
 * @dev_priv: i915 device instance
 */
void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
	idr_destroy(&dev_priv->perf.metrics_idr);

	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops));

	dev_priv->perf.initialized = false;
}
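
/*
 * Illustrative userspace sketch (not part of the driver itself): removing a
 * previously added OA config via DRM_IOCTL_I915_PERF_REMOVE_CONFIG from the
 * i915 uapi. The ioctl takes a pointer to the u64 config id returned by the
 * add ioctl; drm_fd is assumed to be an already-open DRM file descriptor
 * and, as with adding, CAP_SYS_ADMIN is required unless
 * dev.i915.perf_stream_paranoid is 0.
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int remove_oa_config(int drm_fd, uint64_t config_id)
 *	{
 *		return ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
 *			     &config_id);
 *	}
 */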