1 /*
2 * Copyright © 2015-2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Robert Bragg <robert@sixbynine.org>
25 */
26
27
28 /**
29 * DOC: i915 Perf Overview
30 *
31 * Gen graphics supports a large number of performance counters that can help
32 * driver and application developers understand and optimize their use of the
33 * GPU.
34 *
35 * This i915 perf interface enables userspace to configure and open a file
36 * descriptor representing a stream of GPU metrics which can then be read() as
37 * a stream of sample records.
38 *
39 * The interface is particularly suited to exposing buffered metrics that are
40 * captured by DMA from the GPU, unsynchronized with and unrelated to the CPU.
41 *
42 * Streams representing a single context are accessible to applications with a
43 * corresponding drm file descriptor, such that OpenGL can use the interface
44 * without special privileges. Access to system-wide metrics requires root
45 * privileges by default, unless changed via the dev.i915.perf_stream_paranoid
46 * sysctl option.
47 *
48 */
49
50 /**
51 * DOC: i915 Perf History and Comparison with Core Perf
52 *
53 * The interface was initially inspired by the core Perf infrastructure but
54 * some notable differences are:
55 *
56 * i915 perf file descriptors represent a "stream" instead of an "event"; where
57 * a perf event primarily corresponds to a single 64bit value, while a stream
58 * might sample sets of tightly-coupled counters, depending on the
59 * configuration. For example the Gen OA unit isn't designed to support
60 * orthogonal configurations of individual counters; it's configured for a set
61 * of related counters. Samples for an i915 perf stream capturing OA metrics
62 * will include a set of counter values packed in a compact HW specific format.
63 * The OA unit supports a number of different packing formats which can be
64 * selected by the user opening the stream. Perf has support for grouping
65 * events, but each event in the group is configured, validated and
66 * authenticated individually with separate system calls.
67 *
68 * i915 perf stream configurations are provided as an array of u64 (key,value)
69 * pairs, instead of a fixed struct with multiple miscellaneous config members,
70 * interleaved with event-type specific members.
71 *
72 * i915 perf doesn't support exposing metrics via an mmap'd circular buffer.
73 * The supported metrics are being written to memory by the GPU unsynchronized
74 * with the CPU, using HW specific packing formats for counter sets. Sometimes
75 * the constraints on HW configuration require reports to be filtered before it
76 * would be acceptable to expose them to unprivileged applications - to hide
77 * the metrics of other processes/contexts. For these use cases a read() based
78 * interface is a good fit, and provides an opportunity to filter data as it
79 * gets copied from the GPU mapped buffers to userspace buffers.
80 *
81 *
82 * Issues hit with first prototype based on Core Perf
83 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
84 *
85 * The first prototype of this driver was based on the core perf
86 * infrastructure, and while we did make that mostly work, with some changes to
87 * perf, we found we were breaking or working around too many assumptions baked
88 * into perf's current cpu-centric design.
89 *
90 * In the end we didn't see a clear benefit to making perf's implementation and
91 * interface more complex by changing design assumptions while we knew we still
92 * wouldn't be able to use any existing perf based userspace tools.
93 *
94 * Also considering the Gen specific nature of the Observability hardware and
95 * how userspace will sometimes need to combine i915 perf OA metrics with
96 * side-band OA data captured via MI_REPORT_PERF_COUNT commands, we're
97 * expecting the interface to be used by a platform specific userspace such as
98 * OpenGL or tools. This is to say, we aren't inherently missing out on having
99 * a standard vendor/architecture agnostic interface by not using perf.
100 *
101 *
102 * For posterity, in case we might re-visit trying to adapt core perf to be
103 * better suited to exposing i915 metrics, these were the main pain points we
104 * hit:
105 *
106 * - The perf based OA PMU driver broke some significant design assumptions:
107 *
108 * Existing perf pmus are used for profiling work on a cpu and we were
109 * introducing the idea of _IS_DEVICE pmus with different security
110 * implications, the need to fake cpu-related data (such as user/kernel
111 * registers) to fit with perf's current design, and adding _DEVICE records
112 * as a way to forward device-specific status records.
113 *
114 * The OA unit writes reports of counters into a circular buffer, without
115 * involvement from the CPU, making our PMU driver the first of its kind.
116 *
117 * Given the way we were periodically forwarding data from the GPU-mapped OA
118 * buffer to perf's buffer, those bursts of sample writes looked to perf like
119 * we were sampling too fast and so we had to subvert its throttling checks.
120 *
121 * Perf supports groups of counters and allows those to be read via
122 * transactions internally but transactions currently seem designed to be
123 * explicitly initiated from the cpu (say in response to a userspace read())
124 * and while we could pull a report out of the OA buffer we can't
125 * trigger a report from the cpu on demand.
126 *
127 * Related to being report based, the OA counters are configured in HW as a
128 * set while perf generally expects counter configurations to be orthogonal.
129 * Although counters can be associated with a group leader as they are
130 * opened, there's no clear precedent for being able to provide group-wide
131 * configuration attributes (for example we want to let userspace choose the
132 * OA unit report format used to capture all counters in a set, or specify a
133 * GPU context to filter metrics on). We avoided using perf's grouping
134 * feature and forwarded OA reports to userspace via perf's 'raw' sample
135 * field. This suited our userspace well considering how coupled the counters
136 * are when dealing with normalization. It would be inconvenient to split
137 * counters up into separate events, only to require userspace to recombine
138 * them. For Mesa it's also convenient to be forwarded raw, periodic reports
139 * for combining with the side-band raw reports it captures using
140 * MI_REPORT_PERF_COUNT commands.
141 *
142 * - As a side note on perf's grouping feature, there was also some concern
143 * that using PERF_FORMAT_GROUP as a way to pack together counter values
144 * would quite drastically inflate our sample sizes, which would likely
145 * lower the effective sampling resolutions we could use when the available
146 * memory bandwidth is limited.
147 *
148 * With the OA unit's report formats, counters are packed together as 32
149 * or 40bit values, with the largest report size being 256 bytes.
150 *
151 * PERF_FORMAT_GROUP values are 64bit, but there doesn't appear to be a
152 * documented ordering to the values, implying PERF_FORMAT_ID must also be
153 * used to add a 64bit ID before each value; giving 16 bytes per counter.
154 *
155 * Related to counter orthogonality, we can't time share the OA unit, while
156 * event scheduling is a central design idea within perf for allowing
157 * userspace to open + enable more events than can be configured in HW at any
158 * one time. The OA unit is not designed to allow re-configuration while in
159 * use. We can't reconfigure the OA unit without losing internal OA unit
160 * state which we can't access explicitly to save and restore. Reconfiguring
161 * the OA unit is also relatively slow, involving ~100 register writes. From
162 * userspace Mesa also depends on a stable OA configuration when emitting
163 * MI_REPORT_PERF_COUNT commands and importantly the OA unit can't be
164 * disabled while there are outstanding MI_RPC commands lest we hang the
165 * command streamer.
166 *
167 * The contents of sample records aren't extensible by device drivers (i.e.
168 * the sample_type bits). As an example, Sourab Gupta had been looking to
169 * attach GPU timestamps to our OA samples. We were shoehorning OA reports
170 * into sample records by using the 'raw' field, but it's tricky to pack more
171 * than one thing into this field because events/core.c currently only lets a
172 * pmu give a single raw data pointer plus len which will be copied into the
173 * ring buffer. To include more than the OA report we'd have to copy the
174 * report into an intermediate larger buffer. I'd been considering allowing a
175 * vector of data+len values to be specified for copying the raw data, but
176 * it felt like a kludge to be using the raw field for this purpose.
177 *
178 * - It felt like our perf based PMU was making some technical compromises
179 * just for the sake of using perf:
180 *
181 * perf_event_open() requires events to either relate to a pid or a specific
182 * cpu core, while our device pmu related to neither. Events opened with a
183 * pid will be automatically enabled/disabled according to the scheduling of
184 * that process - so not appropriate for us. When an event is related to a
185 * cpu id, perf ensures pmu methods will be invoked via an inter-processor
186 * interrupt on that core. To avoid invasive changes our userspace opened OA
187 * perf events for a specific cpu. This was workable but it meant the
188 * majority of the OA driver ran in atomic context, including all OA report
189 * forwarding, which wasn't really necessary in our case and made
190 * our locking requirements somewhat complex as we handled the interaction
191 * with the rest of the i915 driver.
192 */
193
194 #include <linux/anon_inodes.h>
195 #include <linux/sizes.h>
196 #include <linux/uuid.h>
197
198 #include "gem/i915_gem_context.h"
199 #include "gem/i915_gem_pm.h"
200 #include "gt/intel_lrc_reg.h"
201
202 #include "i915_drv.h"
203 #include "i915_perf.h"
204 #include "oa/i915_oa_hsw.h"
205 #include "oa/i915_oa_bdw.h"
206 #include "oa/i915_oa_chv.h"
207 #include "oa/i915_oa_sklgt2.h"
208 #include "oa/i915_oa_sklgt3.h"
209 #include "oa/i915_oa_sklgt4.h"
210 #include "oa/i915_oa_bxt.h"
211 #include "oa/i915_oa_kblgt2.h"
212 #include "oa/i915_oa_kblgt3.h"
213 #include "oa/i915_oa_glk.h"
214 #include "oa/i915_oa_cflgt2.h"
215 #include "oa/i915_oa_cflgt3.h"
216 #include "oa/i915_oa_cnl.h"
217 #include "oa/i915_oa_icl.h"
218
219 /* HW requires this to be a power of two, between 128k and 16M, though the
220 * driver is currently designed assuming the largest 16M size is used such
221 * that the overflow cases are unlikely in normal operation.
222 */
223 #define OA_BUFFER_SIZE SZ_16M
224
225 #define OA_TAKEN(tail, head) ((tail - head) & (OA_BUFFER_SIZE - 1))
226
227 /**
228 * DOC: OA Tail Pointer Race
229 *
230 * There's a HW race condition between OA unit tail pointer register updates and
231 * writes to memory whereby the tail pointer can sometimes get ahead of what's
232 * been written out to the OA buffer so far (in terms of what's visible to the
233 * CPU).
234 *
235 * Although this can be observed explicitly while copying reports to userspace
236 * by checking for a zeroed report-id field in tail reports, we want to account
237 * for this earlier, as part of the oa_buffer_check_unlocked() call, to avoid
238 * lots of redundant read() attempts.
239 *
240 * In effect we define a tail pointer for reading that lags the real tail
241 * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
242 * time for the corresponding reports to become visible to the CPU.
243 *
244 * To manage this we actually track two tail pointers:
245 * 1) An 'aging' tail with an associated timestamp that is tracked until we
246 * can trust the corresponding data is visible to the CPU; at which point
247 * it is considered 'aged'.
248 * 2) An 'aged' tail that can be used for read()ing.
249 *
250 * The two separate pointers let us decouple read()s from tail pointer aging.
251 *
252 * The tail pointers are checked and updated at a limited rate within a hrtimer
253 * callback (the same callback that is used for delivering EPOLLIN events).
254 *
255 * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
256 * indicates that an updated tail pointer is needed.
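 *
 * As a rough sketch (illustrative only, reusing the names from this file),
 * one pass of that hrtimer callback behaves along these lines:
 *
 *   hw_tail = oa_hw_tail_read(stream);
 *   if (aging_tail is valid && now - aging_timestamp > OA_TAIL_MARGIN_NSEC)
 *           aged_tail = aging_tail;     (now old enough for read() to trust)
 *   if (aging_tail is invalid && OA_TAKEN(hw_tail, aged_tail) >= report_size)
 *           aging_tail = hw_tail;       (start aging the newly read tail)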
257 * 258 * Most of the implementation details for this workaround are in 259 * oa_buffer_check_unlocked() and _append_oa_reports() 260 * 261 * Note for posterity: previously the driver used to define an effective tail 262 * pointer that lagged the real pointer by a 'tail margin' measured in bytes 263 * derived from %OA_TAIL_MARGIN_NSEC and the configured sampling frequency. 264 * This was flawed considering that the OA unit may also automatically generate 265 * non-periodic reports (such as on context switch) or the OA unit may be 266 * enabled without any periodic sampling. 267 */ 268 #define OA_TAIL_MARGIN_NSEC 100000ULL 269 #define INVALID_TAIL_PTR 0xffffffff 270 271 /* frequency for checking whether the OA unit has written new reports to the 272 * circular OA buffer... 273 */ 274 #define POLL_FREQUENCY 200 275 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) 276 277 /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */ 278 static u32 i915_perf_stream_paranoid = true; 279 280 /* The maximum exponent the hardware accepts is 63 (essentially it selects one 281 * of the 64bit timestamp bits to trigger reports from) but there's currently 282 * no known use case for sampling as infrequently as once per 47 thousand years. 283 * 284 * Since the timestamps included in OA reports are only 32bits it seems 285 * reasonable to limit the OA exponent where it's still possible to account for 286 * overflow in OA report timestamps. 287 */ 288 #define OA_EXPONENT_MAX 31 289 290 #define INVALID_CTX_ID 0xffffffff 291 292 /* On Gen8+ automatically triggered OA reports include a 'reason' field... */ 293 #define OAREPORT_REASON_MASK 0x3f 294 #define OAREPORT_REASON_SHIFT 19 295 #define OAREPORT_REASON_TIMER (1<<0) 296 #define OAREPORT_REASON_CTX_SWITCH (1<<3) 297 #define OAREPORT_REASON_CLK_RATIO (1<<5) 298 299 300 /* For sysctl proc_dointvec_minmax of i915_oa_max_sample_rate 301 * 302 * The highest sampling frequency we can theoretically program the OA unit 303 * with is always half the timestamp frequency: E.g. 6.25Mhz for Haswell. 304 * 305 * Initialized just before we register the sysctl parameter. 306 */ 307 static int oa_sample_rate_hard_limit; 308 309 /* Theoretically we can program the OA unit to sample every 160ns but don't 310 * allow that by default unless root... 311 * 312 * The default threshold of 100000Hz is based on perf's similar 313 * kernel.perf_event_max_sample_rate sysctl parameter. 314 */ 315 static u32 i915_oa_max_sample_rate = 100000; 316 317 /* XXX: beware if future OA HW adds new report formats that the current 318 * code assumes all reports have a power-of-two size and ~(size - 1) can 319 * be used as a mask to align the OA tail pointer. 
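 *
 * For example, with the largest 256 byte report format the tail value read
 * back from hardware can be aligned with hw_tail &= ~(256 - 1), whereas a
 * hypothetical 192 byte format would break that masking and could leave
 * reports straddling the end of the buffer.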
320 */ 321 static const struct i915_oa_format hsw_oa_formats[I915_OA_FORMAT_MAX] = { 322 [I915_OA_FORMAT_A13] = { 0, 64 }, 323 [I915_OA_FORMAT_A29] = { 1, 128 }, 324 [I915_OA_FORMAT_A13_B8_C8] = { 2, 128 }, 325 /* A29_B8_C8 Disallowed as 192 bytes doesn't factor into buffer size */ 326 [I915_OA_FORMAT_B4_C8] = { 4, 64 }, 327 [I915_OA_FORMAT_A45_B8_C8] = { 5, 256 }, 328 [I915_OA_FORMAT_B4_C8_A16] = { 6, 128 }, 329 [I915_OA_FORMAT_C4_B8] = { 7, 64 }, 330 }; 331 332 static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { 333 [I915_OA_FORMAT_A12] = { 0, 64 }, 334 [I915_OA_FORMAT_A12_B8_C8] = { 2, 128 }, 335 [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, 336 [I915_OA_FORMAT_C4_B8] = { 7, 64 }, 337 }; 338 339 #define SAMPLE_OA_REPORT (1<<0) 340 341 /** 342 * struct perf_open_properties - for validated properties given to open a stream 343 * @sample_flags: `DRM_I915_PERF_PROP_SAMPLE_*` properties are tracked as flags 344 * @single_context: Whether a single or all gpu contexts should be monitored 345 * @ctx_handle: A gem ctx handle for use with @single_context 346 * @metrics_set: An ID for an OA unit metric set advertised via sysfs 347 * @oa_format: An OA unit HW report format 348 * @oa_periodic: Whether to enable periodic OA unit sampling 349 * @oa_period_exponent: The OA unit sampling period is derived from this 350 * 351 * As read_properties_unlocked() enumerates and validates the properties given 352 * to open a stream of metrics the configuration is built up in the structure 353 * which starts out zero initialized. 354 */ 355 struct perf_open_properties { 356 u32 sample_flags; 357 358 u64 single_context:1; 359 u64 ctx_handle; 360 361 /* OA sampling state */ 362 int metrics_set; 363 int oa_format; 364 bool oa_periodic; 365 int oa_period_exponent; 366 }; 367 368 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer); 369 370 static void free_oa_config(struct drm_i915_private *dev_priv, 371 struct i915_oa_config *oa_config) 372 { 373 if (!PTR_ERR(oa_config->flex_regs)) 374 kfree(oa_config->flex_regs); 375 if (!PTR_ERR(oa_config->b_counter_regs)) 376 kfree(oa_config->b_counter_regs); 377 if (!PTR_ERR(oa_config->mux_regs)) 378 kfree(oa_config->mux_regs); 379 kfree(oa_config); 380 } 381 382 static void put_oa_config(struct drm_i915_private *dev_priv, 383 struct i915_oa_config *oa_config) 384 { 385 if (!atomic_dec_and_test(&oa_config->ref_count)) 386 return; 387 388 free_oa_config(dev_priv, oa_config); 389 } 390 391 static int get_oa_config(struct drm_i915_private *dev_priv, 392 int metrics_set, 393 struct i915_oa_config **out_config) 394 { 395 int ret; 396 397 if (metrics_set == 1) { 398 *out_config = &dev_priv->perf.test_config; 399 atomic_inc(&dev_priv->perf.test_config.ref_count); 400 return 0; 401 } 402 403 ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock); 404 if (ret) 405 return ret; 406 407 *out_config = idr_find(&dev_priv->perf.metrics_idr, metrics_set); 408 if (!*out_config) 409 ret = -EINVAL; 410 else 411 atomic_inc(&(*out_config)->ref_count); 412 413 mutex_unlock(&dev_priv->perf.metrics_lock); 414 415 return ret; 416 } 417 418 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) 419 { 420 struct drm_i915_private *dev_priv = stream->dev_priv; 421 422 return I915_READ(GEN8_OATAILPTR) & GEN8_OATAILPTR_MASK; 423 } 424 425 static u32 gen7_oa_hw_tail_read(struct i915_perf_stream *stream) 426 { 427 struct drm_i915_private *dev_priv = stream->dev_priv; 428 u32 oastatus1 = I915_READ(GEN7_OASTATUS1); 429 430 return oastatus1 & 
GEN7_OASTATUS1_TAIL_MASK; 431 } 432 433 /** 434 * oa_buffer_check_unlocked - check for data and update tail ptr state 435 * @stream: i915 stream instance 436 * 437 * This is either called via fops (for blocking reads in user ctx) or the poll 438 * check hrtimer (atomic ctx) to check the OA buffer tail pointer and check 439 * if there is data available for userspace to read. 440 * 441 * This function is central to providing a workaround for the OA unit tail 442 * pointer having a race with respect to what data is visible to the CPU. 443 * It is responsible for reading tail pointers from the hardware and giving 444 * the pointers time to 'age' before they are made available for reading. 445 * (See description of OA_TAIL_MARGIN_NSEC above for further details.) 446 * 447 * Besides returning true when there is data available to read() this function 448 * also has the side effect of updating the oa_buffer.tails[], .aging_timestamp 449 * and .aged_tail_idx state used for reading. 450 * 451 * Note: It's safe to read OA config state here unlocked, assuming that this is 452 * only called while the stream is enabled, while the global OA configuration 453 * can't be modified. 454 * 455 * Returns: %true if the OA buffer contains data, else %false 456 */ 457 static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) 458 { 459 struct drm_i915_private *dev_priv = stream->dev_priv; 460 int report_size = stream->oa_buffer.format_size; 461 unsigned long flags; 462 unsigned int aged_idx; 463 u32 head, hw_tail, aged_tail, aging_tail; 464 u64 now; 465 466 /* We have to consider the (unlikely) possibility that read() errors 467 * could result in an OA buffer reset which might reset the head, 468 * tails[] and aged_tail state. 469 */ 470 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 471 472 /* NB: The head we observe here might effectively be a little out of 473 * date (between head and tails[aged_idx].offset if there is currently 474 * a read() in progress. 475 */ 476 head = stream->oa_buffer.head; 477 478 aged_idx = stream->oa_buffer.aged_tail_idx; 479 aged_tail = stream->oa_buffer.tails[aged_idx].offset; 480 aging_tail = stream->oa_buffer.tails[!aged_idx].offset; 481 482 hw_tail = dev_priv->perf.ops.oa_hw_tail_read(stream); 483 484 /* The tail pointer increases in 64 byte increments, 485 * not in report_size steps... 486 */ 487 hw_tail &= ~(report_size - 1); 488 489 now = ktime_get_mono_fast_ns(); 490 491 /* Update the aged tail 492 * 493 * Flip the tail pointer available for read()s once the aging tail is 494 * old enough to trust that the corresponding data will be visible to 495 * the CPU... 496 * 497 * Do this before updating the aging pointer in case we may be able to 498 * immediately start aging a new pointer too (if new data has become 499 * available) without needing to wait for a later hrtimer callback. 500 */ 501 if (aging_tail != INVALID_TAIL_PTR && 502 ((now - stream->oa_buffer.aging_timestamp) > 503 OA_TAIL_MARGIN_NSEC)) { 504 505 aged_idx ^= 1; 506 stream->oa_buffer.aged_tail_idx = aged_idx; 507 508 aged_tail = aging_tail; 509 510 /* Mark that we need a new pointer to start aging... */ 511 stream->oa_buffer.tails[!aged_idx].offset = INVALID_TAIL_PTR; 512 aging_tail = INVALID_TAIL_PTR; 513 } 514 515 /* Update the aging tail 516 * 517 * We throttle aging tail updates until we have a new tail that 518 * represents >= one report more data than is already available for 519 * reading. 
This ensures there will be enough data for a successful 520 * read once this new pointer has aged and ensures we will give the new 521 * pointer time to age. 522 */ 523 if (aging_tail == INVALID_TAIL_PTR && 524 (aged_tail == INVALID_TAIL_PTR || 525 OA_TAKEN(hw_tail, aged_tail) >= report_size)) { 526 struct i915_vma *vma = stream->oa_buffer.vma; 527 u32 gtt_offset = i915_ggtt_offset(vma); 528 529 /* Be paranoid and do a bounds check on the pointer read back 530 * from hardware, just in case some spurious hardware condition 531 * could put the tail out of bounds... 532 */ 533 if (hw_tail >= gtt_offset && 534 hw_tail < (gtt_offset + OA_BUFFER_SIZE)) { 535 stream->oa_buffer.tails[!aged_idx].offset = 536 aging_tail = hw_tail; 537 stream->oa_buffer.aging_timestamp = now; 538 } else { 539 DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", 540 hw_tail); 541 } 542 } 543 544 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 545 546 return aged_tail == INVALID_TAIL_PTR ? 547 false : OA_TAKEN(aged_tail, head) >= report_size; 548 } 549 550 /** 551 * append_oa_status - Appends a status record to a userspace read() buffer. 552 * @stream: An i915-perf stream opened for OA metrics 553 * @buf: destination buffer given by userspace 554 * @count: the number of bytes userspace wants to read 555 * @offset: (inout): the current position for writing into @buf 556 * @type: The kind of status to report to userspace 557 * 558 * Writes a status record (such as `DRM_I915_PERF_RECORD_OA_REPORT_LOST`) 559 * into the userspace read() buffer. 560 * 561 * The @buf @offset will only be updated on success. 562 * 563 * Returns: 0 on success, negative error code on failure. 564 */ 565 static int append_oa_status(struct i915_perf_stream *stream, 566 char __user *buf, 567 size_t count, 568 size_t *offset, 569 enum drm_i915_perf_record_type type) 570 { 571 struct drm_i915_perf_record_header header = { type, 0, sizeof(header) }; 572 573 if ((count - *offset) < header.size) 574 return -ENOSPC; 575 576 if (copy_to_user(buf + *offset, &header, sizeof(header))) 577 return -EFAULT; 578 579 (*offset) += header.size; 580 581 return 0; 582 } 583 584 /** 585 * append_oa_sample - Copies single OA report into userspace read() buffer. 586 * @stream: An i915-perf stream opened for OA metrics 587 * @buf: destination buffer given by userspace 588 * @count: the number of bytes userspace wants to read 589 * @offset: (inout): the current position for writing into @buf 590 * @report: A single OA report to (optionally) include as part of the sample 591 * 592 * The contents of a sample are configured through `DRM_I915_PERF_PROP_SAMPLE_*` 593 * properties when opening a stream, tracked as `stream->sample_flags`. This 594 * function copies the requested components of a single sample to the given 595 * read() @buf. 596 * 597 * The @buf @offset will only be updated on success. 598 * 599 * Returns: 0 on success, negative error code on failure. 
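 *
 * Note: the layout written to @buf is a struct drm_i915_perf_record_header
 * (with type %DRM_I915_PERF_RECORD_SAMPLE) followed immediately by the raw
 * HW report when %SAMPLE_OA_REPORT has been requested.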
600 */ 601 static int append_oa_sample(struct i915_perf_stream *stream, 602 char __user *buf, 603 size_t count, 604 size_t *offset, 605 const u8 *report) 606 { 607 int report_size = stream->oa_buffer.format_size; 608 struct drm_i915_perf_record_header header; 609 u32 sample_flags = stream->sample_flags; 610 611 header.type = DRM_I915_PERF_RECORD_SAMPLE; 612 header.pad = 0; 613 header.size = stream->sample_size; 614 615 if ((count - *offset) < header.size) 616 return -ENOSPC; 617 618 buf += *offset; 619 if (copy_to_user(buf, &header, sizeof(header))) 620 return -EFAULT; 621 buf += sizeof(header); 622 623 if (sample_flags & SAMPLE_OA_REPORT) { 624 if (copy_to_user(buf, report, report_size)) 625 return -EFAULT; 626 } 627 628 (*offset) += header.size; 629 630 return 0; 631 } 632 633 /** 634 * Copies all buffered OA reports into userspace read() buffer. 635 * @stream: An i915-perf stream opened for OA metrics 636 * @buf: destination buffer given by userspace 637 * @count: the number of bytes userspace wants to read 638 * @offset: (inout): the current position for writing into @buf 639 * 640 * Notably any error condition resulting in a short read (-%ENOSPC or 641 * -%EFAULT) will be returned even though one or more records may 642 * have been successfully copied. In this case it's up to the caller 643 * to decide if the error should be squashed before returning to 644 * userspace. 645 * 646 * Note: reports are consumed from the head, and appended to the 647 * tail, so the tail chases the head?... If you think that's mad 648 * and back-to-front you're not alone, but this follows the 649 * Gen PRM naming convention. 650 * 651 * Returns: 0 on success, negative error code on failure. 652 */ 653 static int gen8_append_oa_reports(struct i915_perf_stream *stream, 654 char __user *buf, 655 size_t count, 656 size_t *offset) 657 { 658 struct drm_i915_private *dev_priv = stream->dev_priv; 659 int report_size = stream->oa_buffer.format_size; 660 u8 *oa_buf_base = stream->oa_buffer.vaddr; 661 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); 662 u32 mask = (OA_BUFFER_SIZE - 1); 663 size_t start_offset = *offset; 664 unsigned long flags; 665 unsigned int aged_tail_idx; 666 u32 head, tail; 667 u32 taken; 668 int ret = 0; 669 670 if (WARN_ON(!stream->enabled)) 671 return -EIO; 672 673 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 674 675 head = stream->oa_buffer.head; 676 aged_tail_idx = stream->oa_buffer.aged_tail_idx; 677 tail = stream->oa_buffer.tails[aged_tail_idx].offset; 678 679 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 680 681 /* 682 * An invalid tail pointer here means we're still waiting for the poll 683 * hrtimer callback to give us a pointer 684 */ 685 if (tail == INVALID_TAIL_PTR) 686 return -EAGAIN; 687 688 /* 689 * NB: oa_buffer.head/tail include the gtt_offset which we don't want 690 * while indexing relative to oa_buf_base. 691 */ 692 head -= gtt_offset; 693 tail -= gtt_offset; 694 695 /* 696 * An out of bounds or misaligned head or tail pointer implies a driver 697 * bug since we validate + align the tail pointers we read from the 698 * hardware and we are in full control of the head pointer which should 699 * only be incremented by multiples of the report size (notably also 700 * all a power of two). 
701 */
702 if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size ||
703 tail > OA_BUFFER_SIZE || tail % report_size,
704 "Inconsistent OA buffer pointers: head = %u, tail = %u\n",
705 head, tail))
706 return -EIO;
707
708
709 for (/* none */;
710 (taken = OA_TAKEN(tail, head));
711 head = (head + report_size) & mask) {
712 u8 *report = oa_buf_base + head;
713 u32 *report32 = (void *)report;
714 u32 ctx_id;
715 u32 reason;
716
717 /*
718 * All the report sizes factor neatly into the buffer
719 * size so we never expect to see a report split
720 * between the beginning and end of the buffer.
721 *
722 * Given the initial alignment check a misalignment
723 * here would imply a driver bug that would result
724 * in an overrun.
725 */
726 if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) {
727 DRM_ERROR("Spurious OA head ptr: non-integral report offset\n");
728 break;
729 }
730
731 /*
732 * The reason field includes flags identifying what
733 * triggered this specific report (mostly timer
734 * triggered or e.g. due to a context switch).
735 *
736 * This field is never expected to be zero so we can
737 * check that the report isn't invalid before copying
738 * it to userspace...
739 */
740 reason = ((report32[0] >> OAREPORT_REASON_SHIFT) &
741 OAREPORT_REASON_MASK);
742 if (reason == 0) {
743 if (__ratelimit(&dev_priv->perf.spurious_report_rs))
744 DRM_NOTE("Skipping spurious, invalid OA report\n");
745 continue;
746 }
747
748 ctx_id = report32[2] & stream->specific_ctx_id_mask;
749
750 /*
751 * Squash whatever is in the CTX_ID field if it's marked as
752 * invalid to be sure we avoid false-positive, single-context
753 * filtering below...
754 *
755 * Note that we don't clear the valid_ctx_bit so userspace can
756 * understand that the ID has been squashed by the kernel.
757 */
758 if (!(report32[0] & dev_priv->perf.gen8_valid_ctx_bit))
759 ctx_id = report32[2] = INVALID_CTX_ID;
760
761 /*
762 * NB: For Gen 8 the OA unit no longer supports clock gating
763 * off for a specific context and the kernel can't securely
764 * stop the counters from updating as system-wide / global
765 * values.
766 *
767 * Automatic reports now include a context ID so reports can be
768 * filtered on the cpu but it's not worth trying to
769 * automatically subtract/hide counter progress for other
770 * contexts while filtering since we can't stop userspace
771 * issuing MI_REPORT_PERF_COUNT commands which would still
772 * provide a side-band view of the real values.
773 *
774 * To allow userspace (such as Mesa/GL_INTEL_performance_query)
775 * to normalize counters for a single filtered context, it
776 * needs to be forwarded bookend context-switch reports so that it
777 * can track switches in between MI_REPORT_PERF_COUNT commands
778 * and can itself subtract/ignore the progress of counters
779 * associated with other contexts. Note that the hardware
780 * automatically triggers reports when switching to a new
781 * context which are tagged with the ID of the newly active
782 * context. To avoid the complexity (and likely fragility) of
783 * reading ahead while parsing reports to try and minimize
784 * forwarding redundant context switch reports (i.e. between
785 * other, unrelated contexts) we simply elect to forward them
786 * all.
787 *
788 * We don't rely solely on the reason field to identify context
789 * switches since it's not uncommon for periodic samples to
790 * identify a switch before any 'context switch' report.
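 *
 * In short, a report is forwarded below if any of the following hold: the
 * stream isn't filtering on a specific context, the report belongs to the
 * filtered context, the previously forwarded report belonged to the
 * filtered context (a bookend), or the report is flagged as a context
 * switch.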
791 */ 792 if (!dev_priv->perf.exclusive_stream->ctx || 793 stream->specific_ctx_id == ctx_id || 794 stream->oa_buffer.last_ctx_id == stream->specific_ctx_id || 795 reason & OAREPORT_REASON_CTX_SWITCH) { 796 797 /* 798 * While filtering for a single context we avoid 799 * leaking the IDs of other contexts. 800 */ 801 if (dev_priv->perf.exclusive_stream->ctx && 802 stream->specific_ctx_id != ctx_id) { 803 report32[2] = INVALID_CTX_ID; 804 } 805 806 ret = append_oa_sample(stream, buf, count, offset, 807 report); 808 if (ret) 809 break; 810 811 stream->oa_buffer.last_ctx_id = ctx_id; 812 } 813 814 /* 815 * The above reason field sanity check is based on 816 * the assumption that the OA buffer is initially 817 * zeroed and we reset the field after copying so the 818 * check is still meaningful once old reports start 819 * being overwritten. 820 */ 821 report32[0] = 0; 822 } 823 824 if (start_offset != *offset) { 825 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 826 827 /* 828 * We removed the gtt_offset for the copy loop above, indexing 829 * relative to oa_buf_base so put back here... 830 */ 831 head += gtt_offset; 832 833 I915_WRITE(GEN8_OAHEADPTR, head & GEN8_OAHEADPTR_MASK); 834 stream->oa_buffer.head = head; 835 836 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 837 } 838 839 return ret; 840 } 841 842 /** 843 * gen8_oa_read - copy status records then buffered OA reports 844 * @stream: An i915-perf stream opened for OA metrics 845 * @buf: destination buffer given by userspace 846 * @count: the number of bytes userspace wants to read 847 * @offset: (inout): the current position for writing into @buf 848 * 849 * Checks OA unit status registers and if necessary appends corresponding 850 * status records for userspace (such as for a buffer full condition) and then 851 * initiate appending any buffered OA reports. 852 * 853 * Updates @offset according to the number of bytes successfully copied into 854 * the userspace buffer. 855 * 856 * NB: some data may be successfully copied to the userspace buffer 857 * even if an error is returned, and this is reflected in the 858 * updated @offset. 859 * 860 * Returns: zero on success or a negative error code 861 */ 862 static int gen8_oa_read(struct i915_perf_stream *stream, 863 char __user *buf, 864 size_t count, 865 size_t *offset) 866 { 867 struct drm_i915_private *dev_priv = stream->dev_priv; 868 u32 oastatus; 869 int ret; 870 871 if (WARN_ON(!stream->oa_buffer.vaddr)) 872 return -EIO; 873 874 oastatus = I915_READ(GEN8_OASTATUS); 875 876 /* 877 * We treat OABUFFER_OVERFLOW as a significant error: 878 * 879 * Although theoretically we could handle this more gracefully 880 * sometimes, some Gens don't correctly suppress certain 881 * automatically triggered reports in this condition and so we 882 * have to assume that old reports are now being trampled 883 * over. 884 * 885 * Considering how we don't currently give userspace control 886 * over the OA buffer size and always configure a large 16MB 887 * buffer, then a buffer overflow does anyway likely indicate 888 * that something has gone quite badly wrong. 
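 *
 * Either way, when this path is taken userspace will see a
 * %DRM_I915_PERF_RECORD_OA_BUFFER_LOST record in its read() stream and
 * should assume there is a gap in the captured metrics.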
889 */ 890 if (oastatus & GEN8_OASTATUS_OABUFFER_OVERFLOW) { 891 ret = append_oa_status(stream, buf, count, offset, 892 DRM_I915_PERF_RECORD_OA_BUFFER_LOST); 893 if (ret) 894 return ret; 895 896 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", 897 stream->period_exponent); 898 899 dev_priv->perf.ops.oa_disable(stream); 900 dev_priv->perf.ops.oa_enable(stream); 901 902 /* 903 * Note: .oa_enable() is expected to re-init the oabuffer and 904 * reset GEN8_OASTATUS for us 905 */ 906 oastatus = I915_READ(GEN8_OASTATUS); 907 } 908 909 if (oastatus & GEN8_OASTATUS_REPORT_LOST) { 910 ret = append_oa_status(stream, buf, count, offset, 911 DRM_I915_PERF_RECORD_OA_REPORT_LOST); 912 if (ret) 913 return ret; 914 I915_WRITE(GEN8_OASTATUS, 915 oastatus & ~GEN8_OASTATUS_REPORT_LOST); 916 } 917 918 return gen8_append_oa_reports(stream, buf, count, offset); 919 } 920 921 /** 922 * Copies all buffered OA reports into userspace read() buffer. 923 * @stream: An i915-perf stream opened for OA metrics 924 * @buf: destination buffer given by userspace 925 * @count: the number of bytes userspace wants to read 926 * @offset: (inout): the current position for writing into @buf 927 * 928 * Notably any error condition resulting in a short read (-%ENOSPC or 929 * -%EFAULT) will be returned even though one or more records may 930 * have been successfully copied. In this case it's up to the caller 931 * to decide if the error should be squashed before returning to 932 * userspace. 933 * 934 * Note: reports are consumed from the head, and appended to the 935 * tail, so the tail chases the head?... If you think that's mad 936 * and back-to-front you're not alone, but this follows the 937 * Gen PRM naming convention. 938 * 939 * Returns: 0 on success, negative error code on failure. 940 */ 941 static int gen7_append_oa_reports(struct i915_perf_stream *stream, 942 char __user *buf, 943 size_t count, 944 size_t *offset) 945 { 946 struct drm_i915_private *dev_priv = stream->dev_priv; 947 int report_size = stream->oa_buffer.format_size; 948 u8 *oa_buf_base = stream->oa_buffer.vaddr; 949 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); 950 u32 mask = (OA_BUFFER_SIZE - 1); 951 size_t start_offset = *offset; 952 unsigned long flags; 953 unsigned int aged_tail_idx; 954 u32 head, tail; 955 u32 taken; 956 int ret = 0; 957 958 if (WARN_ON(!stream->enabled)) 959 return -EIO; 960 961 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 962 963 head = stream->oa_buffer.head; 964 aged_tail_idx = stream->oa_buffer.aged_tail_idx; 965 tail = stream->oa_buffer.tails[aged_tail_idx].offset; 966 967 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 968 969 /* An invalid tail pointer here means we're still waiting for the poll 970 * hrtimer callback to give us a pointer 971 */ 972 if (tail == INVALID_TAIL_PTR) 973 return -EAGAIN; 974 975 /* NB: oa_buffer.head/tail include the gtt_offset which we don't want 976 * while indexing relative to oa_buf_base. 977 */ 978 head -= gtt_offset; 979 tail -= gtt_offset; 980 981 /* An out of bounds or misaligned head or tail pointer implies a driver 982 * bug since we validate + align the tail pointers we read from the 983 * hardware and we are in full control of the head pointer which should 984 * only be incremented by multiples of the report size (notably also 985 * all a power of two). 
986 */ 987 if (WARN_ONCE(head > OA_BUFFER_SIZE || head % report_size || 988 tail > OA_BUFFER_SIZE || tail % report_size, 989 "Inconsistent OA buffer pointers: head = %u, tail = %u\n", 990 head, tail)) 991 return -EIO; 992 993 994 for (/* none */; 995 (taken = OA_TAKEN(tail, head)); 996 head = (head + report_size) & mask) { 997 u8 *report = oa_buf_base + head; 998 u32 *report32 = (void *)report; 999 1000 /* All the report sizes factor neatly into the buffer 1001 * size so we never expect to see a report split 1002 * between the beginning and end of the buffer. 1003 * 1004 * Given the initial alignment check a misalignment 1005 * here would imply a driver bug that would result 1006 * in an overrun. 1007 */ 1008 if (WARN_ON((OA_BUFFER_SIZE - head) < report_size)) { 1009 DRM_ERROR("Spurious OA head ptr: non-integral report offset\n"); 1010 break; 1011 } 1012 1013 /* The report-ID field for periodic samples includes 1014 * some undocumented flags related to what triggered 1015 * the report and is never expected to be zero so we 1016 * can check that the report isn't invalid before 1017 * copying it to userspace... 1018 */ 1019 if (report32[0] == 0) { 1020 if (__ratelimit(&dev_priv->perf.spurious_report_rs)) 1021 DRM_NOTE("Skipping spurious, invalid OA report\n"); 1022 continue; 1023 } 1024 1025 ret = append_oa_sample(stream, buf, count, offset, report); 1026 if (ret) 1027 break; 1028 1029 /* The above report-id field sanity check is based on 1030 * the assumption that the OA buffer is initially 1031 * zeroed and we reset the field after copying so the 1032 * check is still meaningful once old reports start 1033 * being overwritten. 1034 */ 1035 report32[0] = 0; 1036 } 1037 1038 if (start_offset != *offset) { 1039 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 1040 1041 /* We removed the gtt_offset for the copy loop above, indexing 1042 * relative to oa_buf_base so put back here... 1043 */ 1044 head += gtt_offset; 1045 1046 I915_WRITE(GEN7_OASTATUS2, 1047 ((head & GEN7_OASTATUS2_HEAD_MASK) | 1048 GEN7_OASTATUS2_MEM_SELECT_GGTT)); 1049 stream->oa_buffer.head = head; 1050 1051 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 1052 } 1053 1054 return ret; 1055 } 1056 1057 /** 1058 * gen7_oa_read - copy status records then buffered OA reports 1059 * @stream: An i915-perf stream opened for OA metrics 1060 * @buf: destination buffer given by userspace 1061 * @count: the number of bytes userspace wants to read 1062 * @offset: (inout): the current position for writing into @buf 1063 * 1064 * Checks Gen 7 specific OA unit status registers and if necessary appends 1065 * corresponding status records for userspace (such as for a buffer full 1066 * condition) and then initiate appending any buffered OA reports. 1067 * 1068 * Updates @offset according to the number of bytes successfully copied into 1069 * the userspace buffer. 1070 * 1071 * Returns: zero on success or a negative error code 1072 */ 1073 static int gen7_oa_read(struct i915_perf_stream *stream, 1074 char __user *buf, 1075 size_t count, 1076 size_t *offset) 1077 { 1078 struct drm_i915_private *dev_priv = stream->dev_priv; 1079 u32 oastatus1; 1080 int ret; 1081 1082 if (WARN_ON(!stream->oa_buffer.vaddr)) 1083 return -EIO; 1084 1085 oastatus1 = I915_READ(GEN7_OASTATUS1); 1086 1087 /* XXX: On Haswell we don't have a safe way to clear oastatus1 1088 * bits while the OA unit is enabled (while the tail pointer 1089 * may be updated asynchronously) so we ignore status bits 1090 * that have already been reported to userspace. 
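 *
 * Bits latched in gen7_latched_oastatus1 therefore stay masked here until
 * the OA buffer is re-initialised by gen7_init_oa_buffer(), which resets
 * the latched value.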
1091 */ 1092 oastatus1 &= ~dev_priv->perf.gen7_latched_oastatus1; 1093 1094 /* We treat OABUFFER_OVERFLOW as a significant error: 1095 * 1096 * - The status can be interpreted to mean that the buffer is 1097 * currently full (with a higher precedence than OA_TAKEN() 1098 * which will start to report a near-empty buffer after an 1099 * overflow) but it's awkward that we can't clear the status 1100 * on Haswell, so without a reset we won't be able to catch 1101 * the state again. 1102 * 1103 * - Since it also implies the HW has started overwriting old 1104 * reports it may also affect our sanity checks for invalid 1105 * reports when copying to userspace that assume new reports 1106 * are being written to cleared memory. 1107 * 1108 * - In the future we may want to introduce a flight recorder 1109 * mode where the driver will automatically maintain a safe 1110 * guard band between head/tail, avoiding this overflow 1111 * condition, but we avoid the added driver complexity for 1112 * now. 1113 */ 1114 if (unlikely(oastatus1 & GEN7_OASTATUS1_OABUFFER_OVERFLOW)) { 1115 ret = append_oa_status(stream, buf, count, offset, 1116 DRM_I915_PERF_RECORD_OA_BUFFER_LOST); 1117 if (ret) 1118 return ret; 1119 1120 DRM_DEBUG("OA buffer overflow (exponent = %d): force restart\n", 1121 stream->period_exponent); 1122 1123 dev_priv->perf.ops.oa_disable(stream); 1124 dev_priv->perf.ops.oa_enable(stream); 1125 1126 oastatus1 = I915_READ(GEN7_OASTATUS1); 1127 } 1128 1129 if (unlikely(oastatus1 & GEN7_OASTATUS1_REPORT_LOST)) { 1130 ret = append_oa_status(stream, buf, count, offset, 1131 DRM_I915_PERF_RECORD_OA_REPORT_LOST); 1132 if (ret) 1133 return ret; 1134 dev_priv->perf.gen7_latched_oastatus1 |= 1135 GEN7_OASTATUS1_REPORT_LOST; 1136 } 1137 1138 return gen7_append_oa_reports(stream, buf, count, offset); 1139 } 1140 1141 /** 1142 * i915_oa_wait_unlocked - handles blocking IO until OA data available 1143 * @stream: An i915-perf stream opened for OA metrics 1144 * 1145 * Called when userspace tries to read() from a blocking stream FD opened 1146 * for OA metrics. It waits until the hrtimer callback finds a non-empty 1147 * OA buffer and wakes us. 1148 * 1149 * Note: it's acceptable to have this return with some false positives 1150 * since any subsequent read handling will return -EAGAIN if there isn't 1151 * really data ready for userspace yet. 1152 * 1153 * Returns: zero on success or a negative error code 1154 */ 1155 static int i915_oa_wait_unlocked(struct i915_perf_stream *stream) 1156 { 1157 /* We would wait indefinitely if periodic sampling is not enabled */ 1158 if (!stream->periodic) 1159 return -EIO; 1160 1161 return wait_event_interruptible(stream->poll_wq, 1162 oa_buffer_check_unlocked(stream)); 1163 } 1164 1165 /** 1166 * i915_oa_poll_wait - call poll_wait() for an OA stream poll() 1167 * @stream: An i915-perf stream opened for OA metrics 1168 * @file: An i915 perf stream file 1169 * @wait: poll() state table 1170 * 1171 * For handling userspace polling on an i915 perf stream opened for OA metrics, 1172 * this starts a poll_wait with the wait queue that our hrtimer callback wakes 1173 * when it sees data ready to read in the circular OA buffer. 
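 *
 * For illustration only (not a definition of the interface), userspace
 * typically ends up driving this with a loop along the lines of:
 *
 *   struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };
 *
 *   while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *           read(stream_fd, buf, sizeof(buf));
 *
 * where stream_fd is the fd returned by DRM_IOCTL_I915_PERF_OPEN and error
 * handling has been omitted.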
1174 */ 1175 static void i915_oa_poll_wait(struct i915_perf_stream *stream, 1176 struct file *file, 1177 poll_table *wait) 1178 { 1179 poll_wait(file, &stream->poll_wq, wait); 1180 } 1181 1182 /** 1183 * i915_oa_read - just calls through to &i915_oa_ops->read 1184 * @stream: An i915-perf stream opened for OA metrics 1185 * @buf: destination buffer given by userspace 1186 * @count: the number of bytes userspace wants to read 1187 * @offset: (inout): the current position for writing into @buf 1188 * 1189 * Updates @offset according to the number of bytes successfully copied into 1190 * the userspace buffer. 1191 * 1192 * Returns: zero on success or a negative error code 1193 */ 1194 static int i915_oa_read(struct i915_perf_stream *stream, 1195 char __user *buf, 1196 size_t count, 1197 size_t *offset) 1198 { 1199 struct drm_i915_private *dev_priv = stream->dev_priv; 1200 1201 return dev_priv->perf.ops.read(stream, buf, count, offset); 1202 } 1203 1204 static struct intel_context *oa_pin_context(struct i915_perf_stream *stream) 1205 { 1206 struct i915_gem_engines_iter it; 1207 struct drm_i915_private *i915 = stream->dev_priv; 1208 struct i915_gem_context *ctx = stream->ctx; 1209 struct intel_context *ce; 1210 int err; 1211 1212 err = i915_mutex_lock_interruptible(&i915->drm); 1213 if (err) 1214 return ERR_PTR(err); 1215 1216 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { 1217 if (ce->engine->class != RENDER_CLASS) 1218 continue; 1219 1220 /* 1221 * As the ID is the gtt offset of the context's vma we 1222 * pin the vma to ensure the ID remains fixed. 1223 */ 1224 err = intel_context_pin(ce); 1225 if (err == 0) { 1226 stream->pinned_ctx = ce; 1227 break; 1228 } 1229 } 1230 i915_gem_context_unlock_engines(ctx); 1231 1232 mutex_unlock(&i915->drm.struct_mutex); 1233 if (err) 1234 return ERR_PTR(err); 1235 1236 return stream->pinned_ctx; 1237 } 1238 1239 /** 1240 * oa_get_render_ctx_id - determine and hold ctx hw id 1241 * @stream: An i915-perf stream opened for OA metrics 1242 * 1243 * Determine the render context hw id, and ensure it remains fixed for the 1244 * lifetime of the stream. This ensures that we don't have to worry about 1245 * updating the context ID in OACONTROL on the fly. 1246 * 1247 * Returns: zero on success or a negative error code 1248 */ 1249 static int oa_get_render_ctx_id(struct i915_perf_stream *stream) 1250 { 1251 struct drm_i915_private *i915 = stream->dev_priv; 1252 struct intel_context *ce; 1253 1254 ce = oa_pin_context(stream); 1255 if (IS_ERR(ce)) 1256 return PTR_ERR(ce); 1257 1258 switch (INTEL_GEN(i915)) { 1259 case 7: { 1260 /* 1261 * On Haswell we don't do any post processing of the reports 1262 * and don't need to use the mask. 1263 */ 1264 stream->specific_ctx_id = i915_ggtt_offset(ce->state); 1265 stream->specific_ctx_id_mask = 0; 1266 break; 1267 } 1268 1269 case 8: 1270 case 9: 1271 case 10: 1272 if (USES_GUC_SUBMISSION(i915)) { 1273 /* 1274 * When using GuC, the context descriptor we write in 1275 * i915 is read by GuC and rewritten before it's 1276 * actually written into the hardware. The LRCA is 1277 * what is put into the context id field of the 1278 * context descriptor by GuC. Because it's aligned to 1279 * a page, the lower 12bits are always at 0 and 1280 * dropped by GuC. They won't be part of the context 1281 * ID in the OA reports, so squash those lower bits. 1282 */ 1283 stream->specific_ctx_id = 1284 lower_32_bits(ce->lrc_desc) >> 12; 1285 1286 /* 1287 * GuC uses the top bit to signal proxy submission, so 1288 * ignore that bit. 
1289 */ 1290 stream->specific_ctx_id_mask = 1291 (1U << (GEN8_CTX_ID_WIDTH - 1)) - 1; 1292 } else { 1293 stream->specific_ctx_id_mask = 1294 (1U << GEN8_CTX_ID_WIDTH) - 1; 1295 stream->specific_ctx_id = 1296 upper_32_bits(ce->lrc_desc); 1297 stream->specific_ctx_id &= 1298 stream->specific_ctx_id_mask; 1299 } 1300 break; 1301 1302 case 11: { 1303 stream->specific_ctx_id_mask = 1304 ((1U << GEN11_SW_CTX_ID_WIDTH) - 1) << (GEN11_SW_CTX_ID_SHIFT - 32) | 1305 ((1U << GEN11_ENGINE_INSTANCE_WIDTH) - 1) << (GEN11_ENGINE_INSTANCE_SHIFT - 32) | 1306 ((1 << GEN11_ENGINE_CLASS_WIDTH) - 1) << (GEN11_ENGINE_CLASS_SHIFT - 32); 1307 stream->specific_ctx_id = upper_32_bits(ce->lrc_desc); 1308 stream->specific_ctx_id &= 1309 stream->specific_ctx_id_mask; 1310 break; 1311 } 1312 1313 default: 1314 MISSING_CASE(INTEL_GEN(i915)); 1315 } 1316 1317 DRM_DEBUG_DRIVER("filtering on ctx_id=0x%x ctx_id_mask=0x%x\n", 1318 stream->specific_ctx_id, 1319 stream->specific_ctx_id_mask); 1320 1321 return 0; 1322 } 1323 1324 /** 1325 * oa_put_render_ctx_id - counterpart to oa_get_render_ctx_id releases hold 1326 * @stream: An i915-perf stream opened for OA metrics 1327 * 1328 * In case anything needed doing to ensure the context HW ID would remain valid 1329 * for the lifetime of the stream, then that can be undone here. 1330 */ 1331 static void oa_put_render_ctx_id(struct i915_perf_stream *stream) 1332 { 1333 struct drm_i915_private *dev_priv = stream->dev_priv; 1334 struct intel_context *ce; 1335 1336 stream->specific_ctx_id = INVALID_CTX_ID; 1337 stream->specific_ctx_id_mask = 0; 1338 1339 ce = fetch_and_zero(&stream->pinned_ctx); 1340 if (ce) { 1341 mutex_lock(&dev_priv->drm.struct_mutex); 1342 intel_context_unpin(ce); 1343 mutex_unlock(&dev_priv->drm.struct_mutex); 1344 } 1345 } 1346 1347 static void 1348 free_oa_buffer(struct i915_perf_stream *stream) 1349 { 1350 struct drm_i915_private *i915 = stream->dev_priv; 1351 1352 mutex_lock(&i915->drm.struct_mutex); 1353 1354 i915_vma_unpin_and_release(&stream->oa_buffer.vma, 1355 I915_VMA_RELEASE_MAP); 1356 1357 mutex_unlock(&i915->drm.struct_mutex); 1358 1359 stream->oa_buffer.vaddr = NULL; 1360 } 1361 1362 static void i915_oa_stream_destroy(struct i915_perf_stream *stream) 1363 { 1364 struct drm_i915_private *dev_priv = stream->dev_priv; 1365 1366 BUG_ON(stream != dev_priv->perf.exclusive_stream); 1367 1368 /* 1369 * Unset exclusive_stream first, it will be checked while disabling 1370 * the metric set on gen8+. 
1371 */ 1372 mutex_lock(&dev_priv->drm.struct_mutex); 1373 dev_priv->perf.exclusive_stream = NULL; 1374 dev_priv->perf.ops.disable_metric_set(stream); 1375 mutex_unlock(&dev_priv->drm.struct_mutex); 1376 1377 free_oa_buffer(stream); 1378 1379 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 1380 intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); 1381 1382 if (stream->ctx) 1383 oa_put_render_ctx_id(stream); 1384 1385 put_oa_config(dev_priv, stream->oa_config); 1386 1387 if (dev_priv->perf.spurious_report_rs.missed) { 1388 DRM_NOTE("%d spurious OA report notices suppressed due to ratelimiting\n", 1389 dev_priv->perf.spurious_report_rs.missed); 1390 } 1391 } 1392 1393 static void gen7_init_oa_buffer(struct i915_perf_stream *stream) 1394 { 1395 struct drm_i915_private *dev_priv = stream->dev_priv; 1396 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); 1397 unsigned long flags; 1398 1399 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 1400 1401 /* Pre-DevBDW: OABUFFER must be set with counters off, 1402 * before OASTATUS1, but after OASTATUS2 1403 */ 1404 I915_WRITE(GEN7_OASTATUS2, 1405 gtt_offset | GEN7_OASTATUS2_MEM_SELECT_GGTT); /* head */ 1406 stream->oa_buffer.head = gtt_offset; 1407 1408 I915_WRITE(GEN7_OABUFFER, gtt_offset); 1409 1410 I915_WRITE(GEN7_OASTATUS1, gtt_offset | OABUFFER_SIZE_16M); /* tail */ 1411 1412 /* Mark that we need updated tail pointers to read from... */ 1413 stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR; 1414 stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR; 1415 1416 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 1417 1418 /* On Haswell we have to track which OASTATUS1 flags we've 1419 * already seen since they can't be cleared while periodic 1420 * sampling is enabled. 1421 */ 1422 dev_priv->perf.gen7_latched_oastatus1 = 0; 1423 1424 /* NB: although the OA buffer will initially be allocated 1425 * zeroed via shmfs (and so this memset is redundant when 1426 * first allocating), we may re-init the OA buffer, either 1427 * when re-enabling a stream or in error/reset paths. 1428 * 1429 * The reason we clear the buffer for each re-init is for the 1430 * sanity check in gen7_append_oa_reports() that looks at the 1431 * report-id field to make sure it's non-zero which relies on 1432 * the assumption that new reports are being written to zeroed 1433 * memory... 1434 */ 1435 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE); 1436 1437 /* Maybe make ->pollin per-stream state if we support multiple 1438 * concurrent streams in the future. 1439 */ 1440 stream->pollin = false; 1441 } 1442 1443 static void gen8_init_oa_buffer(struct i915_perf_stream *stream) 1444 { 1445 struct drm_i915_private *dev_priv = stream->dev_priv; 1446 u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma); 1447 unsigned long flags; 1448 1449 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 1450 1451 I915_WRITE(GEN8_OASTATUS, 0); 1452 I915_WRITE(GEN8_OAHEADPTR, gtt_offset); 1453 stream->oa_buffer.head = gtt_offset; 1454 1455 I915_WRITE(GEN8_OABUFFER_UDW, 0); 1456 1457 /* 1458 * PRM says: 1459 * 1460 * "This MMIO must be set before the OATAILPTR 1461 * register and after the OAHEADPTR register. This is 1462 * to enable proper functionality of the overflow 1463 * bit." 1464 */ 1465 I915_WRITE(GEN8_OABUFFER, gtt_offset | 1466 OABUFFER_SIZE_16M | GEN8_OABUFFER_MEM_SELECT_GGTT); 1467 I915_WRITE(GEN8_OATAILPTR, gtt_offset & GEN8_OATAILPTR_MASK); 1468 1469 /* Mark that we need updated tail pointers to read from... 
*/
1470 stream->oa_buffer.tails[0].offset = INVALID_TAIL_PTR;
1471 stream->oa_buffer.tails[1].offset = INVALID_TAIL_PTR;
1472
1473 /*
1474 * Reset state used to recognise context switches, affecting which
1475 * reports we will forward to userspace while filtering for a single
1476 * context.
1477 */
1478 stream->oa_buffer.last_ctx_id = INVALID_CTX_ID;
1479
1480 spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
1481
1482 /*
1483 * NB: although the OA buffer will initially be allocated
1484 * zeroed via shmfs (and so this memset is redundant when
1485 * first allocating), we may re-init the OA buffer, either
1486 * when re-enabling a stream or in error/reset paths.
1487 *
1488 * The reason we clear the buffer for each re-init is for the
1489 * sanity check in gen8_append_oa_reports() that looks at the
1490 * reason field to make sure it's non-zero which relies on
1491 * the assumption that new reports are being written to zeroed
1492 * memory...
1493 */
1494 memset(stream->oa_buffer.vaddr, 0, OA_BUFFER_SIZE);
1495
1496 /*
1497 * Maybe make ->pollin per-stream state if we support multiple
1498 * concurrent streams in the future.
1499 */
1500 stream->pollin = false;
1501 }
1502
1503 static int alloc_oa_buffer(struct i915_perf_stream *stream)
1504 {
1505 struct drm_i915_gem_object *bo;
1506 struct drm_i915_private *dev_priv = stream->dev_priv;
1507 struct i915_vma *vma;
1508 int ret;
1509
1510 if (WARN_ON(stream->oa_buffer.vma))
1511 return -ENODEV;
1512
1513 ret = i915_mutex_lock_interruptible(&dev_priv->drm);
1514 if (ret)
1515 return ret;
1516
1517 BUILD_BUG_ON_NOT_POWER_OF_2(OA_BUFFER_SIZE);
1518 BUILD_BUG_ON(OA_BUFFER_SIZE < SZ_128K || OA_BUFFER_SIZE > SZ_16M);
1519
1520 bo = i915_gem_object_create_shmem(dev_priv, OA_BUFFER_SIZE);
1521 if (IS_ERR(bo)) {
1522 DRM_ERROR("Failed to allocate OA buffer\n");
1523 ret = PTR_ERR(bo);
1524 goto unlock;
1525 }
1526
1527 i915_gem_object_set_cache_coherency(bo, I915_CACHE_LLC);
1528
1529 /* PreHSW required 512K alignment, HSW requires 16M */
1530 vma = i915_gem_object_ggtt_pin(bo, NULL, 0, SZ_16M, 0);
1531 if (IS_ERR(vma)) {
1532 ret = PTR_ERR(vma);
1533 goto err_unref;
1534 }
1535 stream->oa_buffer.vma = vma;
1536
1537 stream->oa_buffer.vaddr =
1538 i915_gem_object_pin_map(bo, I915_MAP_WB);
1539 if (IS_ERR(stream->oa_buffer.vaddr)) {
1540 ret = PTR_ERR(stream->oa_buffer.vaddr);
1541 goto err_unpin;
1542 }
1543
1544 DRM_DEBUG_DRIVER("OA Buffer initialized, gtt offset = 0x%x, vaddr = %p\n",
1545 i915_ggtt_offset(stream->oa_buffer.vma),
1546 stream->oa_buffer.vaddr);
1547
1548 goto unlock;
1549
1550 err_unpin:
1551 __i915_vma_unpin(vma);
1552
1553 err_unref:
1554 i915_gem_object_put(bo);
1555
1556 stream->oa_buffer.vaddr = NULL;
1557 stream->oa_buffer.vma = NULL;
1558
1559 unlock:
1560 mutex_unlock(&dev_priv->drm.struct_mutex);
1561 return ret;
1562 }
1563
1564 static void config_oa_regs(struct drm_i915_private *dev_priv,
1565 const struct i915_oa_reg *regs,
1566 u32 n_regs)
1567 {
1568 u32 i;
1569
1570 for (i = 0; i < n_regs; i++) {
1571 const struct i915_oa_reg *reg = regs + i;
1572
1573 I915_WRITE(reg->addr, reg->value);
1574 }
1575 }
1576
1577 static void delay_after_mux(void)
1578 {
1579 /*
1580 * It apparently takes a fairly long time for a new MUX
1581 * configuration to be applied after these register writes.
1582 * This delay duration was derived empirically based on the
1583 * render_basic config but hopefully it covers the maximum
1584 * configuration latency.
1585 * 1586 * As a fallback, the checks in _append_oa_reports() to skip 1587 * invalid OA reports do also seem to work to discard reports 1588 * generated before this config has completed - albeit not 1589 * silently. 1590 * 1591 * Unfortunately this is essentially a magic number, since we 1592 * don't currently know of a reliable mechanism for predicting 1593 * how long the MUX config will take to apply and besides 1594 * seeing invalid reports we don't know of a reliable way to 1595 * explicitly check that the MUX config has landed. 1596 * 1597 * It's even possible we've mischaracterized the underlying 1598 * problem - it just seems like the simplest explanation why 1599 * a delay at this location would mitigate any invalid reports. 1600 */ 1601 usleep_range(15000, 20000); 1602 } 1603 1604 static int hsw_enable_metric_set(struct i915_perf_stream *stream) 1605 { 1606 struct drm_i915_private *dev_priv = stream->dev_priv; 1607 const struct i915_oa_config *oa_config = stream->oa_config; 1608 1609 /* 1610 * PRM: 1611 * 1612 * OA unit is using “crclk” for its functionality. When trunk 1613 * level clock gating takes place, OA clock would be gated, 1614 * unable to count the events from non-render clock domain. 1615 * Render clock gating must be disabled when OA is enabled to 1616 * count the events from non-render domain. Unit level clock 1617 * gating for RCS should also be disabled. 1618 */ 1619 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) & 1620 ~GEN7_DOP_CLOCK_GATE_ENABLE)); 1621 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) | 1622 GEN6_CSUNIT_CLOCK_GATE_DISABLE)); 1623 1624 config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); 1625 delay_after_mux(); 1626 1627 config_oa_regs(dev_priv, oa_config->b_counter_regs, 1628 oa_config->b_counter_regs_len); 1629 1630 return 0; 1631 } 1632 1633 static void hsw_disable_metric_set(struct i915_perf_stream *stream) 1634 { 1635 struct drm_i915_private *dev_priv = stream->dev_priv; 1636 1637 I915_WRITE(GEN6_UCGCTL1, (I915_READ(GEN6_UCGCTL1) & 1638 ~GEN6_CSUNIT_CLOCK_GATE_DISABLE)); 1639 I915_WRITE(GEN7_MISCCPCTL, (I915_READ(GEN7_MISCCPCTL) | 1640 GEN7_DOP_CLOCK_GATE_ENABLE)); 1641 1642 I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & 1643 ~GT_NOA_ENABLE)); 1644 } 1645 1646 static u32 oa_config_flex_reg(const struct i915_oa_config *oa_config, 1647 i915_reg_t reg) 1648 { 1649 u32 mmio = i915_mmio_reg_offset(reg); 1650 int i; 1651 1652 /* 1653 * This arbitrary default will select the 'EU FPU0 Pipeline 1654 * Active' event. In the future it's anticipated that there 1655 * will be an explicit 'No Event' we can select, but not yet... 1656 */ 1657 if (!oa_config) 1658 return 0; 1659 1660 for (i = 0; i < oa_config->flex_regs_len; i++) { 1661 if (i915_mmio_reg_offset(oa_config->flex_regs[i].addr) == mmio) 1662 return oa_config->flex_regs[i].value; 1663 } 1664 1665 return 0; 1666 } 1667 /* 1668 * NB: It must always remain pointer safe to run this even if the OA unit 1669 * has been disabled. 1670 * 1671 * It's fine to put out-of-date values into these per-context registers 1672 * in the case that the OA unit has been disabled.
1673 */ 1674 static void 1675 gen8_update_reg_state_unlocked(struct i915_perf_stream *stream, 1676 struct intel_context *ce, 1677 u32 *reg_state, 1678 const struct i915_oa_config *oa_config) 1679 { 1680 struct drm_i915_private *i915 = ce->engine->i915; 1681 u32 ctx_oactxctrl = i915->perf.ctx_oactxctrl_offset; 1682 u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; 1683 /* The MMIO offsets for Flex EU registers aren't contiguous */ 1684 i915_reg_t flex_regs[] = { 1685 EU_PERF_CNTL0, 1686 EU_PERF_CNTL1, 1687 EU_PERF_CNTL2, 1688 EU_PERF_CNTL3, 1689 EU_PERF_CNTL4, 1690 EU_PERF_CNTL5, 1691 EU_PERF_CNTL6, 1692 }; 1693 int i; 1694 1695 CTX_REG(reg_state, ctx_oactxctrl, GEN8_OACTXCONTROL, 1696 (stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | 1697 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | 1698 GEN8_OA_COUNTER_RESUME); 1699 1700 for (i = 0; i < ARRAY_SIZE(flex_regs); i++) { 1701 CTX_REG(reg_state, ctx_flexeu0 + i * 2, flex_regs[i], 1702 oa_config_flex_reg(oa_config, flex_regs[i])); 1703 } 1704 1705 CTX_REG(reg_state, 1706 CTX_R_PWR_CLK_STATE, GEN8_R_PWR_CLK_STATE, 1707 intel_sseu_make_rpcs(i915, &ce->sseu)); 1708 } 1709 1710 struct flex { 1711 i915_reg_t reg; 1712 u32 offset; 1713 u32 value; 1714 }; 1715 1716 static int 1717 gen8_store_flex(struct i915_request *rq, 1718 struct intel_context *ce, 1719 const struct flex *flex, unsigned int count) 1720 { 1721 u32 offset; 1722 u32 *cs; 1723 1724 cs = intel_ring_begin(rq, 4 * count); 1725 if (IS_ERR(cs)) 1726 return PTR_ERR(cs); 1727 1728 offset = i915_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE; 1729 do { 1730 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1731 *cs++ = offset + (flex->offset + 1) * sizeof(u32); 1732 *cs++ = 0; 1733 *cs++ = flex->value; 1734 } while (flex++, --count); 1735 1736 intel_ring_advance(rq, cs); 1737 1738 return 0; 1739 } 1740 1741 static int 1742 gen8_load_flex(struct i915_request *rq, 1743 struct intel_context *ce, 1744 const struct flex *flex, unsigned int count) 1745 { 1746 u32 *cs; 1747 1748 GEM_BUG_ON(!count || count > 63); 1749 1750 cs = intel_ring_begin(rq, 2 * count + 2); 1751 if (IS_ERR(cs)) 1752 return PTR_ERR(cs); 1753 1754 *cs++ = MI_LOAD_REGISTER_IMM(count); 1755 do { 1756 *cs++ = i915_mmio_reg_offset(flex->reg); 1757 *cs++ = flex->value; 1758 } while (flex++, --count); 1759 *cs++ = MI_NOOP; 1760 1761 intel_ring_advance(rq, cs); 1762 1763 return 0; 1764 } 1765 1766 static int gen8_modify_context(struct intel_context *ce, 1767 const struct flex *flex, unsigned int count) 1768 { 1769 struct i915_request *rq; 1770 int err; 1771 1772 lockdep_assert_held(&ce->pin_mutex); 1773 1774 rq = i915_request_create(ce->engine->kernel_context); 1775 if (IS_ERR(rq)) 1776 return PTR_ERR(rq); 1777 1778 /* Serialise with the remote context */ 1779 err = intel_context_prepare_remote_request(ce, rq); 1780 if (err == 0) 1781 err = gen8_store_flex(rq, ce, flex, count); 1782 1783 i915_request_add(rq); 1784 return err; 1785 } 1786 1787 static int gen8_modify_self(struct intel_context *ce, 1788 const struct flex *flex, unsigned int count) 1789 { 1790 struct i915_request *rq; 1791 int err; 1792 1793 rq = i915_request_create(ce); 1794 if (IS_ERR(rq)) 1795 return PTR_ERR(rq); 1796 1797 err = gen8_load_flex(rq, ce, flex, count); 1798 1799 i915_request_add(rq); 1800 return err; 1801 } 1802 1803 static int gen8_configure_context(struct i915_gem_context *ctx, 1804 struct flex *flex, unsigned int count) 1805 { 1806 struct i915_gem_engines_iter it; 1807 struct intel_context *ce; 1808 int err = 0; 1809 1810 for_each_gem_engine(ce, 
i915_gem_context_lock_engines(ctx), it) { 1811 GEM_BUG_ON(ce == ce->engine->kernel_context); 1812 1813 if (ce->engine->class != RENDER_CLASS) 1814 continue; 1815 1816 err = intel_context_lock_pinned(ce); 1817 if (err) 1818 break; 1819 1820 flex->value = intel_sseu_make_rpcs(ctx->i915, &ce->sseu); 1821 1822 /* Otherwise OA settings will be set upon first use */ 1823 if (intel_context_is_pinned(ce)) 1824 err = gen8_modify_context(ce, flex, count); 1825 1826 intel_context_unlock_pinned(ce); 1827 if (err) 1828 break; 1829 } 1830 i915_gem_context_unlock_engines(ctx); 1831 1832 return err; 1833 } 1834 1835 /* 1836 * Manages updating the per-context aspects of the OA stream 1837 * configuration across all contexts. 1838 * 1839 * The awkward consideration here is that OACTXCONTROL controls the 1840 * exponent for periodic sampling which is primarily used for system 1841 * wide profiling where we'd like a consistent sampling period even in 1842 * the face of context switches. 1843 * 1844 * Our approach of updating the register state context (as opposed to 1845 * say using a workaround batch buffer) ensures that the hardware 1846 * won't automatically reload an out-of-date timer exponent even 1847 * transiently before a WA BB could be parsed. 1848 * 1849 * This function needs to: 1850 * - Ensure the currently running context's per-context OA state is 1851 * updated 1852 * - Ensure that all existing contexts will have the correct per-context 1853 * OA state if they are scheduled for use. 1854 * - Ensure any new contexts will be initialized with the correct 1855 * per-context OA state. 1856 * 1857 * Note: it's only the RCS/Render context that has any OA state. 1858 */ 1859 static int gen8_configure_all_contexts(struct i915_perf_stream *stream, 1860 const struct i915_oa_config *oa_config) 1861 { 1862 struct drm_i915_private *i915 = stream->dev_priv; 1863 /* The MMIO offsets for Flex EU registers aren't contiguous */ 1864 const u32 ctx_flexeu0 = i915->perf.ctx_flexeu0_offset; 1865 #define ctx_flexeuN(N) (ctx_flexeu0 + 2 * (N)) 1866 struct flex regs[] = { 1867 { 1868 GEN8_R_PWR_CLK_STATE, 1869 CTX_R_PWR_CLK_STATE, 1870 }, 1871 { 1872 GEN8_OACTXCONTROL, 1873 i915->perf.ctx_oactxctrl_offset, 1874 ((stream->period_exponent << GEN8_OA_TIMER_PERIOD_SHIFT) | 1875 (stream->periodic ? GEN8_OA_TIMER_ENABLE : 0) | 1876 GEN8_OA_COUNTER_RESUME) 1877 }, 1878 { EU_PERF_CNTL0, ctx_flexeuN(0) }, 1879 { EU_PERF_CNTL1, ctx_flexeuN(1) }, 1880 { EU_PERF_CNTL2, ctx_flexeuN(2) }, 1881 { EU_PERF_CNTL3, ctx_flexeuN(3) }, 1882 { EU_PERF_CNTL4, ctx_flexeuN(4) }, 1883 { EU_PERF_CNTL5, ctx_flexeuN(5) }, 1884 { EU_PERF_CNTL6, ctx_flexeuN(6) }, 1885 }; 1886 #undef ctx_flexeuN 1887 struct intel_engine_cs *engine; 1888 struct i915_gem_context *ctx; 1889 int i; 1890 1891 for (i = 2; i < ARRAY_SIZE(regs); i++) 1892 regs[i].value = oa_config_flex_reg(oa_config, regs[i].reg); 1893 1894 lockdep_assert_held(&i915->drm.struct_mutex); 1895 1896 /* 1897 * The OA register config is setup through the context image. This image 1898 * might be written to by the GPU on context switch (in particular on 1899 * lite-restore). This means we can't safely update a context's image, 1900 * if this context is scheduled/submitted to run on the GPU. 1901 * 1902 * We could emit the OA register config through the batch buffer but 1903 * this might leave small interval of time where the OA unit is 1904 * configured at an invalid sampling period. 
1905 * 1906 * Note that since we emit all requests from a single ring, there 1907 * is still an implicit global barrier here that may cause a high 1908 * priority context to wait for an otherwise independent low priority 1909 * context. Contexts idle at the time of reconfiguration are not 1910 * trapped behind the barrier. 1911 */ 1912 list_for_each_entry(ctx, &i915->contexts.list, link) { 1913 int err; 1914 1915 if (ctx == i915->kernel_context) 1916 continue; 1917 1918 err = gen8_configure_context(ctx, regs, ARRAY_SIZE(regs)); 1919 if (err) 1920 return err; 1921 } 1922 1923 /* 1924 * After updating all other contexts, we need to modify ourselves. 1925 * If we don't modify the kernel_context, we do not get events while 1926 * idle. 1927 */ 1928 for_each_uabi_engine(engine, i915) { 1929 struct intel_context *ce = engine->kernel_context; 1930 int err; 1931 1932 if (engine->class != RENDER_CLASS) 1933 continue; 1934 1935 regs[0].value = intel_sseu_make_rpcs(i915, &ce->sseu); 1936 1937 err = gen8_modify_self(ce, regs, ARRAY_SIZE(regs)); 1938 if (err) 1939 return err; 1940 } 1941 1942 return 0; 1943 } 1944 1945 static int gen8_enable_metric_set(struct i915_perf_stream *stream) 1946 { 1947 struct drm_i915_private *dev_priv = stream->dev_priv; 1948 const struct i915_oa_config *oa_config = stream->oa_config; 1949 int ret; 1950 1951 /* 1952 * We disable slice/unslice clock ratio change reports on SKL since 1953 * they are too noisy. The HW generates a lot of redundant reports 1954 * where the ratio hasn't really changed, causing a lot of redundant 1955 * work for processes and increasing the chances we'll hit buffer 1956 * overruns. 1957 * 1958 * Although we don't currently use the 'disable overrun' OABUFFER 1959 * feature it's worth noting that clock ratio reports have to be 1960 * disabled before considering use of that feature since the HW doesn't 1961 * correctly block these reports. 1962 * 1963 * Currently none of the high-level metrics we have depend on knowing 1964 * this ratio to normalize. 1965 * 1966 * Note: This register is not power context saved and restored, but 1967 * that's OK considering that we disable RC6 while the OA unit is 1968 * enabled. 1969 * 1970 * The _INCLUDE_CLK_RATIO bit allows the slice/unslice frequency to 1971 * be read back from automatically triggered reports, as part of the 1972 * RPT_ID field. 1973 */ 1974 if (IS_GEN_RANGE(dev_priv, 9, 11)) { 1975 I915_WRITE(GEN8_OA_DEBUG, 1976 _MASKED_BIT_ENABLE(GEN9_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | 1977 GEN9_OA_DEBUG_INCLUDE_CLK_RATIO)); 1978 } 1979 1980 /* 1981 * Update all contexts prior to writing the mux configurations as we need 1982 * to make sure all slices/subslices are ON before writing to NOA 1983 * registers. 1984 */ 1985 ret = gen8_configure_all_contexts(stream, oa_config); 1986 if (ret) 1987 return ret; 1988 1989 config_oa_regs(dev_priv, oa_config->mux_regs, oa_config->mux_regs_len); 1990 delay_after_mux(); 1991 1992 config_oa_regs(dev_priv, oa_config->b_counter_regs, 1993 oa_config->b_counter_regs_len); 1994 1995 return 0; 1996 } 1997 1998 static void gen8_disable_metric_set(struct i915_perf_stream *stream) 1999 { 2000 struct drm_i915_private *dev_priv = stream->dev_priv; 2001 2002 /* Reset all contexts' slices/subslices configurations.
*/ 2003 gen8_configure_all_contexts(stream, NULL); 2004 2005 I915_WRITE(GDT_CHICKEN_BITS, (I915_READ(GDT_CHICKEN_BITS) & 2006 ~GT_NOA_ENABLE)); 2007 } 2008 2009 static void gen10_disable_metric_set(struct i915_perf_stream *stream) 2010 { 2011 struct drm_i915_private *dev_priv = stream->dev_priv; 2012 2013 /* Reset all contexts' slices/subslices configurations. */ 2014 gen8_configure_all_contexts(stream, NULL); 2015 2016 /* Make sure we disable noa to save power. */ 2017 I915_WRITE(RPM_CONFIG1, 2018 I915_READ(RPM_CONFIG1) & ~GEN10_GT_NOA_ENABLE); 2019 } 2020 2021 static void gen7_oa_enable(struct i915_perf_stream *stream) 2022 { 2023 struct drm_i915_private *dev_priv = stream->dev_priv; 2024 struct i915_gem_context *ctx = stream->ctx; 2025 u32 ctx_id = stream->specific_ctx_id; 2026 bool periodic = stream->periodic; 2027 u32 period_exponent = stream->period_exponent; 2028 u32 report_format = stream->oa_buffer.format; 2029 2030 /* 2031 * Reset buf pointers so we don't forward reports from before now. 2032 * 2033 * Think carefully if considering trying to avoid this, since it 2034 * also ensures status flags and the buffer itself are cleared 2035 * in error paths, and we have checks for invalid reports based 2036 * on the assumption that certain fields are written to zeroed 2037 * memory which this helps maintain. 2038 */ 2039 gen7_init_oa_buffer(stream); 2040 2041 I915_WRITE(GEN7_OACONTROL, 2042 (ctx_id & GEN7_OACONTROL_CTX_MASK) | 2043 (period_exponent << 2044 GEN7_OACONTROL_TIMER_PERIOD_SHIFT) | 2045 (periodic ? GEN7_OACONTROL_TIMER_ENABLE : 0) | 2046 (report_format << GEN7_OACONTROL_FORMAT_SHIFT) | 2047 (ctx ? GEN7_OACONTROL_PER_CTX_ENABLE : 0) | 2048 GEN7_OACONTROL_ENABLE); 2049 } 2050 2051 static void gen8_oa_enable(struct i915_perf_stream *stream) 2052 { 2053 struct drm_i915_private *dev_priv = stream->dev_priv; 2054 u32 report_format = stream->oa_buffer.format; 2055 2056 /* 2057 * Reset buf pointers so we don't forward reports from before now. 2058 * 2059 * Think carefully if considering trying to avoid this, since it 2060 * also ensures status flags and the buffer itself are cleared 2061 * in error paths, and we have checks for invalid reports based 2062 * on the assumption that certain fields are written to zeroed 2063 * memory which this helps maintain. 2064 */ 2065 gen8_init_oa_buffer(stream); 2066 2067 /* 2068 * Note: we don't rely on the hardware to perform single context 2069 * filtering and instead filter on the cpu based on the context-id 2070 * field of reports 2071 */ 2072 I915_WRITE(GEN8_OACONTROL, (report_format << 2073 GEN8_OA_REPORT_FORMAT_SHIFT) | 2074 GEN8_OA_COUNTER_ENABLE); 2075 } 2076 2077 /** 2078 * i915_oa_stream_enable - handle `I915_PERF_IOCTL_ENABLE` for OA stream 2079 * @stream: An i915 perf stream opened for OA metrics 2080 * 2081 * [Re]enables hardware periodic sampling according to the period configured 2082 * when opening the stream. This also starts a hrtimer that will periodically 2083 * check for data in the circular OA buffer for notifying userspace (e.g. 2084 * during a read() or poll()).
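 *
 * As a minimal, illustrative sketch of the userspace side (not taken from any
 * particular tool; "stream_fd" is assumed to be a descriptor returned by
 * DRM_IOCTL_I915_PERF_OPEN for an already enabled stream), this hrtimer is
 * what lets a loop like the following make progress:
 *
 *	struct pollfd pfd = { .fd = stream_fd, .events = POLLIN };
 *	char buf[4096];
 *
 *	while (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN)) {
 *		ssize_t n = read(stream_fd, buf, sizeof(buf));
 *
 *		if (n < 0 && errno != EAGAIN)
 *			break;
 *		// buf now holds whole drm_i915_perf_record_header records
 *	}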
2085 */ 2086 static void i915_oa_stream_enable(struct i915_perf_stream *stream) 2087 { 2088 struct drm_i915_private *dev_priv = stream->dev_priv; 2089 2090 dev_priv->perf.ops.oa_enable(stream); 2091 2092 if (stream->periodic) 2093 hrtimer_start(&stream->poll_check_timer, 2094 ns_to_ktime(POLL_PERIOD), 2095 HRTIMER_MODE_REL_PINNED); 2096 } 2097 2098 static void gen7_oa_disable(struct i915_perf_stream *stream) 2099 { 2100 struct intel_uncore *uncore = &stream->dev_priv->uncore; 2101 2102 intel_uncore_write(uncore, GEN7_OACONTROL, 0); 2103 if (intel_wait_for_register(uncore, 2104 GEN7_OACONTROL, GEN7_OACONTROL_ENABLE, 0, 2105 50)) 2106 DRM_ERROR("wait for OA to be disabled timed out\n"); 2107 } 2108 2109 static void gen8_oa_disable(struct i915_perf_stream *stream) 2110 { 2111 struct intel_uncore *uncore = &stream->dev_priv->uncore; 2112 2113 intel_uncore_write(uncore, GEN8_OACONTROL, 0); 2114 if (intel_wait_for_register(uncore, 2115 GEN8_OACONTROL, GEN8_OA_COUNTER_ENABLE, 0, 2116 50)) 2117 DRM_ERROR("wait for OA to be disabled timed out\n"); 2118 } 2119 2120 /** 2121 * i915_oa_stream_disable - handle `I915_PERF_IOCTL_DISABLE` for OA stream 2122 * @stream: An i915 perf stream opened for OA metrics 2123 * 2124 * Stops the OA unit from periodically writing counter reports into the 2125 * circular OA buffer. This also stops the hrtimer that periodically checks for 2126 * data in the circular OA buffer, for notifying userspace. 2127 */ 2128 static void i915_oa_stream_disable(struct i915_perf_stream *stream) 2129 { 2130 struct drm_i915_private *dev_priv = stream->dev_priv; 2131 2132 dev_priv->perf.ops.oa_disable(stream); 2133 2134 if (stream->periodic) 2135 hrtimer_cancel(&stream->poll_check_timer); 2136 } 2137 2138 static const struct i915_perf_stream_ops i915_oa_stream_ops = { 2139 .destroy = i915_oa_stream_destroy, 2140 .enable = i915_oa_stream_enable, 2141 .disable = i915_oa_stream_disable, 2142 .wait_unlocked = i915_oa_wait_unlocked, 2143 .poll_wait = i915_oa_poll_wait, 2144 .read = i915_oa_read, 2145 }; 2146 2147 /** 2148 * i915_oa_stream_init - validate combined props for OA stream and init 2149 * @stream: An i915 perf stream 2150 * @param: The open parameters passed to `DRM_I915_PERF_OPEN` 2151 * @props: The property state that configures stream (individually validated) 2152 * 2153 * While read_properties_unlocked() validates properties in isolation it 2154 * doesn't ensure that the combination necessarily makes sense. 2155 * 2156 * At this point it has been determined that userspace wants a stream of 2157 * OA metrics, but still we need to further validate the combined 2158 * properties are OK. 2159 * 2160 * If the configuration makes sense then we can allocate memory for 2161 * a circular OA buffer and apply the requested metric set configuration. 2162 * 2163 * Returns: zero on success or a negative error code. 
2164 */ 2165 static int i915_oa_stream_init(struct i915_perf_stream *stream, 2166 struct drm_i915_perf_open_param *param, 2167 struct perf_open_properties *props) 2168 { 2169 struct drm_i915_private *dev_priv = stream->dev_priv; 2170 int format_size; 2171 int ret; 2172 2173 /* If the sysfs metrics/ directory wasn't registered for some 2174 * reason then don't let userspace try their luck with config 2175 * IDs 2176 */ 2177 if (!dev_priv->perf.metrics_kobj) { 2178 DRM_DEBUG("OA metrics weren't advertised via sysfs\n"); 2179 return -EINVAL; 2180 } 2181 2182 if (!(props->sample_flags & SAMPLE_OA_REPORT)) { 2183 DRM_DEBUG("Only OA report sampling supported\n"); 2184 return -EINVAL; 2185 } 2186 2187 if (!dev_priv->perf.ops.enable_metric_set) { 2188 DRM_DEBUG("OA unit not supported\n"); 2189 return -ENODEV; 2190 } 2191 2192 /* To avoid the complexity of having to accurately filter 2193 * counter reports and marshal to the appropriate client 2194 * we currently only allow exclusive access 2195 */ 2196 if (dev_priv->perf.exclusive_stream) { 2197 DRM_DEBUG("OA unit already in use\n"); 2198 return -EBUSY; 2199 } 2200 2201 if (!props->oa_format) { 2202 DRM_DEBUG("OA report format not specified\n"); 2203 return -EINVAL; 2204 } 2205 2206 stream->sample_size = sizeof(struct drm_i915_perf_record_header); 2207 2208 format_size = dev_priv->perf.oa_formats[props->oa_format].size; 2209 2210 stream->sample_flags |= SAMPLE_OA_REPORT; 2211 stream->sample_size += format_size; 2212 2213 stream->oa_buffer.format_size = format_size; 2214 if (WARN_ON(stream->oa_buffer.format_size == 0)) 2215 return -EINVAL; 2216 2217 stream->oa_buffer.format = 2218 dev_priv->perf.oa_formats[props->oa_format].format; 2219 2220 stream->periodic = props->oa_periodic; 2221 if (stream->periodic) 2222 stream->period_exponent = props->oa_period_exponent; 2223 2224 if (stream->ctx) { 2225 ret = oa_get_render_ctx_id(stream); 2226 if (ret) { 2227 DRM_DEBUG("Invalid context id to filter with\n"); 2228 return ret; 2229 } 2230 } 2231 2232 ret = get_oa_config(dev_priv, props->metrics_set, &stream->oa_config); 2233 if (ret) { 2234 DRM_DEBUG("Invalid OA config id=%i\n", props->metrics_set); 2235 goto err_config; 2236 } 2237 2238 /* PRM - observability performance counters: 2239 * 2240 * OACONTROL, performance counter enable, note: 2241 * 2242 * "When this bit is set, in order to have coherent counts, 2243 * RC6 power state and trunk clock gating must be disabled. 2244 * This can be achieved by programming MMIO registers as 2245 * 0xA094=0 and 0xA090[31]=1" 2246 * 2247 * In our case we are expecting that taking pm + FORCEWAKE 2248 * references will effectively disable RC6. 
2249 */ 2250 stream->wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); 2251 intel_uncore_forcewake_get(&dev_priv->uncore, FORCEWAKE_ALL); 2252 2253 ret = alloc_oa_buffer(stream); 2254 if (ret) 2255 goto err_oa_buf_alloc; 2256 2257 ret = i915_mutex_lock_interruptible(&dev_priv->drm); 2258 if (ret) 2259 goto err_lock; 2260 2261 stream->ops = &i915_oa_stream_ops; 2262 dev_priv->perf.exclusive_stream = stream; 2263 2264 ret = dev_priv->perf.ops.enable_metric_set(stream); 2265 if (ret) { 2266 DRM_DEBUG("Unable to enable metric set\n"); 2267 goto err_enable; 2268 } 2269 2270 mutex_unlock(&dev_priv->drm.struct_mutex); 2271 2272 hrtimer_init(&stream->poll_check_timer, 2273 CLOCK_MONOTONIC, HRTIMER_MODE_REL); 2274 stream->poll_check_timer.function = oa_poll_check_timer_cb; 2275 init_waitqueue_head(&stream->poll_wq); 2276 spin_lock_init(&stream->oa_buffer.ptr_lock); 2277 2278 return 0; 2279 2280 err_enable: 2281 dev_priv->perf.exclusive_stream = NULL; 2282 dev_priv->perf.ops.disable_metric_set(stream); 2283 mutex_unlock(&dev_priv->drm.struct_mutex); 2284 2285 err_lock: 2286 free_oa_buffer(stream); 2287 2288 err_oa_buf_alloc: 2289 put_oa_config(dev_priv, stream->oa_config); 2290 2291 intel_uncore_forcewake_put(&dev_priv->uncore, FORCEWAKE_ALL); 2292 intel_runtime_pm_put(&dev_priv->runtime_pm, stream->wakeref); 2293 2294 err_config: 2295 if (stream->ctx) 2296 oa_put_render_ctx_id(stream); 2297 2298 return ret; 2299 } 2300 2301 void i915_oa_init_reg_state(struct intel_engine_cs *engine, 2302 struct intel_context *ce, 2303 u32 *regs) 2304 { 2305 struct i915_perf_stream *stream; 2306 2307 if (engine->class != RENDER_CLASS) 2308 return; 2309 2310 stream = engine->i915->perf.exclusive_stream; 2311 if (stream) 2312 gen8_update_reg_state_unlocked(stream, ce, regs, stream->oa_config); 2313 } 2314 2315 /** 2316 * i915_perf_read_locked - &i915_perf_stream_ops->read with error normalisation 2317 * @stream: An i915 perf stream 2318 * @file: An i915 perf stream file 2319 * @buf: destination buffer given by userspace 2320 * @count: the number of bytes userspace wants to read 2321 * @ppos: (inout) file seek position (unused) 2322 * 2323 * Besides wrapping &i915_perf_stream_ops->read this provides a common place to 2324 * ensure that if we've successfully copied any data then reporting that takes 2325 * precedence over any internal error status, so the data isn't lost. 2326 * 2327 * For example ret will be -ENOSPC whenever there is more buffered data than 2328 * can be copied to userspace, but that's only interesting if we weren't able 2329 * to copy some data because it implies the userspace buffer is too small to 2330 * receive a single record (and we never split records). 2331 * 2332 * Another case with ret == -EFAULT is more of a grey area since it would seem 2333 * like bad form for userspace to ask us to overrun its buffer, but the user 2334 * knows best: 2335 * 2336 * http://yarchive.net/comp/linux/partial_reads_writes.html 2337 * 2338 * Returns: The number of bytes copied or a negative error code on failure. 2339 */ 2340 static ssize_t i915_perf_read_locked(struct i915_perf_stream *stream, 2341 struct file *file, 2342 char __user *buf, 2343 size_t count, 2344 loff_t *ppos) 2345 { 2346 /* Note we keep the offset (aka bytes read) separate from any 2347 * error status so that the final check for whether we return 2348 * the bytes read with a higher precedence than any error (see 2349 * comment below) doesn't need to be handled/duplicated in 2350 * stream->ops->read() implementations. 
2351 */ 2352 size_t offset = 0; 2353 int ret = stream->ops->read(stream, buf, count, &offset); 2354 2355 return offset ?: (ret ?: -EAGAIN); 2356 } 2357 2358 /** 2359 * i915_perf_read - handles read() FOP for i915 perf stream FDs 2360 * @file: An i915 perf stream file 2361 * @buf: destination buffer given by userspace 2362 * @count: the number of bytes userspace wants to read 2363 * @ppos: (inout) file seek position (unused) 2364 * 2365 * The entry point for handling a read() on a stream file descriptor from 2366 * userspace. Most of the work is left to the i915_perf_read_locked() and 2367 * &i915_perf_stream_ops->read but to save having stream implementations (of 2368 * which we might have multiple later) we handle blocking read here. 2369 * 2370 * We can also consistently treat trying to read from a disabled stream 2371 * as an IO error so implementations can assume the stream is enabled 2372 * while reading. 2373 * 2374 * Returns: The number of bytes copied or a negative error code on failure. 2375 */ 2376 static ssize_t i915_perf_read(struct file *file, 2377 char __user *buf, 2378 size_t count, 2379 loff_t *ppos) 2380 { 2381 struct i915_perf_stream *stream = file->private_data; 2382 struct drm_i915_private *dev_priv = stream->dev_priv; 2383 ssize_t ret; 2384 2385 /* To ensure it's handled consistently we simply treat all reads of a 2386 * disabled stream as an error. In particular it might otherwise lead 2387 * to a deadlock for blocking file descriptors... 2388 */ 2389 if (!stream->enabled) 2390 return -EIO; 2391 2392 if (!(file->f_flags & O_NONBLOCK)) { 2393 /* There's the small chance of false positives from 2394 * stream->ops->wait_unlocked. 2395 * 2396 * E.g. with single context filtering since we only wait until 2397 * oabuffer has >= 1 report we don't immediately know whether 2398 * any reports really belong to the current context 2399 */ 2400 do { 2401 ret = stream->ops->wait_unlocked(stream); 2402 if (ret) 2403 return ret; 2404 2405 mutex_lock(&dev_priv->perf.lock); 2406 ret = i915_perf_read_locked(stream, file, 2407 buf, count, ppos); 2408 mutex_unlock(&dev_priv->perf.lock); 2409 } while (ret == -EAGAIN); 2410 } else { 2411 mutex_lock(&dev_priv->perf.lock); 2412 ret = i915_perf_read_locked(stream, file, buf, count, ppos); 2413 mutex_unlock(&dev_priv->perf.lock); 2414 } 2415 2416 /* We allow the poll checking to sometimes report false positive EPOLLIN 2417 * events where we might actually report EAGAIN on read() if there's 2418 * not really any data available. In this situation though we don't 2419 * want to enter a busy loop between poll() reporting a EPOLLIN event 2420 * and read() returning -EAGAIN. Clearing the oa.pollin state here 2421 * effectively ensures we back off until the next hrtimer callback 2422 * before reporting another EPOLLIN event. 2423 */ 2424 if (ret >= 0 || ret == -EAGAIN) { 2425 /* Maybe make ->pollin per-stream state if we support multiple 2426 * concurrent streams in the future. 
2427 */ 2428 stream->pollin = false; 2429 } 2430 2431 return ret; 2432 } 2433 2434 static enum hrtimer_restart oa_poll_check_timer_cb(struct hrtimer *hrtimer) 2435 { 2436 struct i915_perf_stream *stream = 2437 container_of(hrtimer, typeof(*stream), poll_check_timer); 2438 2439 if (oa_buffer_check_unlocked(stream)) { 2440 stream->pollin = true; 2441 wake_up(&stream->poll_wq); 2442 } 2443 2444 hrtimer_forward_now(hrtimer, ns_to_ktime(POLL_PERIOD)); 2445 2446 return HRTIMER_RESTART; 2447 } 2448 2449 /** 2450 * i915_perf_poll_locked - poll_wait() with a suitable wait queue for stream 2451 * @dev_priv: i915 device instance 2452 * @stream: An i915 perf stream 2453 * @file: An i915 perf stream file 2454 * @wait: poll() state table 2455 * 2456 * For handling userspace polling on an i915 perf stream, this calls through to 2457 * &i915_perf_stream_ops->poll_wait to call poll_wait() with a wait queue that 2458 * will be woken for new stream data. 2459 * 2460 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2461 * with any non-file-operation driver hooks. 2462 * 2463 * Returns: any poll events that are ready without sleeping 2464 */ 2465 static __poll_t i915_perf_poll_locked(struct drm_i915_private *dev_priv, 2466 struct i915_perf_stream *stream, 2467 struct file *file, 2468 poll_table *wait) 2469 { 2470 __poll_t events = 0; 2471 2472 stream->ops->poll_wait(stream, file, wait); 2473 2474 /* Note: we don't explicitly check whether there's something to read 2475 * here since this path may be very hot depending on what else 2476 * userspace is polling, or on the timeout in use. We rely solely on 2477 * the hrtimer/oa_poll_check_timer_cb to notify us when there are 2478 * samples to read. 2479 */ 2480 if (stream->pollin) 2481 events |= EPOLLIN; 2482 2483 return events; 2484 } 2485 2486 /** 2487 * i915_perf_poll - call poll_wait() with a suitable wait queue for stream 2488 * @file: An i915 perf stream file 2489 * @wait: poll() state table 2490 * 2491 * For handling userspace polling on an i915 perf stream, this ensures 2492 * poll_wait() gets called with a wait queue that will be woken for new stream 2493 * data. 2494 * 2495 * Note: Implementation deferred to i915_perf_poll_locked() 2496 * 2497 * Returns: any poll events that are ready without sleeping 2498 */ 2499 static __poll_t i915_perf_poll(struct file *file, poll_table *wait) 2500 { 2501 struct i915_perf_stream *stream = file->private_data; 2502 struct drm_i915_private *dev_priv = stream->dev_priv; 2503 __poll_t ret; 2504 2505 mutex_lock(&dev_priv->perf.lock); 2506 ret = i915_perf_poll_locked(dev_priv, stream, file, wait); 2507 mutex_unlock(&dev_priv->perf.lock); 2508 2509 return ret; 2510 } 2511 2512 /** 2513 * i915_perf_enable_locked - handle `I915_PERF_IOCTL_ENABLE` ioctl 2514 * @stream: A disabled i915 perf stream 2515 * 2516 * [Re]enables the associated capture of data for this stream. 2517 * 2518 * If a stream was previously enabled then there's currently no intention 2519 * to provide userspace any guarantee about the preservation of previously 2520 * buffered data. 
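 *
 * As a hedged sketch of the userspace side (assuming "stream_fd" was returned
 * by DRM_IOCTL_I915_PERF_OPEN with I915_PERF_FLAG_DISABLED set), note that the
 * enable/disable ioctls take no argument:
 *
 *	ioctl(stream_fd, I915_PERF_IOCTL_ENABLE, 0);
 *	// ... read() or poll() the stream for samples ...
 *	ioctl(stream_fd, I915_PERF_IOCTL_DISABLE, 0);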
*/ 2522 static void i915_perf_enable_locked(struct i915_perf_stream *stream) 2523 { 2524 if (stream->enabled) 2525 return; 2526 2527 /* Allow stream->ops->enable() to refer to this */ 2528 stream->enabled = true; 2529 2530 if (stream->ops->enable) 2531 stream->ops->enable(stream); 2532 } 2533 2534 /** 2535 * i915_perf_disable_locked - handle `I915_PERF_IOCTL_DISABLE` ioctl 2536 * @stream: An enabled i915 perf stream 2537 * 2538 * Disables the associated capture of data for this stream. 2539 * 2540 * The intention is that disabling and re-enabling a stream will ideally be 2541 * cheaper than destroying and re-opening a stream with the same configuration, 2542 * though there are no formal guarantees about what state or buffered data 2543 * must be retained between disabling and re-enabling a stream. 2544 * 2545 * Note: while a stream is disabled it's considered an error for userspace 2546 * to attempt to read from the stream (-EIO). 2547 */ 2548 static void i915_perf_disable_locked(struct i915_perf_stream *stream) 2549 { 2550 if (!stream->enabled) 2551 return; 2552 2553 /* Allow stream->ops->disable() to refer to this */ 2554 stream->enabled = false; 2555 2556 if (stream->ops->disable) 2557 stream->ops->disable(stream); 2558 } 2559 2560 /** 2561 * i915_perf_ioctl_locked - support ioctl() usage with i915 perf stream FDs 2562 * @stream: An i915 perf stream 2563 * @cmd: the ioctl request 2564 * @arg: the ioctl data 2565 * 2566 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2567 * with any non-file-operation driver hooks. 2568 * 2569 * Returns: zero on success or a negative error code. Returns -EINVAL for 2570 * an unknown ioctl request. 2571 */ 2572 static long i915_perf_ioctl_locked(struct i915_perf_stream *stream, 2573 unsigned int cmd, 2574 unsigned long arg) 2575 { 2576 switch (cmd) { 2577 case I915_PERF_IOCTL_ENABLE: 2578 i915_perf_enable_locked(stream); 2579 return 0; 2580 case I915_PERF_IOCTL_DISABLE: 2581 i915_perf_disable_locked(stream); 2582 return 0; 2583 } 2584 2585 return -EINVAL; 2586 } 2587 2588 /** 2589 * i915_perf_ioctl - support ioctl() usage with i915 perf stream FDs 2590 * @file: An i915 perf stream file 2591 * @cmd: the ioctl request 2592 * @arg: the ioctl data 2593 * 2594 * Implementation deferred to i915_perf_ioctl_locked(). 2595 * 2596 * Returns: zero on success or a negative error code. Returns -EINVAL for 2597 * an unknown ioctl request. 2598 */ 2599 static long i915_perf_ioctl(struct file *file, 2600 unsigned int cmd, 2601 unsigned long arg) 2602 { 2603 struct i915_perf_stream *stream = file->private_data; 2604 struct drm_i915_private *dev_priv = stream->dev_priv; 2605 long ret; 2606 2607 mutex_lock(&dev_priv->perf.lock); 2608 ret = i915_perf_ioctl_locked(stream, cmd, arg); 2609 mutex_unlock(&dev_priv->perf.lock); 2610 2611 return ret; 2612 } 2613 2614 /** 2615 * i915_perf_destroy_locked - destroy an i915 perf stream 2616 * @stream: An i915 perf stream 2617 * 2618 * Frees all resources associated with the given i915 perf @stream, disabling 2619 * any associated data capture in the process. 2620 * 2621 * Note: The &drm_i915_private->perf.lock mutex has been taken to serialize 2622 * with any non-file-operation driver hooks.
*/ 2624 static void i915_perf_destroy_locked(struct i915_perf_stream *stream) 2625 { 2626 if (stream->enabled) 2627 i915_perf_disable_locked(stream); 2628 2629 if (stream->ops->destroy) 2630 stream->ops->destroy(stream); 2631 2632 list_del(&stream->link); 2633 2634 if (stream->ctx) 2635 i915_gem_context_put(stream->ctx); 2636 2637 kfree(stream); 2638 } 2639 2640 /** 2641 * i915_perf_release - handles userspace close() of a stream file 2642 * @inode: anonymous inode associated with file 2643 * @file: An i915 perf stream file 2644 * 2645 * Cleans up any resources associated with an open i915 perf stream file. 2646 * 2647 * NB: close() can't really fail from the userspace point of view. 2648 * 2649 * Returns: zero on success or a negative error code. 2650 */ 2651 static int i915_perf_release(struct inode *inode, struct file *file) 2652 { 2653 struct i915_perf_stream *stream = file->private_data; 2654 struct drm_i915_private *dev_priv = stream->dev_priv; 2655 2656 mutex_lock(&dev_priv->perf.lock); 2657 i915_perf_destroy_locked(stream); 2658 mutex_unlock(&dev_priv->perf.lock); 2659 2660 /* Release the reference the perf stream kept on the driver. */ 2661 drm_dev_put(&dev_priv->drm); 2662 2663 return 0; 2664 } 2665 2666 2667 static const struct file_operations fops = { 2668 .owner = THIS_MODULE, 2669 .llseek = no_llseek, 2670 .release = i915_perf_release, 2671 .poll = i915_perf_poll, 2672 .read = i915_perf_read, 2673 .unlocked_ioctl = i915_perf_ioctl, 2674 /* Our ioctls have no arguments, so it's safe to use the same function 2675 * to handle 32-bit compatibility. 2676 */ 2677 .compat_ioctl = i915_perf_ioctl, 2678 }; 2679 2680 2681 /** 2682 * i915_perf_open_ioctl_locked - DRM ioctl() for userspace to open a stream FD 2683 * @dev_priv: i915 device instance 2684 * @param: The open parameters passed to `DRM_I915_PERF_OPEN` 2685 * @props: individually validated u64 property value pairs 2686 * @file: drm file 2687 * 2688 * See i915_perf_open_ioctl() for interface details. 2689 * 2690 * Implements further stream config validation and stream initialization on 2691 * behalf of i915_perf_open_ioctl() with the &drm_i915_private->perf.lock mutex 2692 * taken to serialize with any non-file-operation driver hooks. 2693 * 2694 * Note: at this point the @props have only been validated in isolation and 2695 * it's still necessary to validate that the combination of properties makes 2696 * sense. 2697 * 2698 * In the case where userspace is interested in OA unit metrics then further 2699 * config validation and stream initialization details will be handled by 2700 * i915_oa_stream_init(). The code here should only validate config state that 2701 * will be relevant to all stream types / backends. 2702 * 2703 * Returns: A newly opened i915 perf stream file descriptor or a negative error code on failure.
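 *
 * Note, purely as an illustration of the privilege check below: opening a
 * stream that exposes system-wide metrics normally requires CAP_SYS_ADMIN,
 * and an administrator can relax that requirement via the
 * dev.i915.perf_stream_paranoid sysctl registered by this file, e.g.
 * "sysctl dev.i915.perf_stream_paranoid=0".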
2704 */ 2705 static int 2706 i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv, 2707 struct drm_i915_perf_open_param *param, 2708 struct perf_open_properties *props, 2709 struct drm_file *file) 2710 { 2711 struct i915_gem_context *specific_ctx = NULL; 2712 struct i915_perf_stream *stream = NULL; 2713 unsigned long f_flags = 0; 2714 bool privileged_op = true; 2715 int stream_fd; 2716 int ret; 2717 2718 if (props->single_context) { 2719 u32 ctx_handle = props->ctx_handle; 2720 struct drm_i915_file_private *file_priv = file->driver_priv; 2721 2722 specific_ctx = i915_gem_context_lookup(file_priv, ctx_handle); 2723 if (!specific_ctx) { 2724 DRM_DEBUG("Failed to look up context with ID %u for opening perf stream\n", 2725 ctx_handle); 2726 ret = -ENOENT; 2727 goto err; 2728 } 2729 } 2730 2731 /* 2732 * On Haswell the OA unit supports clock gating off for a specific 2733 * context and in this mode there's no visibility of metrics for the 2734 * rest of the system, which we consider acceptable for a 2735 * non-privileged client. 2736 * 2737 * For Gen8+ the OA unit no longer supports clock gating off for a 2738 * specific context and the kernel can't securely stop the counters 2739 * from updating as system-wide / global values. Even though we can 2740 * filter reports based on the included context ID we can't block 2741 * clients from seeing the raw / global counter values via 2742 * MI_REPORT_PERF_COUNT commands and so consider it a privileged op to 2743 * enable the OA unit by default. 2744 */ 2745 if (IS_HASWELL(dev_priv) && specific_ctx) 2746 privileged_op = false; 2747 2748 /* Similar to perf's kernel.perf_paranoid_cpu sysctl option 2749 * we check a dev.i915.perf_stream_paranoid sysctl option 2750 * to determine if it's ok to access system wide OA counters 2751 * without CAP_SYS_ADMIN privileges. 2752 */ 2753 if (privileged_op && 2754 i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) { 2755 DRM_DEBUG("Insufficient privileges to open system-wide i915 perf stream\n"); 2756 ret = -EACCES; 2757 goto err_ctx; 2758 } 2759 2760 stream = kzalloc(sizeof(*stream), GFP_KERNEL); 2761 if (!stream) { 2762 ret = -ENOMEM; 2763 goto err_ctx; 2764 } 2765 2766 stream->dev_priv = dev_priv; 2767 stream->ctx = specific_ctx; 2768 2769 ret = i915_oa_stream_init(stream, param, props); 2770 if (ret) 2771 goto err_alloc; 2772 2773 /* we avoid simply assigning stream->sample_flags = props->sample_flags 2774 * to have _stream_init check the combination of sample flags more 2775 * thoroughly, but still this is the expected result at this point. 2776 */ 2777 if (WARN_ON(stream->sample_flags != props->sample_flags)) { 2778 ret = -ENODEV; 2779 goto err_flags; 2780 } 2781 2782 list_add(&stream->link, &dev_priv->perf.streams); 2783 2784 if (param->flags & I915_PERF_FLAG_FD_CLOEXEC) 2785 f_flags |= O_CLOEXEC; 2786 if (param->flags & I915_PERF_FLAG_FD_NONBLOCK) 2787 f_flags |= O_NONBLOCK; 2788 2789 stream_fd = anon_inode_getfd("[i915_perf]", &fops, stream, f_flags); 2790 if (stream_fd < 0) { 2791 ret = stream_fd; 2792 goto err_open; 2793 } 2794 2795 if (!(param->flags & I915_PERF_FLAG_DISABLED)) 2796 i915_perf_enable_locked(stream); 2797 2798 /* Take a reference on the driver that will be kept with stream_fd 2799 * until its release. 
*/ 2801 drm_dev_get(&dev_priv->drm); 2802 2803 return stream_fd; 2804 2805 err_open: 2806 list_del(&stream->link); 2807 err_flags: 2808 if (stream->ops->destroy) 2809 stream->ops->destroy(stream); 2810 err_alloc: 2811 kfree(stream); 2812 err_ctx: 2813 if (specific_ctx) 2814 i915_gem_context_put(specific_ctx); 2815 err: 2816 return ret; 2817 } 2818 2819 static u64 oa_exponent_to_ns(struct drm_i915_private *dev_priv, int exponent) 2820 { 2821 return div64_u64(1000000000ULL * (2ULL << exponent), 2822 1000ULL * RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz); 2823 } 2824 2825 /** 2826 * read_properties_unlocked - validate + copy userspace stream open properties 2827 * @dev_priv: i915 device instance 2828 * @uprops: The array of u64 key value pairs given by userspace 2829 * @n_props: The number of key value pairs expected in @uprops 2830 * @props: The stream configuration built up while validating properties 2831 * 2832 * Note this function only validates properties in isolation; it doesn't 2833 * validate that the combination of properties makes sense or that all 2834 * properties necessary for a particular kind of stream have been set. 2835 * 2836 * Note that there currently aren't any ordering requirements for properties so 2837 * we shouldn't validate or assume anything about ordering here. This doesn't 2838 * rule out defining new properties with ordering requirements in the future. 2839 */ 2840 static int read_properties_unlocked(struct drm_i915_private *dev_priv, 2841 u64 __user *uprops, 2842 u32 n_props, 2843 struct perf_open_properties *props) 2844 { 2845 u64 __user *uprop = uprops; 2846 u32 i; 2847 2848 memset(props, 0, sizeof(struct perf_open_properties)); 2849 2850 if (!n_props) { 2851 DRM_DEBUG("No i915 perf properties given\n"); 2852 return -EINVAL; 2853 } 2854 2855 /* Considering that ID = 0 is reserved and assuming that we don't 2856 * (currently) expect any configurations to ever specify duplicate 2857 * values for a particular property ID then the last _PROP_MAX value is 2858 * one greater than the maximum number of properties we expect to get 2859 * from userspace.
2860 */ 2861 if (n_props >= DRM_I915_PERF_PROP_MAX) { 2862 DRM_DEBUG("More i915 perf properties specified than exist\n"); 2863 return -EINVAL; 2864 } 2865 2866 for (i = 0; i < n_props; i++) { 2867 u64 oa_period, oa_freq_hz; 2868 u64 id, value; 2869 int ret; 2870 2871 ret = get_user(id, uprop); 2872 if (ret) 2873 return ret; 2874 2875 ret = get_user(value, uprop + 1); 2876 if (ret) 2877 return ret; 2878 2879 if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) { 2880 DRM_DEBUG("Unknown i915 perf property ID\n"); 2881 return -EINVAL; 2882 } 2883 2884 switch ((enum drm_i915_perf_property_id)id) { 2885 case DRM_I915_PERF_PROP_CTX_HANDLE: 2886 props->single_context = 1; 2887 props->ctx_handle = value; 2888 break; 2889 case DRM_I915_PERF_PROP_SAMPLE_OA: 2890 if (value) 2891 props->sample_flags |= SAMPLE_OA_REPORT; 2892 break; 2893 case DRM_I915_PERF_PROP_OA_METRICS_SET: 2894 if (value == 0) { 2895 DRM_DEBUG("Unknown OA metric set ID\n"); 2896 return -EINVAL; 2897 } 2898 props->metrics_set = value; 2899 break; 2900 case DRM_I915_PERF_PROP_OA_FORMAT: 2901 if (value == 0 || value >= I915_OA_FORMAT_MAX) { 2902 DRM_DEBUG("Out-of-range OA report format %llu\n", 2903 value); 2904 return -EINVAL; 2905 } 2906 if (!dev_priv->perf.oa_formats[value].size) { 2907 DRM_DEBUG("Unsupported OA report format %llu\n", 2908 value); 2909 return -EINVAL; 2910 } 2911 props->oa_format = value; 2912 break; 2913 case DRM_I915_PERF_PROP_OA_EXPONENT: 2914 if (value > OA_EXPONENT_MAX) { 2915 DRM_DEBUG("OA timer exponent too high (> %u)\n", 2916 OA_EXPONENT_MAX); 2917 return -EINVAL; 2918 } 2919 2920 /* Theoretically we can program the OA unit to sample 2921 * e.g. every 160ns for HSW, 167ns for BDW/SKL or 104ns 2922 * for BXT. We don't allow such high sampling 2923 * frequencies by default unless root. 2924 */ 2925 2926 BUILD_BUG_ON(sizeof(oa_period) != 8); 2927 oa_period = oa_exponent_to_ns(dev_priv, value); 2928 2929 /* This check is primarily to ensure that oa_period <= 2930 * UINT32_MAX (before passing to do_div which only 2931 * accepts a u32 denominator), but we can also skip 2932 * checking anything < 1Hz which implicitly can't be 2933 * limited via an integer oa_max_sample_rate. 2934 */ 2935 if (oa_period <= NSEC_PER_SEC) { 2936 u64 tmp = NSEC_PER_SEC; 2937 do_div(tmp, oa_period); 2938 oa_freq_hz = tmp; 2939 } else 2940 oa_freq_hz = 0; 2941 2942 if (oa_freq_hz > i915_oa_max_sample_rate && 2943 !capable(CAP_SYS_ADMIN)) { 2944 DRM_DEBUG("OA exponent would exceed the max sampling frequency (sysctl dev.i915.oa_max_sample_rate) %uHz without root privileges\n", 2945 i915_oa_max_sample_rate); 2946 return -EACCES; 2947 } 2948 2949 props->oa_periodic = true; 2950 props->oa_period_exponent = value; 2951 break; 2952 case DRM_I915_PERF_PROP_MAX: 2953 MISSING_CASE(id); 2954 return -EINVAL; 2955 } 2956 2957 uprop += 2; 2958 } 2959 2960 return 0; 2961 } 2962 2963 /** 2964 * i915_perf_open_ioctl - DRM ioctl() for userspace to open a stream FD 2965 * @dev: drm device 2966 * @data: ioctl data copied from userspace (unvalidated) 2967 * @file: drm file 2968 * 2969 * Validates the stream open parameters given by userspace including flags 2970 * and an array of u64 key, value pair properties. 2971 * 2972 * Very little is assumed up front about the nature of the stream being 2973 * opened (for instance we don't assume it's for periodic OA unit metrics). An 2974 * i915-perf stream is expected to be a suitable interface for other forms of 2975 * buffered data written by the GPU besides periodic OA metrics. 
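 *
 * As a rough, illustrative sketch of the userspace side (the property values
 * and "drm_fd" are assumptions for the example, not requirements), opening a
 * periodic OA stream looks something like:
 *
 *	u64 props[] = {
 *		DRM_I915_PERF_PROP_SAMPLE_OA, 1,
 *		DRM_I915_PERF_PROP_OA_METRICS_SET, metrics_set_id,
 *		DRM_I915_PERF_PROP_OA_FORMAT, I915_OA_FORMAT_A32u40_A4u32_B8_C8,
 *		DRM_I915_PERF_PROP_OA_EXPONENT, 16,
 *	};
 *	struct drm_i915_perf_open_param param = {
 *		.flags = I915_PERF_FLAG_FD_CLOEXEC,
 *		.num_properties = sizeof(props) / (2 * sizeof(u64)),
 *		.properties_ptr = (uintptr_t)props,
 *	};
 *	int stream_fd = ioctl(drm_fd, DRM_IOCTL_I915_PERF_OPEN, &param);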
2976 * 2977 * Note we copy the properties from userspace outside of the i915 perf 2978 * mutex to avoid an awkward lockdep with mmap_sem. 2979 * 2980 * Most of the implementation details are handled by 2981 * i915_perf_open_ioctl_locked() after taking the &drm_i915_private->perf.lock 2982 * mutex for serializing with any non-file-operation driver hooks. 2983 * 2984 * Return: A newly opened i915 Perf stream file descriptor or negative 2985 * error code on failure. 2986 */ 2987 int i915_perf_open_ioctl(struct drm_device *dev, void *data, 2988 struct drm_file *file) 2989 { 2990 struct drm_i915_private *dev_priv = dev->dev_private; 2991 struct drm_i915_perf_open_param *param = data; 2992 struct perf_open_properties props; 2993 u32 known_open_flags; 2994 int ret; 2995 2996 if (!dev_priv->perf.initialized) { 2997 DRM_DEBUG("i915 perf interface not available for this system\n"); 2998 return -ENOTSUPP; 2999 } 3000 3001 known_open_flags = I915_PERF_FLAG_FD_CLOEXEC | 3002 I915_PERF_FLAG_FD_NONBLOCK | 3003 I915_PERF_FLAG_DISABLED; 3004 if (param->flags & ~known_open_flags) { 3005 DRM_DEBUG("Unknown drm_i915_perf_open_param flag\n"); 3006 return -EINVAL; 3007 } 3008 3009 ret = read_properties_unlocked(dev_priv, 3010 u64_to_user_ptr(param->properties_ptr), 3011 param->num_properties, 3012 &props); 3013 if (ret) 3014 return ret; 3015 3016 mutex_lock(&dev_priv->perf.lock); 3017 ret = i915_perf_open_ioctl_locked(dev_priv, param, &props, file); 3018 mutex_unlock(&dev_priv->perf.lock); 3019 3020 return ret; 3021 } 3022 3023 /** 3024 * i915_perf_register - exposes i915-perf to userspace 3025 * @dev_priv: i915 device instance 3026 * 3027 * In particular OA metric sets are advertised under a sysfs metrics/ 3028 * directory allowing userspace to enumerate valid IDs that can be 3029 * used to open an i915-perf stream. 3030 */ 3031 void i915_perf_register(struct drm_i915_private *dev_priv) 3032 { 3033 int ret; 3034 3035 if (!dev_priv->perf.initialized) 3036 return; 3037 3038 /* To be sure we're synchronized with an attempted 3039 * i915_perf_open_ioctl(); considering that we register after 3040 * being exposed to userspace. 
3041 */ 3042 mutex_lock(&dev_priv->perf.lock); 3043 3044 dev_priv->perf.metrics_kobj = 3045 kobject_create_and_add("metrics", 3046 &dev_priv->drm.primary->kdev->kobj); 3047 if (!dev_priv->perf.metrics_kobj) 3048 goto exit; 3049 3050 sysfs_attr_init(&dev_priv->perf.test_config.sysfs_metric_id.attr); 3051 3052 if (INTEL_GEN(dev_priv) >= 11) { 3053 i915_perf_load_test_config_icl(dev_priv); 3054 } else if (IS_CANNONLAKE(dev_priv)) { 3055 i915_perf_load_test_config_cnl(dev_priv); 3056 } else if (IS_COFFEELAKE(dev_priv)) { 3057 if (IS_CFL_GT2(dev_priv)) 3058 i915_perf_load_test_config_cflgt2(dev_priv); 3059 if (IS_CFL_GT3(dev_priv)) 3060 i915_perf_load_test_config_cflgt3(dev_priv); 3061 } else if (IS_GEMINILAKE(dev_priv)) { 3062 i915_perf_load_test_config_glk(dev_priv); 3063 } else if (IS_KABYLAKE(dev_priv)) { 3064 if (IS_KBL_GT2(dev_priv)) 3065 i915_perf_load_test_config_kblgt2(dev_priv); 3066 else if (IS_KBL_GT3(dev_priv)) 3067 i915_perf_load_test_config_kblgt3(dev_priv); 3068 } else if (IS_BROXTON(dev_priv)) { 3069 i915_perf_load_test_config_bxt(dev_priv); 3070 } else if (IS_SKYLAKE(dev_priv)) { 3071 if (IS_SKL_GT2(dev_priv)) 3072 i915_perf_load_test_config_sklgt2(dev_priv); 3073 else if (IS_SKL_GT3(dev_priv)) 3074 i915_perf_load_test_config_sklgt3(dev_priv); 3075 else if (IS_SKL_GT4(dev_priv)) 3076 i915_perf_load_test_config_sklgt4(dev_priv); 3077 } else if (IS_CHERRYVIEW(dev_priv)) { 3078 i915_perf_load_test_config_chv(dev_priv); 3079 } else if (IS_BROADWELL(dev_priv)) { 3080 i915_perf_load_test_config_bdw(dev_priv); 3081 } else if (IS_HASWELL(dev_priv)) { 3082 i915_perf_load_test_config_hsw(dev_priv); 3083 } 3084 3085 if (dev_priv->perf.test_config.id == 0) 3086 goto sysfs_error; 3087 3088 ret = sysfs_create_group(dev_priv->perf.metrics_kobj, 3089 &dev_priv->perf.test_config.sysfs_metric); 3090 if (ret) 3091 goto sysfs_error; 3092 3093 atomic_set(&dev_priv->perf.test_config.ref_count, 1); 3094 3095 goto exit; 3096 3097 sysfs_error: 3098 kobject_put(dev_priv->perf.metrics_kobj); 3099 dev_priv->perf.metrics_kobj = NULL; 3100 3101 exit: 3102 mutex_unlock(&dev_priv->perf.lock); 3103 } 3104 3105 /** 3106 * i915_perf_unregister - hide i915-perf from userspace 3107 * @dev_priv: i915 device instance 3108 * 3109 * i915-perf state cleanup is split up into an 'unregister' and 3110 * 'deinit' phase where the interface is first hidden from 3111 * userspace by i915_perf_unregister() before cleaning up 3112 * remaining state in i915_perf_fini(). 
*/ 3114 void i915_perf_unregister(struct drm_i915_private *dev_priv) 3115 { 3116 if (!dev_priv->perf.metrics_kobj) 3117 return; 3118 3119 sysfs_remove_group(dev_priv->perf.metrics_kobj, 3120 &dev_priv->perf.test_config.sysfs_metric); 3121 3122 kobject_put(dev_priv->perf.metrics_kobj); 3123 dev_priv->perf.metrics_kobj = NULL; 3124 } 3125 3126 static bool gen8_is_valid_flex_addr(struct drm_i915_private *dev_priv, u32 addr) 3127 { 3128 static const i915_reg_t flex_eu_regs[] = { 3129 EU_PERF_CNTL0, 3130 EU_PERF_CNTL1, 3131 EU_PERF_CNTL2, 3132 EU_PERF_CNTL3, 3133 EU_PERF_CNTL4, 3134 EU_PERF_CNTL5, 3135 EU_PERF_CNTL6, 3136 }; 3137 int i; 3138 3139 for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { 3140 if (i915_mmio_reg_offset(flex_eu_regs[i]) == addr) 3141 return true; 3142 } 3143 return false; 3144 } 3145 3146 static bool gen7_is_valid_b_counter_addr(struct drm_i915_private *dev_priv, u32 addr) 3147 { 3148 return (addr >= i915_mmio_reg_offset(OASTARTTRIG1) && 3149 addr <= i915_mmio_reg_offset(OASTARTTRIG8)) || 3150 (addr >= i915_mmio_reg_offset(OAREPORTTRIG1) && 3151 addr <= i915_mmio_reg_offset(OAREPORTTRIG8)) || 3152 (addr >= i915_mmio_reg_offset(OACEC0_0) && 3153 addr <= i915_mmio_reg_offset(OACEC7_1)); 3154 } 3155 3156 static bool gen7_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) 3157 { 3158 return addr == i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) || 3159 (addr >= i915_mmio_reg_offset(MICRO_BP0_0) && 3160 addr <= i915_mmio_reg_offset(NOA_WRITE)) || 3161 (addr >= i915_mmio_reg_offset(OA_PERFCNT1_LO) && 3162 addr <= i915_mmio_reg_offset(OA_PERFCNT2_HI)) || 3163 (addr >= i915_mmio_reg_offset(OA_PERFMATRIX_LO) && 3164 addr <= i915_mmio_reg_offset(OA_PERFMATRIX_HI)); 3165 } 3166 3167 static bool gen8_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) 3168 { 3169 return gen7_is_valid_mux_addr(dev_priv, addr) || 3170 addr == i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) || 3171 (addr >= i915_mmio_reg_offset(RPM_CONFIG0) && 3172 addr <= i915_mmio_reg_offset(NOA_CONFIG(8))); 3173 } 3174 3175 static bool gen10_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) 3176 { 3177 return gen8_is_valid_mux_addr(dev_priv, addr) || 3178 addr == i915_mmio_reg_offset(GEN10_NOA_WRITE_HIGH) || 3179 (addr >= i915_mmio_reg_offset(OA_PERFCNT3_LO) && 3180 addr <= i915_mmio_reg_offset(OA_PERFCNT4_HI)); 3181 } 3182 3183 static bool hsw_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) 3184 { 3185 return gen7_is_valid_mux_addr(dev_priv, addr) || 3186 (addr >= 0x25100 && addr <= 0x2FF90) || 3187 (addr >= i915_mmio_reg_offset(HSW_MBVID2_NOA0) && 3188 addr <= i915_mmio_reg_offset(HSW_MBVID2_NOA9)) || 3189 addr == i915_mmio_reg_offset(HSW_MBVID2_MISR0); 3190 } 3191 3192 static bool chv_is_valid_mux_addr(struct drm_i915_private *dev_priv, u32 addr) 3193 { 3194 return gen7_is_valid_mux_addr(dev_priv, addr) || 3195 (addr >= 0x182300 && addr <= 0x1823A4); 3196 } 3197 3198 static u32 mask_reg_value(u32 reg, u32 val) 3199 { 3200 /* HALF_SLICE_CHICKEN2 is programmed with the 3201 * WaDisableSTUnitPowerOptimization workaround. Make sure the value 3202 * programmed by userspace doesn't change this. 3203 */ 3204 if (i915_mmio_reg_offset(HALF_SLICE_CHICKEN2) == reg) 3205 val = val & ~_MASKED_BIT_ENABLE(GEN8_ST_PO_DISABLE); 3206 3207 /* WAIT_FOR_RC6_EXIT has only one bit fulfilling the function 3208 * indicated by its name and a bunch of selection fields used by OA 3209 * configs.
3210 */ 3211 if (i915_mmio_reg_offset(WAIT_FOR_RC6_EXIT) == reg) 3212 val = val & ~_MASKED_BIT_ENABLE(HSW_WAIT_FOR_RC6_EXIT_ENABLE); 3213 3214 return val; 3215 } 3216 3217 static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv, 3218 bool (*is_valid)(struct drm_i915_private *dev_priv, u32 addr), 3219 u32 __user *regs, 3220 u32 n_regs) 3221 { 3222 struct i915_oa_reg *oa_regs; 3223 int err; 3224 u32 i; 3225 3226 if (!n_regs) 3227 return NULL; 3228 3229 if (!access_ok(regs, n_regs * sizeof(u32) * 2)) 3230 return ERR_PTR(-EFAULT); 3231 3232 /* No is_valid function means we're not allowing any register to be programmed. */ 3233 GEM_BUG_ON(!is_valid); 3234 if (!is_valid) 3235 return ERR_PTR(-EINVAL); 3236 3237 oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); 3238 if (!oa_regs) 3239 return ERR_PTR(-ENOMEM); 3240 3241 for (i = 0; i < n_regs; i++) { 3242 u32 addr, value; 3243 3244 err = get_user(addr, regs); 3245 if (err) 3246 goto addr_err; 3247 3248 if (!is_valid(dev_priv, addr)) { 3249 DRM_DEBUG("Invalid oa_reg address: %X\n", addr); 3250 err = -EINVAL; 3251 goto addr_err; 3252 } 3253 3254 err = get_user(value, regs + 1); 3255 if (err) 3256 goto addr_err; 3257 3258 oa_regs[i].addr = _MMIO(addr); 3259 oa_regs[i].value = mask_reg_value(addr, value); 3260 3261 regs += 2; 3262 } 3263 3264 return oa_regs; 3265 3266 addr_err: 3267 kfree(oa_regs); 3268 return ERR_PTR(err); 3269 } 3270 3271 static ssize_t show_dynamic_id(struct device *dev, 3272 struct device_attribute *attr, 3273 char *buf) 3274 { 3275 struct i915_oa_config *oa_config = 3276 container_of(attr, typeof(*oa_config), sysfs_metric_id); 3277 3278 return sprintf(buf, "%d\n", oa_config->id); 3279 } 3280 3281 static int create_dynamic_oa_sysfs_entry(struct drm_i915_private *dev_priv, 3282 struct i915_oa_config *oa_config) 3283 { 3284 sysfs_attr_init(&oa_config->sysfs_metric_id.attr); 3285 oa_config->sysfs_metric_id.attr.name = "id"; 3286 oa_config->sysfs_metric_id.attr.mode = S_IRUGO; 3287 oa_config->sysfs_metric_id.show = show_dynamic_id; 3288 oa_config->sysfs_metric_id.store = NULL; 3289 3290 oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; 3291 oa_config->attrs[1] = NULL; 3292 3293 oa_config->sysfs_metric.name = oa_config->uuid; 3294 oa_config->sysfs_metric.attrs = oa_config->attrs; 3295 3296 return sysfs_create_group(dev_priv->perf.metrics_kobj, 3297 &oa_config->sysfs_metric); 3298 } 3299 3300 /** 3301 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config 3302 * @dev: drm device 3303 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from 3304 * userspace (unvalidated) 3305 * @file: drm file 3306 * 3307 * Validates the submitted OA register to be saved into a new OA config that 3308 * can then be used for programming the OA unit and its NOA network. 3309 * 3310 * Returns: A new allocated config number to be used with the perf open ioctl 3311 * or a negative error code on failure. 
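 *
 * As a hedged sketch of the userspace side (the uuid and register values are
 * placeholders and "drm_fd" is an assumed open DRM device fd), each register
 * table is passed as an array of (address, value) u32 pairs:
 *
 *	u32 mux_regs[] = { ... };	// (addr, value) pairs for the NOA mux
 *	struct drm_i915_perf_oa_config config = {
 *		.n_mux_regs = sizeof(mux_regs) / (2 * sizeof(u32)),
 *		.mux_regs_ptr = (uintptr_t)mux_regs,
 *	};
 *	int config_id;
 *
 *	memcpy(config.uuid, "01234567-89ab-cdef-0123-456789abcdef",
 *	       sizeof(config.uuid));
 *	config_id = ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);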
/**
 * i915_perf_add_config_ioctl - DRM ioctl() for userspace to add a new OA config
 * @dev: drm device
 * @data: ioctl data (pointer to struct drm_i915_perf_oa_config) copied from
 *        userspace (unvalidated)
 * @file: drm file
 *
 * Validates the submitted OA registers to be saved into a new OA config that
 * can then be used for programming the OA unit and its NOA network.
 *
 * Returns: A newly allocated config number to be used with the perf open
 * ioctl or a negative error code on failure.
 */
int i915_perf_add_config_ioctl(struct drm_device *dev, void *data,
			       struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_perf_oa_config *args = data;
	struct i915_oa_config *oa_config, *tmp;
	int err, id;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (!dev_priv->perf.metrics_kobj) {
		DRM_DEBUG("OA metrics weren't advertised via sysfs\n");
		return -EINVAL;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to add i915 OA config\n");
		return -EACCES;
	}

	if ((!args->mux_regs_ptr || !args->n_mux_regs) &&
	    (!args->boolean_regs_ptr || !args->n_boolean_regs) &&
	    (!args->flex_regs_ptr || !args->n_flex_regs)) {
		DRM_DEBUG("No OA registers given\n");
		return -EINVAL;
	}

	oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL);
	if (!oa_config) {
		DRM_DEBUG("Failed to allocate memory for the OA config\n");
		return -ENOMEM;
	}

	atomic_set(&oa_config->ref_count, 1);

	if (!uuid_is_valid(args->uuid)) {
		DRM_DEBUG("Invalid uuid format for OA config\n");
		err = -EINVAL;
		goto reg_err;
	}

	/* The last character in oa_config->uuid will be 0 because oa_config
	 * was allocated with kzalloc.
	 */
	memcpy(oa_config->uuid, args->uuid, sizeof(args->uuid));

	oa_config->mux_regs_len = args->n_mux_regs;
	oa_config->mux_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.ops.is_valid_mux_reg,
			      u64_to_user_ptr(args->mux_regs_ptr),
			      args->n_mux_regs);

	if (IS_ERR(oa_config->mux_regs)) {
		DRM_DEBUG("Failed to create OA config for mux_regs\n");
		err = PTR_ERR(oa_config->mux_regs);
		goto reg_err;
	}

	oa_config->b_counter_regs_len = args->n_boolean_regs;
	oa_config->b_counter_regs =
		alloc_oa_regs(dev_priv,
			      dev_priv->perf.ops.is_valid_b_counter_reg,
			      u64_to_user_ptr(args->boolean_regs_ptr),
			      args->n_boolean_regs);

	if (IS_ERR(oa_config->b_counter_regs)) {
		DRM_DEBUG("Failed to create OA config for b_counter_regs\n");
		err = PTR_ERR(oa_config->b_counter_regs);
		goto reg_err;
	}

	if (INTEL_GEN(dev_priv) < 8) {
		if (args->n_flex_regs != 0) {
			err = -EINVAL;
			goto reg_err;
		}
	} else {
		oa_config->flex_regs_len = args->n_flex_regs;
		oa_config->flex_regs =
			alloc_oa_regs(dev_priv,
				      dev_priv->perf.ops.is_valid_flex_reg,
				      u64_to_user_ptr(args->flex_regs_ptr),
				      args->n_flex_regs);

		if (IS_ERR(oa_config->flex_regs)) {
			DRM_DEBUG("Failed to create OA config for flex_regs\n");
			err = PTR_ERR(oa_config->flex_regs);
			goto reg_err;
		}
	}

	err = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (err)
		goto reg_err;

	/* We shouldn't have too many configs, so this iteration shouldn't be
	 * too costly.
	 */
	idr_for_each_entry(&dev_priv->perf.metrics_idr, tmp, id) {
		if (!strcmp(tmp->uuid, oa_config->uuid)) {
			DRM_DEBUG("OA config already exists with this uuid\n");
			err = -EADDRINUSE;
			goto sysfs_err;
		}
	}

	err = create_dynamic_oa_sysfs_entry(dev_priv, oa_config);
	if (err) {
		DRM_DEBUG("Failed to create sysfs entry for OA config\n");
		goto sysfs_err;
	}

	/* Config id 0 is invalid, id 1 is reserved for the kernel stored
	 * test config.
	 */
	oa_config->id = idr_alloc(&dev_priv->perf.metrics_idr,
				  oa_config, 2,
				  0, GFP_KERNEL);
	if (oa_config->id < 0) {
		DRM_DEBUG("Failed to allocate id for new OA config\n");
		err = oa_config->id;
		goto sysfs_err;
	}

	mutex_unlock(&dev_priv->perf.metrics_lock);

	DRM_DEBUG("Added config %s id=%i\n", oa_config->uuid, oa_config->id);

	return oa_config->id;

sysfs_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
reg_err:
	put_oa_config(dev_priv, oa_config);
	DRM_DEBUG("Failed to add new OA config\n");
	return err;
}
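/*
 * Illustration (not part of the driver): a minimal userspace sketch of the
 * ioctl above, assuming an already opened DRM fd, the uapi headers from
 * libdrm, and register addresses taken from a platform-specific metrics
 * definition (the 0x9888 pair below is only a placeholder). Each register
 * list is an array of (address, value) u32 pairs, exactly as parsed by
 * alloc_oa_regs():
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int add_oa_config(int drm_fd)
 *	{
 *		// (address, value) pairs; placeholder values only.
 *		uint32_t mux_regs[] = { 0x9888, 0x00000000 };
 *		struct drm_i915_perf_oa_config config = { 0 };
 *
 *		// uuid is a fixed 36 character field, not NUL terminated.
 *		memcpy(config.uuid, "01234567-0123-0123-0123-0123456789ab", 36);
 *		config.n_mux_regs = 1;
 *		config.mux_regs_ptr = (uintptr_t)mux_regs;
 *
 *		// On success the ioctl returns the new config id (>= 2);
 *		// it may require CAP_SYS_ADMIN depending on the paranoid
 *		// setting checked above.
 *		return ioctl(drm_fd, DRM_IOCTL_I915_PERF_ADD_CONFIG, &config);
 *	}
 */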
/**
 * i915_perf_remove_config_ioctl - DRM ioctl() for userspace to remove an OA config
 * @dev: drm device
 * @data: ioctl data (pointer to u64 integer) copied from userspace
 * @file: drm file
 *
 * Configs can be removed while being used; they will stop appearing in sysfs
 * and their content will be freed when the stream using the config is closed.
 *
 * Returns: 0 on success or a negative error code on failure.
 */
int i915_perf_remove_config_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 *arg = data;
	struct i915_oa_config *oa_config;
	int ret;

	if (!dev_priv->perf.initialized) {
		DRM_DEBUG("i915 perf interface not available for this system\n");
		return -ENOTSUPP;
	}

	if (i915_perf_stream_paranoid && !capable(CAP_SYS_ADMIN)) {
		DRM_DEBUG("Insufficient privileges to remove i915 OA config\n");
		return -EACCES;
	}

	ret = mutex_lock_interruptible(&dev_priv->perf.metrics_lock);
	if (ret)
		goto lock_err;

	oa_config = idr_find(&dev_priv->perf.metrics_idr, *arg);
	if (!oa_config) {
		DRM_DEBUG("Failed to remove unknown OA config\n");
		ret = -ENOENT;
		goto config_err;
	}

	GEM_BUG_ON(*arg != oa_config->id);

	sysfs_remove_group(dev_priv->perf.metrics_kobj,
			   &oa_config->sysfs_metric);

	idr_remove(&dev_priv->perf.metrics_idr, *arg);

	DRM_DEBUG("Removed config %s id=%i\n", oa_config->uuid, oa_config->id);

	put_oa_config(dev_priv, oa_config);

config_err:
	mutex_unlock(&dev_priv->perf.metrics_lock);
lock_err:
	return ret;
}
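/*
 * Illustration (not part of the driver): the matching userspace call, again
 * only a sketch assuming an open DRM fd. The argument is the u64 config id
 * previously returned by DRM_IOCTL_I915_PERF_ADD_CONFIG:
 *
 *	#include <stdint.h>
 *	#include <sys/ioctl.h>
 *	#include <drm/i915_drm.h>
 *
 *	static int remove_oa_config(int drm_fd, uint64_t config_id)
 *	{
 *		// Returns 0 on success, -1 with errno set otherwise
 *		// (e.g. ENOENT for an unknown id, EACCES without privileges).
 *		return ioctl(drm_fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG,
 *			     &config_id);
 *	}
 */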
static struct ctl_table oa_table[] = {
	{
		.procname = "perf_stream_paranoid",
		.data = &i915_perf_stream_paranoid,
		.maxlen = sizeof(i915_perf_stream_paranoid),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = SYSCTL_ONE,
	},
	{
		.procname = "oa_max_sample_rate",
		.data = &i915_oa_max_sample_rate,
		.maxlen = sizeof(i915_oa_max_sample_rate),
		.mode = 0644,
		.proc_handler = proc_dointvec_minmax,
		.extra1 = SYSCTL_ZERO,
		.extra2 = &oa_sample_rate_hard_limit,
	},
	{}
};

static struct ctl_table i915_root[] = {
	{
		.procname = "i915",
		.maxlen = 0,
		.mode = 0555,
		.child = oa_table,
	},
	{}
};

static struct ctl_table dev_root[] = {
	{
		.procname = "dev",
		.maxlen = 0,
		.mode = 0555,
		.child = i915_root,
	},
	{}
};

/**
 * i915_perf_init - initialize i915-perf state on module load
 * @dev_priv: i915 device instance
 *
 * Initializes i915-perf state without exposing anything to userspace.
 *
 * Note: i915-perf initialization is split into an 'init' and 'register'
 * phase, with i915_perf_register() exposing state to userspace.
 */
void i915_perf_init(struct drm_i915_private *dev_priv)
{
	if (IS_HASWELL(dev_priv)) {
		dev_priv->perf.ops.is_valid_b_counter_reg =
			gen7_is_valid_b_counter_addr;
		dev_priv->perf.ops.is_valid_mux_reg =
			hsw_is_valid_mux_addr;
		dev_priv->perf.ops.is_valid_flex_reg = NULL;
		dev_priv->perf.ops.enable_metric_set = hsw_enable_metric_set;
		dev_priv->perf.ops.disable_metric_set = hsw_disable_metric_set;
		dev_priv->perf.ops.oa_enable = gen7_oa_enable;
		dev_priv->perf.ops.oa_disable = gen7_oa_disable;
		dev_priv->perf.ops.read = gen7_oa_read;
		dev_priv->perf.ops.oa_hw_tail_read =
			gen7_oa_hw_tail_read;

		dev_priv->perf.oa_formats = hsw_oa_formats;
	} else if (HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
		/* Note: although we could theoretically also support the
		 * legacy ringbuffer mode on BDW (and earlier iterations of
		 * this driver did, before upstreaming), it didn't seem worth
		 * the complexity to maintain now that BDW+ enables execlist
		 * mode by default.
		 */
		dev_priv->perf.oa_formats = gen8_plus_oa_formats;

		dev_priv->perf.ops.oa_enable = gen8_oa_enable;
		dev_priv->perf.ops.oa_disable = gen8_oa_disable;
		dev_priv->perf.ops.read = gen8_oa_read;
		dev_priv->perf.ops.oa_hw_tail_read = gen8_oa_hw_tail_read;

		if (IS_GEN_RANGE(dev_priv, 8, 9)) {
			dev_priv->perf.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.ops.is_valid_mux_reg =
				gen8_is_valid_mux_addr;
			dev_priv->perf.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			if (IS_CHERRYVIEW(dev_priv)) {
				dev_priv->perf.ops.is_valid_mux_reg =
					chv_is_valid_mux_addr;
			}

			dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.ops.disable_metric_set = gen8_disable_metric_set;

			if (IS_GEN(dev_priv, 8)) {
				dev_priv->perf.ctx_oactxctrl_offset = 0x120;
				dev_priv->perf.ctx_flexeu0_offset = 0x2ce;

				dev_priv->perf.gen8_valid_ctx_bit = BIT(25);
			} else {
				dev_priv->perf.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.ctx_flexeu0_offset = 0x3de;

				dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
			}
		} else if (IS_GEN_RANGE(dev_priv, 10, 11)) {
			dev_priv->perf.ops.is_valid_b_counter_reg =
				gen7_is_valid_b_counter_addr;
			dev_priv->perf.ops.is_valid_mux_reg =
				gen10_is_valid_mux_addr;
			dev_priv->perf.ops.is_valid_flex_reg =
				gen8_is_valid_flex_addr;

			dev_priv->perf.ops.enable_metric_set = gen8_enable_metric_set;
			dev_priv->perf.ops.disable_metric_set = gen10_disable_metric_set;

			if (IS_GEN(dev_priv, 10)) {
				dev_priv->perf.ctx_oactxctrl_offset = 0x128;
				dev_priv->perf.ctx_flexeu0_offset = 0x3de;
			} else {
				dev_priv->perf.ctx_oactxctrl_offset = 0x124;
				dev_priv->perf.ctx_flexeu0_offset = 0x78e;
			}

			dev_priv->perf.gen8_valid_ctx_bit = BIT(16);
		}
	}

	if (dev_priv->perf.ops.enable_metric_set) {
		INIT_LIST_HEAD(&dev_priv->perf.streams);
		mutex_init(&dev_priv->perf.lock);

		oa_sample_rate_hard_limit = 1000 *
			(RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_khz / 2);
		dev_priv->perf.sysctl_header = register_sysctl_table(dev_root);

		mutex_init(&dev_priv->perf.metrics_lock);
		idr_init(&dev_priv->perf.metrics_idr);

		/* We set up some ratelimit state to potentially throttle any
		 * _NOTES about spurious, invalid OA reports which we don't
		 * forward to userspace.
		 *
		 * We print a _NOTE about any throttling when closing the
		 * stream instead of waiting until driver _fini, which no one
		 * would ever see.
		 *
		 * Using the same limiting factors as printk_ratelimit().
		 */
		ratelimit_state_init(&dev_priv->perf.spurious_report_rs,
				     5 * HZ, 10);
		/* Since we use a DRM_NOTE for spurious reports it would be
		 * inconsistent to let __ratelimit() automatically print a
		 * warning for throttling.
		 */
		ratelimit_set_flags(&dev_priv->perf.spurious_report_rs,
				    RATELIMIT_MSG_ON_RELEASE);

		dev_priv->perf.initialized = true;
	}
}
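/*
 * Illustration (not part of the driver): the hard limit computed above is
 * half the command streamer timestamp frequency, expressed in Hz. A worked
 * example, assuming a platform that reports a 12000 kHz timestamp (the value
 * is illustrative only):
 *
 *	u32 cs_timestamp_frequency_khz = 12000;	// assumed platform value
 *	u32 hard_limit_hz = 1000 * (cs_timestamp_frequency_khz / 2);
 *	// hard_limit_hz == 6000000, so periodic OA sampling can never be
 *	// requested faster than 6 MHz on such a platform. The
 *	// dev.i915.oa_max_sample_rate sysctl registered above is clamped to
 *	// this value via its .extra2 bound.
 */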
static int destroy_config(int id, void *p, void *data)
{
	struct drm_i915_private *dev_priv = data;
	struct i915_oa_config *oa_config = p;

	put_oa_config(dev_priv, oa_config);

	return 0;
}

/**
 * i915_perf_fini - Counterpart to i915_perf_init()
 * @dev_priv: i915 device instance
 */
void i915_perf_fini(struct drm_i915_private *dev_priv)
{
	if (!dev_priv->perf.initialized)
		return;

	idr_for_each(&dev_priv->perf.metrics_idr, destroy_config, dev_priv);
	idr_destroy(&dev_priv->perf.metrics_idr);

	unregister_sysctl_table(dev_priv->perf.sysctl_header);

	memset(&dev_priv->perf.ops, 0, sizeof(dev_priv->perf.ops));

	dev_priv->perf.initialized = false;
}
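/*
 * Illustration (not part of the driver): the sysctl tables registered by
 * i915_perf_init() appear under dev.i915.*, so a tool can check the paranoid
 * setting before attempting a system-wide stream or an OA config ioctl. A
 * minimal sketch, assuming the usual /proc/sys mapping of the dev.i915 path:
 *
 *	#include <stdio.h>
 *
 *	static int perf_stream_paranoid(void)
 *	{
 *		int val = 1;	// fall back to the restrictive setting on error
 *		FILE *f = fopen("/proc/sys/dev/i915/perf_stream_paranoid", "r");
 *
 *		if (f) {
 *			if (fscanf(f, "%d", &val) != 1)
 *				val = 1;
 *			fclose(f);
 *		}
 *		return val;
 *	}
 */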